diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/.dockerignore b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/.dockerignore
new file mode 100644
index 000000000..fd85b2584
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/.dockerignore
@@ -0,0 +1,143 @@
+# Exclude files from the Docker build context. This keeps unnecessary files from
+# being sent to the Docker daemon, reducing build time and image size.
+
+# Python artifacts
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+*.egg-info/
+
+# Virtual environments and local environment files (.env, .envrc)
+venv/
+.venv/
+env/
+.env
+.envrc
+client_venv.helpers/
+ENV/
+
+# Jupyter
+.ipynb_checkpoints/
+.jupyter/
+
+# Build artifacts
+build/
+dist/
+*.eggs/
+.eggs/
+
+# Cache and temporary files
+*.log
+*.tmp
+*.cache
+.pytest_cache/
+.mypy_cache/
+.coverage
+htmlcov/
+
+# Git and version control
+.git/
+.gitignore
+.gitattributes
+.github/
+
+# Docker helper scripts and alternate Dockerfiles (not needed inside the image)
+docker_build.sh
+docker_push.sh
+docker_clean.sh
+docker_exec.sh
+docker_cmd.sh
+docker_bash.sh
+docker_jupyter.sh
+docker_name.sh
+run_jupyter.sh
+Dockerfile.*
+.dockerignore
+
+# Documentation (the *.md pattern also covers the README and CHANGELOG entries)
+README.md
+README.admin.md
+docs/
+*.md
+CHANGELOG.md
+LICENSE
+
+# Environment files (may contain secrets) and OS metadata files
+.env.*
+.env.local
+.env.development
+.env.production
+.DS_Store
+Thumbs.db
+
+# Shell configuration
+.bashrc
+.bash_history
+.zshrc
+
+# Large data files (mount via volume instead)
+data/
+*.csv
+*.pkl
+*.h5
+*.parquet
+*.feather
+*.arrow
+*.npy
+*.npz
+
+# Generated images and documents
+*.png
+*.jpg
+*.jpeg
+*.gif
+*.svg
+*.pdf
+
+# Test files and examples
+tests/
+test_*
+*_test.py
+tutorials/
+examples/
+
+# IDE and editor files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.project
+.pydevproject
+.settings/
+*.iml
+.sublime-project
+.sublime-workspace
+
+# Node and frontend (if applicable)
+node_modules/
+npm-debug.log
+yarn-error.log
+.npm
+
+# Requirements management
+requirements.in
+Pipfile
+Pipfile.lock
+poetry.lock
+setup.py
+setup.cfg
+
+# CI/CD configuration
+.gitlab-ci.yml
+.travis.yml
+Jenkinsfile
+.circleci/
+
+# Miscellaneous
+*.bak
+.venv.bak/
+*.whl
+*.tar.gz
+*.zip
diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/Dockerfile b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/Dockerfile
new file mode 100644
index 000000000..2b73a9278
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/Dockerfile
@@ -0,0 +1,47 @@
+# ML-ready image for the AG News text-classification pipeline and Jupyter Lab.
+FROM python:3.12-slim
+
+# Build-time only: keeps apt non-interactive during the build without baking
+# the setting into the environment of every container started from the image.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# System packages: git, a C/C++ toolchain, and the GNU OpenMP runtime.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    ca-certificates \
+    g++ \
+    git \
+    libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Python dependencies. COPY creates /install, so no separate mkdir layer is
+# needed; copying only the requirements file keeps this layer cached until the
+# dependency list changes.
+COPY project_files/requirements.txt /install/requirements.txt
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir \
+    jupyterlab \
+    jupyterlab_vim \
+    jupytext \
+    -r /install/requirements.txt
+
+# Shell and sudo configuration.
+COPY etc_sudoers /etc/sudoers
+COPY bashrc /root/.bashrc
+
+# Version report
+COPY version.sh /install/version.sh
+RUN chmod +x /install/version.sh && \
+    /install/version.sh 2>&1 | tee /install/version.log
+
+# Working directory
+WORKDIR /app
+
+# HuggingFace cache dir
+ENV HF_HOME=/hf_cache \
+    TRANSFORMERS_CACHE=/hf_cache
+
+# Jupyter port
+EXPOSE 8888
+
+CMD ["/bin/bash"]
diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/Dockerfile.python_slim b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/Dockerfile.python_slim
new file mode 100644
index 000000000..cc8f18f2f
--- /dev/null
+++ 
b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/Dockerfile.python_slim
@@ -0,0 +1,28 @@
+# Use Python 3.12 slim (already has Python and pip).
+FROM python:3.12-slim
+
+# Avoid interactive prompts during apt operations (build-time only).
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install CA certificates (needed for HTTPS).
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install project specific packages. COPY creates /install on its own.
+COPY project_files/requirements.txt /install/requirements.txt
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir jupyterlab jupyterlab_vim jupytext -r /install/requirements.txt
+
+# Config.
+COPY etc_sudoers /install/
+COPY etc_sudoers /etc/sudoers
+COPY bashrc /root/.bashrc
+
+# Report package versions.
+COPY version.sh /install/
+RUN chmod +x /install/version.sh && \
+    /install/version.sh 2>&1 | tee version.log
+
+# Jupyter.
+EXPOSE 8888
diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/Dockerfile.ubuntu b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/Dockerfile.ubuntu
new file mode 100644
index 000000000..705105d91
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/Dockerfile.ubuntu
@@ -0,0 +1,41 @@
+FROM ubuntu:24.04
+# Build-time only (ARG), so the setting is not baked into the runtime environment.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install system utilities and Python in a single layer.
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends \
+    build-essential \
+    curl \
+    git \
+    python3 \
+    python3-dev \
+    python3-pip \
+    python3-venv \
+    sudo \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create virtual environment.
+RUN python3 -m venv /opt/venv
+
+# Make the venv the default Python.
+ENV PATH="/opt/venv/bin:$PATH"
+
+# Install project specific packages. COPY creates /install on its own.
+COPY project_files/requirements.txt /install/requirements.txt
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir jupyterlab jupyterlab_vim jupytext -r /install/requirements.txt
+
+# Config.
+COPY etc_sudoers /install/
+COPY etc_sudoers /etc/sudoers
+COPY bashrc /root/.bashrc
+
+# Report package versions.
+COPY version.sh /install/
+RUN chmod +x /install/version.sh && \
+    /install/version.sh 2>&1 | tee version.log
+
+# Jupyter.
+EXPOSE 8888
diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/Dockerfile.uv b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/Dockerfile.uv
new file mode 100644
index 000000000..d3b2a0abc
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/Dockerfile.uv
@@ -0,0 +1,58 @@
+FROM ubuntu:24.04
+# Build-time only (ARG), so the setting is not baked into the runtime environment.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install system utilities and Python in a single layer.
+# ca-certificates is listed explicitly because curl needs it for HTTPS and
+# --no-install-recommends would otherwise leave that to a transitive dependency.
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends \
+    build-essential \
+    ca-certificates \
+    curl \
+    g++ \
+    git \
+    libgomp1 \
+    python3 \
+    python3-dev \
+    python3-pip \
+    python3-venv \
+    sudo \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install uv for package management. The installer is downloaded to a file
+# first so that a failed download fails this step instead of piping an empty
+# script into sh.
+RUN curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh && \
+    sh /tmp/uv-install.sh && \
+    rm /tmp/uv-install.sh
+ENV PATH="/root/.local/bin:$PATH"
+
+# Install project specific packages using uv.
+COPY pyproject.toml uv.lock /app/
+WORKDIR /app
+RUN uv sync
+ENV PATH="/app/.venv/bin:$PATH"
+
+# Install Jupyter into the uv-managed environment. uv does not seed pip into
+# /app/.venv, so a bare `pip` would resolve to the system pip, which Ubuntu
+# 24.04 marks as externally managed (PEP 668) and refuses to use.
+RUN uv pip install --no-cache --python /app/.venv/bin/python \
+    jupyterlab jupyterlab_vim jupytext
+
+# Copy project files.
+COPY . /app
+
+# Config.
+COPY etc_sudoers /install/
+COPY etc_sudoers /etc/sudoers
+COPY bashrc /root/.bashrc
+
+# Report package versions.
+COPY version.sh /install/
+RUN chmod +x /install/version.sh && \
+    /install/version.sh 2>&1 | tee version.log
+
+# Jupyter.
+EXPOSE 8888 diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/README.md b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/README.md new file mode 100644 index 000000000..dcf8bedb9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/README.md @@ -0,0 +1,418 @@ +# HuggingFace News Article Classification + +## Description + +In this project, an end-to-end pipeline will be implemented for News Article Classification with Huggingface Transformers. The system trains a DistilBERT transformer using the AG News dataset for classifying articles into four classes using transfer learning. + +The entire pipeline - data loading, preprocessing, training, evaluation, and inference — runs inside Docker, requiring no local Python environment setup beyond Docker Desktop. + +**Authors**: @riyaapuri @stupatel17 + +**Project Specs**: https://github.com/gpsaggese/gpsaggese.github.io/blob/master/class_project/data605/Spring2026/projects_descriptions/HuggingFace_Project_Description.md + +--- + +## Table of Contents + +- [Architecture](#architecture) +- [Stack](#stack) +- [Project Structure](#project-structure) +- [Prerequisites](#prerequisites) +- [Installation](#installation) +- [Usage](#usage) +- [Configuration](#configuration) +- [Pipeline Steps](#pipeline-steps) +- [Outputs](#outputs) +- [Release Notes](#release-notes) + +--- + +## Architecture + +``` +Raw News Article + │ + ▼ +┌─────────────────┐ +│ dataset_loader │ Loads AG News from HuggingFace Hub +│ │ Splits into train / validation / test +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ preprocessing │ Cleans text (HTML, URLs, whitespace) +│ │ Tokenizes with AutoTokenizer (max 128 tokens) +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ train.py │ Fine-tunes DistilBERT +│ │ Saves best checkpoint under models by 
macro-F1 +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ evaluate.py │ Batch inference on test set +│ │ Outputs report, confusion matrix, CSV +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ predict.py │ Takes in a single user input +│ │ Returns label, per-class confidence scores +└─────────────────┘ +``` + +**Docker layer**: All steps above run inside a single container. The project directory is volume-mounted at `/app` so code edits are reflected immediately without rebuilding. HuggingFace model downloads are persisted in a named Docker volume (`hf_cache`) so models are not re-downloaded across runs. + +--- + +## Stack + +| Layer | Library | +|---|---| +| Modeling & Tokenization | HuggingFace Transformers, Datasets | +| Training Backend | PyTorch, Accelerate | +| Evaluation | Scikit-learn | +| Hyperparameter Tuning | Optuna | +| Containerization | Docker | + +--- + +## Project Structure + +``` +project_root/ +│ +├── project_files/ +│ ├── config.py # All constants and hyperparameters +│ ├── requirements.txt # Python dependencies +│ │ +│ ├── scripts/ +│ │ ├── train.py # Fine-tuning script +│ │ ├── evaluate.py # Evaluation + result export +│ │ └── predict.py # Interactive script +│ │ +│ └── utils/ +│ ├── dataset_loader.py # Data loading and inspection +│ ├── preprocessing.py # Text cleaning and tokenization +│ └── metrics.py # Metric callbacks and report utilities +│ +├── models/ # Saved model checkpoints +│ (generated) +│ └── distilbert-ag-news/ +│ └── best/ # Best checkpoint by macro-F1 +│ +├── results/ # Evaluation outputs (generated) +│ ├── classification_report.txt +│ ├── confusion_matrix.png +│ ├── per_class_metrics.png +│ └── predictions.csv +│ +├── Dockerfile # ML-ready container image +├── docker_name.sh # Image name configuration +├── docker_utils.sh # Shared helper functions +├── run.sh # Unified pipeline wrapper +├── docker_build.sh # Build the Docker image +├── docker_dataloader.sh # Run dataset_loader.py +├── docker_train.sh # Run train.py 
+├── docker_evaluate.sh      # Run evaluate.py
+├── docker_predict.sh       # Run predict.py
+├── docker_bash.sh          # Open interactive shell
+├── docker_jupyter.sh       # Launch Jupyter Lab
+├── docker_clean.sh         # Remove image and cache
+├── run_jupyter.sh          # Jupyter startup
+│                             (runs inside container)
+├── version.sh              # Package version logger
+│                             (runs at build)
+├── bashrc                  # Shell config copied into image
+└── etc_sudoers             # Sudo config copied into image
+```
+
+---
+
+## Prerequisites
+
+**Docker Desktop** is the only requirement. No local Python installation is needed.
+
+| OS | Instructions |
+|---|---|
+| macOS | Download from https://www.docker.com/products/docker-desktop and install |
+| Windows | Install Docker Desktop; WSL2 will be enabled automatically |
+| Linux | Install Docker Engine via your package manager (`apt`, `dnf`, etc.) |
+
+Verify Docker is working before proceeding:
+
+```bash
+docker --version
+docker run hello-world
+```
+
+---
+
+## Installation
+
+```bash
+# 1. Clone the repository and enter the project directory
+git clone REPOSITORY_URL
+cd REPOSITORY_DIRECTORY/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model
+
+# 2. Build the Docker image (one-time, ~5 minutes)
+./docker_build.sh
+```
+
+The build installs all Python dependencies from `project_files/requirements.txt` into the image. You only need to rebuild if `requirements.txt` or the `Dockerfile` changes.
+ +To force a clean rebuild (re-downloads all packages): + +```bash +./docker_build.sh --no-cache +``` + +--- + +## Usage + +### Full Pipeline + +Run the entire pipeline — data loading, training, evaluation, and prediction — in a single command: + +```bash +./run.sh +``` + +With options: + +```bash +# Swap the model backbone +./run.sh --model bert-base-uncased + +# Override training hyperparameters +./run.sh --model roberta-base --epochs 5 --batch_size 32 --lr 3e-5 + +# Set a custom prediction article +./run.sh --text "Federal Reserve raises interest rates for the third time this year" + +# Skip the Docker build step if the image already exists +./run.sh --skip-build --text "Apple reports record iPhone sales" +``` + +### Individual Steps (Used when you only want to run part of the process) + +Each pipeline step can also be run independently: + +```bash +# Inspect the dataset (label distribution, sample articles) +./docker_dataloader.sh + +# Fine-tune the model +./docker_train.sh +./docker_train.sh --model bert-base-uncased --epochs 3 + +# Evaluate the trained model +./docker_evaluate.sh +./docker_evaluate.sh --model_dir models/bert-ag-news + +# Run inference +./docker_predict.sh --text "NASA launches a new satellite into orbit" +./docker_predict.sh --file /app/project_files/articles.txt +./docker_predict.sh # interactive mode + +# Open a shell inside the container for debugging +./docker_bash.sh + +# Launch Jupyter Lab at http://localhost:8888/lab +./docker_jupyter.sh +JUPYTER_PORT=8889 ./docker_jupyter.sh # custom port + +# Remove the Docker image +./docker_clean.sh +./docker_clean.sh --volumes # also clears HF model cache +``` + +--- + +## Configuration + +All tunable parameters live in `project_files/config.py`. Edit this file to change any default without modifying the scripts. 
+ +| Parameter | Default | Description | +|---|---|---| +| `DEFAULT_MODEL` | `distilbert-base-uncased` | Backbone used when no `--model` flag is passed | +| `BERT_MODEL` | `bert-base-uncased` | BERT checkpoint name for reference | +| `ROBERTA_MODEL` | `roberta-base` | RoBERTa checkpoint name for reference | +| `EPOCHS` | `3` | Number of training epochs | +| `BATCH_SIZE` | `16` | Per-device training batch size | +| `LEARNING_RATE` | `2e-5` | Peak learning rate | +| `WEIGHT_DECAY` | `0.01` | L2 regularization strength | +| `WARMUP_STEPS` | `500` | Linear LR warmup steps | +| `MAX_LENGTH` | `128` | Max tokens per article | +| `TRAIN_SUBSET` | `None` | Set to an integer to use a smaller training slice | +| `EVAL_SUBSET` | `None` | Set to an integer to use a smaller test slice | +| `OUTPUT_DIR` | `models/distilbert-ag-news` | Where the trained checkpoint is saved | +| `RESULTS_DIR` | `results` | Where evaluation outputs are saved | +| `SEED` | `42` | Random seed for reproducibility | + +--- + +## Pipeline Steps + +### Step 1 — Dataset Loading (`utils/dataset_loader.py`) + +Loads the [AG News](https://huggingface.co/datasets/ag_news) dataset from the HuggingFace Hub. AG News contains 120,000 training and 7,600 test articles across four categories: + +| ID | Category | +|---|---| +| 0 | World | +| 1 | Sports | +| 2 | Business | +| 3 | Sci/Tech | + +Key functions: + +- `load_ag_news()` — downloads the dataset from the HuggingFace Hub. +- `get_subsets()` — optionally samples a smaller slice for faster iteration, controlled by `TRAIN_SUBSET` and `EVAL_SUBSET` in `config.py`. +- `summarize_dataset()` — prints per-label counts to the terminal. +- `get_sample_articles()` — prints random raw examples for spot-checking before training. + +A validation split (90/10 from train) is created at load time since AG News does not provide one by default. This ensures the test set remains fully unseen during model selection. 
+
+---
+
+### Step 2 — Preprocessing (`utils/preprocessing.py`)
+
+Three-stage pipeline applied before training:
+
+**Clean** (`clean_text()`): Removes HTML entities (such as the escaped forms of `&` and `"`) and URLs, and collapses excess whitespace. Punctuation and casing are preserved for the tokenizer.
+
+**Tokenize** (`make_tokenize_fn()`): Returns a closure for use with `dataset.map()`. Applies `padding="max_length"` and `truncation=True` at `MAX_LENGTH=128` tokens using `AutoTokenizer`.
+
+**Format** (`tokenize_dataset()`): Runs batched tokenization across the full `DatasetDict` and sets PyTorch tensor format on `input_ids`, `attention_mask`, and `label`.
+
+---
+
+### Step 3 — Training (`scripts/train.py`)
+
+Fine-tunes a transformer model on the preprocessed dataset using HuggingFace `Trainer`.
+
+**Model**: `AutoModelForSequenceClassification` adds a dropout layer and a linear projection (`hidden_size → 4 labels`) on top of the pre-trained backbone. Only the classification head is randomly initialized; transformer weights come from the HuggingFace checkpoint.
+
+**Training decisions**:
+- All transformer layers are trainable (full fine-tune, not frozen).
+- Linear learning rate warmup over 500 steps avoids large early gradient updates.
+- Weight decay regularization helps reduce overfitting.
+- Evaluation runs after every epoch; the best checkpoint is selected by macro-F1.
+- `fp16` mixed-precision is enabled automatically when CUDA is available.
+
+**CLI flags**:
+
+```bash
+python scripts/train.py --model bert-base-uncased --epochs 5 --batch_size 32 --lr 3e-5
+```
+
+**Outputs** (written to `models/{model-name}/`, e.g. `models/distilbert-ag-news/`):
+- `best/` — best checkpoint (model weights + tokenizer).
+- `train_results.txt` — per-epoch training log.
+ +--- + +### Step 4 — Evaluation (`scripts/evaluate.py`) + +Runs batch inference on the full test set and saves four outputs to `results/`: + +| Output file | Contents | +|---|---| +| `classification_report.txt` | Per-class precision, recall, F1, and support | +| `confusion_matrix.png` | Heatmap of true vs predicted labels | +| `per_class_metrics.png` | Grouped bar chart of precision, recall, and F1 per category | +| `predictions.csv` | Row-level predictions with true label, predicted label, and a correct/incorrect flag | + +Uses `matplotlib.use("Agg")` so plots render inside Docker without a display. + +**CLI flags**: + +```bash +python scripts/evaluate.py --model_dir models/bert-ag-news +``` + +--- + +### Step 5 — Inference (`scripts/predict.py`) + +Loads a saved checkpoint and classifies articles in three modes: + +**Single article**: +```bash +./docker_predict.sh --text "Apple reports record iPhone sales in Q3" +``` + +**File** (one article per line): +```bash +./docker_predict.sh --file /app/project_files/articles.txt +``` + +**Interactive** (type articles one at a time, `Ctrl+C` to quit): +```bash +./docker_predict.sh +``` + +Example output: +``` +── Result 1 ───────────────────────────────────────────── + Text : Apple reports record iPhone sales in Q3... + Prediction : Business (94.21% confidence) + All scores : + Business 94.21% ██████████████████ + Sci/Tech 3.87% + World 1.12% + Sports 0.80% +``` + +--- + +## Outputs + +All outputs are written to the host machine via the Docker volume mount and persist after the container exits. 
+
+| Path | Contents | Generated by |
+|---|---|---|
+| `models/{model-name}/best/` | Fine-tuned model weights and tokenizer | `train.py` |
+| `models/{model-name}/train_results.txt` | Per-epoch training log | `train.py` |
+| `results/classification_report.txt` | Full sklearn classification report | `evaluate.py` |
+| `results/confusion_matrix.png` | Confusion matrix heatmap | `evaluate.py` |
+| `results/per_class_metrics.png` | Per-class metric bar chart | `evaluate.py` |
+| `results/predictions.csv` | Row-level test set predictions | `evaluate.py` |
+
+---
+
+## Release Notes
+
+### Release v1.0
+Initial data and preprocessing pipeline.
+
+- `config.py` — central configuration for all constants and hyperparameters.
+- `utils/dataset_loader.py` — AG News loading, subsetting, and dataset inspection.
+- `utils/preprocessing.py` — text cleaning, tokenization, and dataset formatting.
+- `utils/metrics.py` — Trainer callback (`compute_metrics`) and sklearn report utility (`full_report`).
+- `requirements.txt` — full dependency list.
+
+### Release v2.0
+Model training, inference, and Docker integration.
+
+- **New**: `scripts/train.py` — end-to-end fine-tuning with HuggingFace Trainer.
+- **New**: `scripts/predict.py` — three-mode inference (single article, file, interactive).
+- **New**: `run.sh` — unified pipeline wrapper with CLI flags forwarded to each step.
+- **New**: `Dockerfile`, `docker_utils.sh`, `docker_name.sh`, `docker_build.sh`, `docker_train.sh`, `docker_predict.sh`, `docker_dataloader.sh`, `docker_bash.sh`, `docker_jupyter.sh`, `docker_clean.sh`, `run_jupyter.sh`, `version.sh` — full standalone Docker integration.
+- **Modified**: `config.py` — added `BERT_MODEL`, `ROBERTA_MODEL`, and `RESULTS_DIR`.
+- **Modified**: `utils/dataset_loader.py` — added 90/10 validation split from the training set since AG News has no default validation split.
+- **Modified**: `utils/preprocessing.py` — fixed `sys.path` insert for subdirectory execution.
+- **Modified**: `requirements.txt` — moved from project root into `project_files/`; root-level duplicate removed. +- **Modified**: `Dockerfile` — upgraded from bare template to full ML image; added system packages required by PyTorch and scikit-learn; configured HuggingFace cache volume. + +### Release v3.0 +Model evaluation and result export. + +- **New**: `scripts/evaluate.py` — batch inference on the test set, exports classification report, confusion matrix, per-class metric chart, and predictions CSV. +- **New**: `docker_evaluate.sh` — runs `evaluate.py` inside the container; results written to `./results/` on the host. +- **Modified**: `run.sh` — evaluation added as step 4; prediction moved to step 5; step counters updated. +- **Modified**: `utils/metrics.py` — removed `import evaluate` (HuggingFace library) to resolve a circular import. When running `scripts/evaluate.py`, Python adds `scripts/` to `sys.path` automatically, causing `import evaluate` inside `metrics.py` to resolve to `scripts/evaluate.py` instead of the HuggingFace package. Fixed by replacing the single usage with `accuracy_score` from sklearn, which produces identical results with no behaviour change. 
\ No newline at end of file
diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/bashrc b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/bashrc
new file mode 100644
index 000000000..4b7ff4c49
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/bashrc
@@ -0,0 +1 @@
+set -o vi
diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/copy_docker_files.py b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/copy_docker_files.py
new file mode 100644
index 000000000..0e97c194c
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/copy_docker_files.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+
+"""
+Copy Docker-related files from the source directory to a destination directory.
+
+This script copies all Docker configuration and utility files from
+class_project/project_template/ to a specified destination directory.
+
+Usage examples:
+    # Copy all files to a target directory.
+    > ./copy_docker_files.py --dst_dir /path/to/destination
+
+    # Copy with verbose logging.
+    > ./copy_docker_files.py --dst_dir /path/to/destination -v DEBUG
+
+Import as:
+
+import class_project.project_template.copy_docker_files as cpdccodo
+"""
+
+import argparse
+import logging
+import os
+import shlex
+from typing import List
+
+import helpers.hdbg as hdbg
+import helpers.hio as hio
+import helpers.hparser as hparser
+import helpers.hsystem as hsystem
+
+_LOG = logging.getLogger(__name__)
+
+# #############################################################################
+# Constants
+# #############################################################################
+
+# List of files to copy from the source directory.
+_FILES_TO_COPY = [
+    "bashrc",
+    "docker_bash.sh",
+    "docker_build.sh",
+    "docker_clean.sh",
+    "docker_cmd.sh",
+    "docker_exec.sh",
+    "docker_jupyter.sh",
+    "docker_name.sh",
+    "docker_push.sh",
+    "etc_sudoers",
+    "install_jupyter_extensions.sh",
+    "run_jupyter.sh",
+    "version.sh",
+]
+
+
+# #############################################################################
+# Helper functions
+# #############################################################################
+
+
+def _get_source_dir() -> str:
+    """
+    Get the absolute path to the source directory containing Docker files.
+
+    :return: absolute path to the directory that contains this script
+    """
+    # Get the directory where this script is located.
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    _LOG.debug("Script directory='%s'", script_dir)
+    return script_dir
+
+
+def _copy_files(
+    *,
+    src_dir: str,
+    dst_dir: str,
+    files: List[str],
+) -> None:
+    """
+    Copy specified files from source directory to destination directory.
+
+    :param src_dir: source directory path
+    :param dst_dir: destination directory path
+    :param files: list of filenames to copy
+    """
+    # Verify source directory exists.
+    hdbg.dassert_dir_exists(src_dir, "Source directory does not exist:", src_dir)
+    # Create destination directory if it doesn't exist.
+    hio.create_dir(dst_dir, incremental=True)
+    _LOG.info("Copying %d files from '%s' to '%s'", len(files), src_dir, dst_dir)
+    # Copy each file.
+    copied_count = 0
+    for filename in files:
+        src_path = os.path.join(src_dir, filename)
+        dst_path = os.path.join(dst_dir, filename)
+        # Verify source file exists.
+        hdbg.dassert_path_exists(
+            src_path, "Source file does not exist:", src_path
+        )
+        # Copy with `cp -a` (preserves permissions/attributes); paths are quoted.
+        _LOG.debug("Copying '%s' -> '%s'", src_path, dst_path)
+        cmd = f"cp -a {shlex.quote(src_path)} {shlex.quote(dst_path)}"
+        hsystem.system(cmd)
+        copied_count += 1
+    #
+    _LOG.info("Successfully copied %d files", copied_count)
+
+
+# #############################################################################
+
+
+def _parse() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument(
+        "--dst_dir",
+        action="store",
+        required=True,
+        help="Destination directory where files will be copied",
+    )
+    hparser.add_verbosity_arg(parser)
+    return parser
+
+
+def _main(parser: argparse.ArgumentParser) -> None:
+    args = parser.parse_args()
+    hdbg.init_logger(verbosity=args.log_level, use_exec_path=True)
+    # Get source directory.
+    src_dir = _get_source_dir()
+    # Copy files to destination.
+    _copy_files(
+        src_dir=src_dir,
+        dst_dir=args.dst_dir,
+        files=_FILES_TO_COPY,
+    )
+
+
+if __name__ == "__main__":
+    _main(_parse())
diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_bash.sh b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_bash.sh
new file mode 100755
index 000000000..54f34f78a
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_bash.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Open an interactive bash shell in a new container; the current directory is
+# mounted at /app so all code changes are live. Usage: ./docker_bash.sh
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/docker_utils.sh"
+load_docker_vars
+
+CONTAINER_NAME="${IMAGE_NAME}_bash"
+OPTS=$(base_run_opts "$CONTAINER_NAME" "-it")
+
+run "docker run $OPTS $FULL_IMAGE_NAME /bin/bash"
diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_build.sh b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_build.sh
new file mode 100755
index 000000000..739c36177
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_build.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Build the project image. Usage: ./docker_build.sh [--no-cache]  (--no-cache forces a full rebuild, re-installing all deps)
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/docker_utils.sh"
+load_docker_vars
+
+# Enable BuildKit for faster, cached layer builds.
+export DOCKER_BUILDKIT=1
+
+# Forward any extra args (like --no-cache) to docker build as one space-joined string.
+EXTRA_ARGS="$*"
+
+run "docker build $EXTRA_ARGS -t $FULL_IMAGE_NAME $SCRIPT_DIR"
+
+echo ""
+echo " Image built: $FULL_IMAGE_NAME"
+echo " Run './docker_bash.sh' to open an interactive shell."
+echo " Run './docker_train.sh' to start fine-tuning."
+echo " Run './docker_jupyter.sh' to launch Jupyter Lab."
diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_build.version.log b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_build.version.log
new file mode 100644
index 000000000..8315eefe2
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_build.version.log
@@ -0,0 +1 @@
+the input device is not a TTY
diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_clean.sh b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_clean.sh
new file mode 100755
index 000000000..9ffd7f889
--- /dev/null
+++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_clean.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Remove the project Docker image (removal errors are ignored via `|| true`).
+# Usage: ./docker_clean.sh [--volumes]   # --volumes also removes the HF cache named volume
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/docker_utils.sh"
+load_docker_vars
+
+run "docker image rm -f $FULL_IMAGE_NAME" || true
+
+if [[ "$1" == "--volumes" ]]; then
+    echo "Also removing named volume: $HF_CACHE_VOLUME"
+    run "docker volume rm $HF_CACHE_VOLUME" || true
+fi
+
+echo ""
+run "docker ps -a"
+echo " Cleanup complete. Run './docker_build.sh' to rebuild."
diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_cmd.sh b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_cmd.sh new file mode 100755 index 000000000..906d7a77b --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_cmd.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# """ +# Execute a command in a Docker container. +# +# This script runs a specified command inside a new Docker container instance. +# The container is removed automatically after the command completes. The +# git root is mounted to /git_root inside the container. +# """ + +# Exit immediately if any command exits with a non-zero status. +set -e + +# Import the utility functions. +GIT_ROOT=$(git rev-parse --show-toplevel) +source $GIT_ROOT/class_project/project_template/utils.sh + +# Parse default args (-h, -v) and enable set -x if -v is passed. +# Shift processed option flags so remaining args form the command. +parse_default_args "$@" +shift $((OPTIND-1)) + +# Capture the command to execute from remaining arguments. +CMD="$@" +echo "Executing: '$CMD'" + +# Load Docker configuration variables for this script. +get_docker_vars_script ${BASH_SOURCE[0]} +source $DOCKER_NAME +print_docker_vars + +# List available Docker images matching the expected image name. +run "docker image ls $FULL_IMAGE_NAME" +#(docker manifest inspect $FULL_IMAGE_NAME | grep arch) || true + +# Configure and run the Docker container with the specified command. 
#!/bin/bash
# Evaluate the fine-tuned model inside Docker.
#   ./docker_evaluate.sh                              # default checkpoint
#   ./docker_evaluate.sh --model_dir models/my-model  # another checkpoint
#
# All arguments are forwarded to project_files/scripts/evaluate_model.py.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/docker_utils.sh"
load_docker_vars

CONTAINER_NAME="${IMAGE_NAME}_evaluate"
OPTS=$(base_run_opts "$CONTAINER_NAME")

# run() hands its argument to eval, so each forwarded argument is shell-quoted
# here; a plain $* would be re-split on whitespace (e.g. a path with spaces).
# The guard avoids printf emitting a literal '' when no arguments were given.
ARGS=""
if [[ $# -gt 0 ]]; then
    ARGS=$(printf '%q ' "$@")
fi

echo "Running evaluation..."
echo " Results will be saved to ./results/ on your host."
echo ""

run "docker run $OPTS $FULL_IMAGE_NAME python project_files/scripts/evaluate_model.py $ARGS"

echo ""
echo " Evaluation complete. Check ./results/ for outputs."
+# """ + +# Exit immediately if any command exits with a non-zero status. +set -e + +# Import the utility functions. +GIT_ROOT=$(git rev-parse --show-toplevel) +source $GIT_ROOT/class_project/project_template/utils.sh + +# Parse default args (-h, -v) and enable set -x if -v is passed. +parse_default_args "$@" + +# Load Docker configuration variables for this script. +get_docker_vars_script ${BASH_SOURCE[0]} +source $DOCKER_NAME +print_docker_vars + +# Execute bash shell in the running container. +exec_container diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_jupyter.sh b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_jupyter.sh new file mode 100755 index 000000000..eb5a3bcbd --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/docker_jupyter.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# JUPYTER_PORT=8889 ./docker_jupyter.sh +# http://localhost:8888/lab +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$SCRIPT_DIR/docker_utils.sh" +load_docker_vars + +CONTAINER_NAME="${IMAGE_NAME}_jupyter" + +# -p maps host:container port; -d runs detached so the terminal stays free. +OPTS=$(base_run_opts "$CONTAINER_NAME" "-d -p ${JUPYTER_PORT}:8888") + +echo " Starting Jupyter Lab on http://localhost:${JUPYTER_PORT}/lab" +echo " (container: $CONTAINER_NAME)" +echo "" + +run "docker run $OPTS $FULL_IMAGE_NAME /bin/bash run_jupyter.sh" + +echo "" +echo " Jupyter is running. 
#!/bin/bash
# Run inference with the fine-tuned model inside Docker.
#   ./docker_predict.sh --text "Apple reports record iPhone sales"
#   ./docker_predict.sh --file /app/articles.txt   # file must be inside ./
#   ./docker_predict.sh                            # interactive mode
#
# Note: run ./docker_train.sh first so the model checkpoint exists.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/docker_utils.sh"
load_docker_vars

CONTAINER_NAME="${IMAGE_NAME}_predict"

# Only allocate a TTY for interactive mode (no args).
# When --text or --file is passed it runs non-interactively.
if [[ $# -eq 0 ]]; then
    EXTRA="-it"
    ARGS=""
else
    EXTRA=""
    # run() hands its argument to eval, so every forwarded argument must be
    # shell-quoted. With a plain $* the value of
    #   --text "Apple reports record iPhone sales"
    # was re-split and predict.py received five separate words.
    ARGS=$(printf '%q ' "$@")
fi

OPTS=$(base_run_opts "$CONTAINER_NAME" "$EXTRA")

run "docker run $OPTS $FULL_IMAGE_NAME python project_files/scripts/predict.py $ARGS"
#!/bin/bash
# Fine-tune the transformer model inside Docker.
#   ./docker_train.sh                                 # DistilBERT default
#   ./docker_train.sh --model bert-base-uncased       # swap backbone
#   ./docker_train.sh --epochs 5 --batch_size 32      # override hyperparams
#   ./docker_train.sh --model roberta-base --lr 3e-5

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/docker_utils.sh"
load_docker_vars

CONTAINER_NAME="${IMAGE_NAME}_train"
# No -it so it can be run non-interactively (e.g. in CI or nohup).
OPTS=$(base_run_opts "$CONTAINER_NAME")

# run() hands its argument to eval, so each forwarded argument is shell-quoted
# here; a plain $* would be re-split on whitespace (e.g. an --output_dir
# containing spaces). The guard avoids printf emitting '' for an empty list.
ARGS=""
if [[ $# -gt 0 ]]; then
    ARGS=$(printf '%q ' "$@")
fi

echo " Starting training — args: $*"
echo " Fine-tuned model will be saved to ./models/ on your host."
echo ""

run "docker run $OPTS $FULL_IMAGE_NAME python project_files/scripts/train.py $ARGS"

echo ""
echo " Training complete. Check ./models/ for the saved checkpoint."
# - run()              : echo + execute a command
# - load_docker_vars() : source docker_name.sh and print the resolved names
# - base_run_opts()    : common `docker run` flags used by every script

# Print the command line prefixed with "+ ", then execute it.
# The arguments are joined and re-parsed with eval, so callers must pass a
# string that is already correctly shell-quoted.
run() {
    echo "+ $*"
    eval "$*"
}

# Source docker_name.sh (always relative to the script calling this file)
# and print the image/volume names it defines.
load_docker_vars() {
    local script_dir
    # BASH_SOURCE[1] is the file that called this function.
    script_dir="$(cd "$(dirname "${BASH_SOURCE[1]}")" && pwd)"
    # shellcheck source=docker_name.sh
    source "$script_dir/docker_name.sh"
    echo "──────────────────────────────────────────"
    echo " REPO : $REPO_NAME"
    echo " IMAGE : $IMAGE_NAME"
    echo " FULL IMAGE : $FULL_IMAGE_NAME"
    echo " HF VOLUME : $HF_CACHE_VOLUME"
    echo "──────────────────────────────────────────"
}

# Emit the `docker run` options shared by all scripts.
#   $1 - container name (passed to --name)
#   $2 - optional extra options appended verbatim (e.g. "-it", "-d -p 8889:8888")
# The embedded double quotes are intentional: the result is spliced into a
# string that run() evaluates, so they protect paths containing spaces.
base_run_opts() {
    local container_name="$1"
    local extra="${2:-}"
    # Mount the project directory, not the caller's current directory: the
    # scripts execute paths relative to /app (project_files/scripts/...,
    # run_jupyter.sh), which only resolve when /app is the directory holding
    # the docker_*.sh scripts. SCRIPT_DIR is set by every caller before this
    # file is sourced; fall back to the old behaviour when it is not set.
    local project_dir="${SCRIPT_DIR:-$(pwd)}"
    echo "--rm \
        --name $container_name \
        -v \"$project_dir:/app\" \
        -v \"$HF_CACHE_VOLUME:/hf_cache\" \
        $extra"
}
# Dataset: AG News, four topic classes.
DATASET_NAME = "ag_news"
LABEL_NAMES = ["World", "Sports", "Business", "Sci/Tech"]
NUM_LABELS = len(LABEL_NAMES)
# Both mappings are derived from the position of each name in LABEL_NAMES.
ID2LABEL = dict(enumerate(LABEL_NAMES))
LABEL2ID = {label: index for index, label in ID2LABEL.items()}

# Model checkpoints on the HuggingFace hub.
DEFAULT_MODEL = "distilbert-base-uncased"
BERT_MODEL = "bert-base-uncased"
ROBERTA_MODEL = "roberta-base"

# Training hyperparameters.
OUTPUT_DIR = "models/distilbert-ag-news"
EPOCHS = 3
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
WEIGHT_DECAY = 0.01
WARMUP_STEPS = 500
MAX_LENGTH = 128  # Max token length per article
# None means "use the full split"; set an integer to train/evaluate on a subset.
TRAIN_SUBSET = None
EVAL_SUBSET = None

# Evaluation outputs.
RESULTS_DIR = "results"

# Reproducibility.
SEED = 42
# transformers>=4.46 is required: scripts/train.py passes `processing_class=`
# to Trainer (added in 4.46) and `eval_strategy=` to TrainingArguments (added
# in 4.41); older releases reject both keywords.
transformers>=4.46.0
datasets>=2.14.0
torch>=2.0.0
# Trainer in recent transformers releases needs accelerate>=0.26.
accelerate>=0.26.0
evaluate>=0.4.0
scikit-learn>=1.3.0
pandas>=2.0.0
numpy>=1.24.0
matplotlib>=3.7.0
seaborn>=0.12.0
tqdm>=4.65.0
optuna>=3.3.0
jupyterlab>=4.0.0
def load_model(model_dir: str):
    """
    Load the fine-tuned tokenizer and model from disk.

    The ``best`` sub-directory of ``model_dir`` is preferred when it exists;
    otherwise ``model_dir`` itself is used. If neither is a directory the
    process prints a hint and exits with status 1.

    Returns
    -------
    (tokenizer, model)
    """
    search_order = (os.path.join(model_dir, "best"), model_dir)
    checkpoint_dir = next((path for path in search_order if os.path.isdir(path)), None)

    if checkpoint_dir is None:
        # Neither candidate exists; report the directory the caller asked for.
        print(f"[evaluate] Model not found at '{model_dir}'.")
        print("[evaluate] Run `python scripts/train.py` first.")
        sys.exit(1)

    print(f"[evaluate] Loading model from: {checkpoint_dir}")
    return (
        AutoTokenizer.from_pretrained(checkpoint_dir),
        AutoModelForSequenceClassification.from_pretrained(checkpoint_dir),
    )
def _parse_per_class_rows(report_str, label_names):
    """Extract one precision/recall/F1 record per class from an sklearn report string."""
    rows = []
    lines = report_str.strip().split("\n")
    # After strip(): line 0 is the column header, line 1 is blank, and the
    # per-class rows follow, one for each entry of label_names.
    for line in lines[2: 2 + len(label_names)]:
        # Split from the right: the last four tokens are always precision,
        # recall, f1-score and support, so a class name that itself contains
        # spaces stays intact instead of shifting the numeric columns.
        parts = line.rsplit(None, 4)
        if len(parts) < 5:
            continue
        class_name, precision, recall, f1 = parts[0], parts[1], parts[2], parts[3]
        rows.append({
            "class": class_name.strip(),
            "Precision": float(precision),
            "Recall": float(recall),
            "F1-Score": float(f1),
        })
    return rows


def plot_per_class_metrics(report_str, label_names, save_path):
    """
    Parse an sklearn classification report string and save a grouped bar chart
    of per-class precision, recall and F1.

    Parameters
    ----------
    report_str  : str, text produced by sklearn's classification_report
    label_names : list of str, class names in report order
    save_path   : str, destination image path

    Returns
    -------
    None. Prints a notice and writes nothing when no class row can be parsed.
    """
    rows = _parse_per_class_rows(report_str, label_names)

    if not rows:
        print("[evaluate] Could not parse report for per-class chart.")
        return

    df = pd.DataFrame(rows).set_index("class")
    ax = df.plot(kind="bar", figsize=(9, 5), colormap="Set2",
                 width=0.7, edgecolor="white")
    ax.set_title("Per-Class Metrics — AG News", fontsize=14)
    ax.set_xlabel("News Category", fontsize=11)
    ax.set_ylabel("Score", fontsize=11)
    # Scores lie in [0, 1]; the extra headroom keeps full-height bars visible.
    ax.set_ylim(0, 1.05)
    ax.legend(loc="lower right")
    ax.grid(axis="y", alpha=0.4)
    plt.xticks(rotation=0)
    plt.tight_layout()
    ax.figure.savefig(save_path, dpi=150)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(ax.figure)
    print(f"[evaluate] Per-class chart saved : {save_path}")
Per-class metrics bar chart ───────────────────────────────────────── + pc_path = os.path.join(RESULTS_DIR, "per_class_metrics.png") + plot_per_class_metrics(report, LABEL_NAMES, pc_path) + + # ── 4. Raw predictions CSV ───────────────────────────────────────────────── + pred_df = pd.DataFrame({ + "true_label_id": y_true, + "pred_label_id": y_pred, + "true_label": [ID2LABEL[i] for i in y_true], + "pred_label": [ID2LABEL[i] for i in y_pred], + "correct": y_true == y_pred, + }) + csv_path = os.path.join(RESULTS_DIR, "predictions.csv") + pred_df.to_csv(csv_path, index=False) + print(f"[evaluate] Predictions saved : {csv_path}") + + # ── 5. Summary ──────────────────────────────────────────────────────────── + acc = accuracy_score(y_true, y_pred) + f1 = f1_score(y_true, y_pred, average="macro") + print(f"\n{'='*50}") + print(f" Final Results") + print(f" Accuracy : {acc:.4f} ({acc*100:.2f}%)") + print(f" F1 Macro : {f1:.4f}") + print(f"{'='*50}\n") + + +if __name__ == "__main__": + main() diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/project_files/scripts/predict.py b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/project_files/scripts/predict.py new file mode 100644 index 000000000..6a7e23d36 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/project_files/scripts/predict.py @@ -0,0 +1,108 @@ +# scripts/predict.py +#This script is to run the fine-tuned model on custom text input. 
def load_model(model_dir: str):
    # Load the fine-tuned tokenizer and model, preferring the "best" checkpoint
    # folder inside model_dir and falling back to model_dir itself.
    preferred = os.path.join(model_dir, "best")
    checkpoint_dir = model_dir
    if os.path.isdir(preferred):
        checkpoint_dir = preferred

    # Nothing usable on disk: tell the user how to create it and stop.
    if not os.path.isdir(checkpoint_dir):
        print(f"[predict] Model not found at '{checkpoint_dir}'.")
        print("[predict] Please run `python scripts/train.py` first.")
        sys.exit(1)

    print(f"[predict] Loading model from: {checkpoint_dir}")
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
    classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)
    # Inference only: switch off dropout and other training-time behaviour.
    classifier.eval()
    return tokenizer, classifier
def main():
    # Command-line entry point: classify --text, every non-empty line of --file,
    # or (with neither) articles typed interactively.
    parser = argparse.ArgumentParser()
    parser.add_argument("--text", type=str, default=None, help="Article text to classify.")
    parser.add_argument("--file", type=str, default=None, help="Path to a text file (one article per line).")
    parser.add_argument("--model_dir", type=str, default=OUTPUT_DIR, help="Directory of the fine-tuned model.")
    args = parser.parse_args()

    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer, model = load_model(args.model_dir)
    model = model.to(device)

    if args.text:
        texts = [args.text]
    elif args.file:
        # Explicit encoding so the result does not depend on the host locale.
        with open(args.file, encoding="utf-8") as f:
            texts = [line.strip() for line in f if line.strip()]
        if not texts:
            # An empty batch cannot be tokenized; fail with a clear message.
            print(f"[predict] No non-empty lines found in '{args.file}'.")
            sys.exit(1)
    else:
        print("[predict] Interactive mode — type an article and press Enter (Ctrl+C to quit).")
        while True:
            try:
                t = input("\n Article: ").strip()
                if t:
                    results = predict([t], tokenizer, model, device)
                    display_results(results)
            except (KeyboardInterrupt, EOFError):
                # EOFError covers Ctrl+D and a closed/non-interactive stdin,
                # which previously ended the session with a traceback.
                print("\nBye!")
                break
        return

    results = predict(texts, tokenizer, model, device)
    display_results(results)
def parse_args():
    # Build the training CLI. Each option defaults to the matching value from
    # config, except --output_dir, which defaults to None.
    parser = argparse.ArgumentParser(description="Fine-tune a transformer for news classification.")

    # (flag, type, default, help) for every supported option.
    option_specs = (
        ("--model", str, DEFAULT_MODEL,
         "HuggingFace model checkpoint to fine-tune."),
        ("--output_dir", str, None,
         "Where to save the fine-tuned model. Defaults to config OUTPUT_DIR."),
        ("--epochs", int, EPOCHS,
         "Number of training epochs."),
        ("--batch_size", int, BATCH_SIZE,
         "Per-device batch size."),
        ("--lr", float, LEARNING_RATE,
         "Peak learning rate."),
    )
    for flag, value_type, default_value, help_text in option_specs:
        parser.add_argument(flag, type=value_type, default=default_value, help=help_text)

    return parser.parse_args()
args.lr) + + # DataCollatorWithPadding pads each batch to its longest sequence + # (more efficient than global max_length padding) + data_collator = DataCollatorWithPadding(tokenizer=tokenizer) + + #Train + trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_ds, + eval_dataset=eval_ds, + processing_class=tokenizer, + data_collator=data_collator, + compute_metrics=compute_metrics, +) + + print(f"\n[train] Starting fine-tuning — {args.epochs} epoch(s)...") + trainer.train() + + #Step 5: Save best model + best_path = os.path.join(out_dir, "best") + trainer.save_model(best_path) + tokenizer.save_pretrained(best_path) + print(f"\n[train] Best model saved to: {best_path}") + + # Save training metrics summary + metrics_path = os.path.join(out_dir, "train_results.txt") + with open(metrics_path, "w") as f: + f.write(f"Model: {model_name}\n") + f.write(f"Epochs: {args.epochs}\n") + f.write(f"Batch size: {args.batch_size}\n") + f.write(f"Learning rate: {args.lr}\n\n") + for log in trainer.state.log_history: + f.write(str(log) + "\n") + print(f"[train] Training log saved to: {metrics_path}") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/project_files/utils/dataset_loader.py b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/project_files/utils/dataset_loader.py new file mode 100644 index 000000000..62920367f --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/project_files/utils/dataset_loader.py @@ -0,0 +1,71 @@ +#Data Loading + +import random +import sys +import os + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from datasets import load_dataset +import pandas as pd +from config import ( + DATASET_NAME, LABEL_NAMES, ID2LABEL, + 
def get_subsets(dataset):
    # Optionally shrink the splits for quick experiments.
    #   TRAIN_SUBSET -> train is cut to that many examples and validation to
    #                   max(1, TRAIN_SUBSET // 10)
    #   EVAL_SUBSET  -> test is cut to that many examples
    # Requested sizes are clamped to the size of the split, so an oversized
    # setting uses the whole split instead of failing inside select().
    # Returns the same dataset object with the affected splits replaced.

    def _take(split_name, requested):
        # Shuffle deterministically, keep the first `size` rows, report the size used.
        split = dataset[split_name]
        size = min(requested, len(split))
        dataset[split_name] = split.shuffle(seed=SEED).select(range(size))
        return size

    if TRAIN_SUBSET:
        train_size = _take("train", TRAIN_SUBSET)
        val_size = _take("validation", max(1, TRAIN_SUBSET // 10))
        print(f"Using train subset: {train_size}")
        print(f"Using validation subset: {val_size}")
    if EVAL_SUBSET:
        test_size = _take("test", EVAL_SUBSET)
        print(f"Using test subset: {test_size}")
    return dataset
def compute_metrics(eval_pred):
    """
    Metric callback for the HuggingFace Trainer.

    Parameters
    ----------
    eval_pred : pair (predictions, label_ids)
        predictions → raw logits, shape (N, num_labels) per the original
        docstring; a tuple of outputs is also accepted, in which case its
        first element is used as the logits.
        label_ids   → true labels, shape (N,)

    Returns
    -------
    dict with keys 'accuracy' and 'f1_macro', both plain floats rounded to
    4 decimals.
    """
    logits, labels = eval_pred
    # Some models hand back (logits, extra_outputs...); argmax over the tuple
    # itself would be wrong, so unwrap it first.
    if isinstance(logits, tuple):
        logits = logits[0]
    predictions = np.argmax(logits, axis=-1)

    # accuracy_score is already imported by this module, so the metric does
    # not need the hub-loaded `evaluate` object.
    acc = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average="macro")

    return {
        "accuracy": round(float(acc), 4),
        "f1_macro": round(float(f1), 4),
    }


def full_report(y_true, y_pred, label_names):
    """
    Generate a detailed classification report and confusion matrix.

    Parameters
    ----------
    y_true      : list or np.ndarray of true label IDs
    y_pred      : list or np.ndarray of predicted label IDs
    label_names : list of str (e.g. ['World', 'Sports', 'Business', 'Sci/Tech']);
                  label ID i is assumed to correspond to label_names[i]

    Returns
    -------
    report : str (sklearn classification_report string)
    cm     : np.ndarray of shape (len(label_names), len(label_names))
    """
    # Pin the label set explicitly. Without it sklearn infers the labels from
    # the data, and a small evaluation subset that misses one class makes
    # classification_report raise because target_names no longer matches.
    label_ids = list(range(len(label_names)))
    report = classification_report(
        y_true,
        y_pred,
        labels=label_ids,
        target_names=label_names,
        digits=4,
        zero_division=0,
    )
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    return report, cm
def clean_text(text: str) -> str:
    """
    Normalize a raw article string before tokenization.

    Steps, in order:
      1. Replace URLs (http://, https://, www.) with a space.
      2. Replace HTML entities (named, decimal and hex numeric) with a space.
      3. Collapse every run of whitespace to a single space and strip the ends.

    URLs are removed before entities so that a URL containing "&amp;" is
    removed as a whole, and whitespace is collapsed last so the removals do
    not leave double spaces behind.

    Parameters: text [raw input string]
    Returns: cleaned string (may be empty)
    """
    # Remove URLs
    text = re.sub(r"https?://\S+|www\.\S+", " ", text)
    # Remove HTML entities
    text = re.sub(r"&[a-zA-Z]+;|&#\d+;|&#[xX][0-9a-fA-F]+;", " ", text)
    # Collapse whitespace
    text = re.sub(r"\s+", " ", text)
    return text.strip()
def make_tokenize_fn(tokenizer, max_length: int = MAX_LENGTH, padding="max_length"):
    """
    Build a batched function suitable for `dataset.map(..., batched=True)`.

    The returned function cleans every string in `examples["text"]` with
    clean_text() and passes the cleaned batch to `tokenizer`.

    Parameters:
      tokenizer  [callable tokenizer, e.g. the object returned by get_tokenizer()]
      max_length [truncation length, also the pad length when padding="max_length"]
      padding    [forwarded to the tokenizer; the default "max_length" keeps the
                  original fixed-length behaviour, pass False to leave padding
                  to a per-batch collator such as DataCollatorWithPadding]
    Returns: function mapping a batch dict to the tokenizer's encoding.
    """
    def tokenize_fn(examples):
        # Clean all texts in the batch
        cleaned = [clean_text(t) for t in examples["text"]]
        # Tokenize
        return tokenizer(
            cleaned,
            padding=padding,
            truncation=True,
            max_length=max_length,
        )

    return tokenize_fn


def tokenize_dataset(dataset, tokenizer, max_length: int = MAX_LENGTH, padding="max_length"):
    """
    Apply tokenization to every split of `dataset` and expose torch tensors.

    The "text" column is dropped. The output format is set to torch for the
    columns "input_ids", "attention_mask" and "label" only.

    Parameters:
      dataset    [object with .map() and .set_format(); a DatasetDict per the
                  original comment]
      tokenizer  [tokenizer passed through to make_tokenize_fn()]
      max_length [see make_tokenize_fn()]
      padding    [see make_tokenize_fn(); with padding=False rows have different
                  lengths and must be batched through a padding collator]
    Returns: the tokenized dataset.
    """
    print(f"[preprocessing] Tokenizing dataset (max_length={max_length})...")
    tokenize_fn = make_tokenize_fn(tokenizer, max_length, padding)

    tokenized = dataset.map(
        tokenize_fn,
        batched=True,
        desc="Tokenizing",
        remove_columns=["text"],  # keep 'label', add token columns
    )

    # Set output format for PyTorch
    tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
    print("[preprocessing] Tokenization complete.")
    return tokenized
#!/bin/bash
#
# Run the whole pipeline: build the image, train, predict, evaluate.
#
# ./run.sh                                       # full pipeline, all defaults
# ./run.sh --model roberta-base                  # swap backbone
# ./run.sh --model bert-base-uncased --epochs 5
# ./run.sh --text "Apple reports record sales"   # custom prediction text
# ./run.sh --skip-build                          # skip image build step

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

SKIP_BUILD=0
# Array (not a string) so that option values survive word splitting intact.
TRAIN_ARGS=()
PREDICT_TEXT=""

usage() {
    echo "Usage: ./run.sh [--skip-build] [--model NAME] [--epochs N] [--batch_size N] [--lr RATE] [--text TEXT]" >&2
}

# Abort with a message when an option that takes a value is the last argument;
# otherwise `shift 2` would fail and `set -e` would exit without explanation.
require_value() {
    if [[ $2 -lt 2 ]]; then
        echo "Missing value for $1" >&2
        usage
        exit 1
    fi
}

while [[ $# -gt 0 ]]; do
    case "$1" in
        --skip-build)
            SKIP_BUILD=1
            shift
            ;;
        --text)
            require_value "$1" $#
            PREDICT_TEXT="$2"
            shift 2
            ;;
        --model|--epochs|--batch_size|--lr)
            require_value "$1" $#
            TRAIN_ARGS+=("$1" "$2")
            shift 2
            ;;
        *)
            echo "Unknown argument: $1" >&2
            usage
            exit 1
            ;;
    esac
done

step() {
    echo ""
    echo "════════════════════════════════════════════════"
    echo " $1"
    echo "════════════════════════════════════════════════"
}

# Step 1: Build
if [[ $SKIP_BUILD -eq 0 ]]; then
    step "Step 1/4 — Building Docker image"
    bash "$SCRIPT_DIR/docker_build.sh"
else
    step "Step 1/4 — Skipping build (--skip-build)"
fi

# Step 2: Train
step "Step 2/4 — Training model"
bash "$SCRIPT_DIR/docker_train.sh" "${TRAIN_ARGS[@]}"

# Step 3: Predict (falls back to a sample headline when --text was not given)
step "Step 3/4 — Running inference"
bash "$SCRIPT_DIR/docker_predict.sh" --text "${PREDICT_TEXT:-Apple reports record iPhone sales in Q3}"

# Step 4: Evaluate
step "Step 4/4 — Evaluating model"
bash "$SCRIPT_DIR/docker_evaluate.sh"

echo ""
echo "Pipeline complete!!!!"
b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/template.API.ipynb @@ -0,0 +1,215 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "183c2248-ea3d-43ba-b87e-d821bba1bbc6", + "metadata": {}, + "source": [ + "# Template API Notebook\n", + "\n", + "This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a neo4j tutorial the heading should be `Neo4j API`.\n", + "\n", + "- Add description of what the notebook does.\n", + "- Point to references, e.g. (neo4j.API.md)\n", + "- Add citations.\n", + "- Keep the notebook flow clear.\n", + "- Comments should be imperative and have a period at the end.\n", + "- Your code should be well commented.\n", + "\n", + "The name of this notebook should in the following format:\n", + "- if the notebook is exploring `pycaret API`, then it is `pycaret.API.ipynb`\n", + "\n", + "Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "265e0d58-a7cd-4edf-a0b4-96b60220e801", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "d3b2f997-5c9b-4238-b6d5-e5f2cea43809", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d1480ee9-d6a6-437d-b927-da6cbb05bdf5", + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "# Import libraries in this section.\n", + "# Avoid imports like import *, from ... import ..., from ... 
import *, etc.\n", + "\n", + "import helpers.hdbg as hdbg\n", + "import helpers.hnotebook as hnotebo" + ] + }, + { + "cell_type": "markdown", + "id": "f9208cc9-837d-4fec-a312-9c4aa5b7648d", + "metadata": {}, + "source": [ + "## Configuration" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9a2d7a9c-c6c5-48c9-8445-11c97045d00b", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0mWARNING: Running in Jupyter\n", + "INFO > cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-085a2ce7-6161-4c8a-92d5-492051832f3c.json'\n" + ] + } + ], + "source": [ + "hdbg.init_logger(verbosity=logging.INFO)\n", + "\n", + "_LOG = logging.getLogger(__name__)\n", + "\n", + "hnotebo.config_notebook()" + ] + }, + { + "cell_type": "markdown", + "id": "79c37ba3-bd5d-4a44-87df-645eee54977a", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "## Make the notebook flow clear\n", + "Each notebook needs to follow a clear and logical flow, e.g:\n", + "- Load data\n", + "- Compute stats\n", + "- Clean data\n", + "- Compute stats\n", + "- Do analysis\n", + "- Show results\n", + "\n", + "\n", + "\n", + "\n", + "#############################################################################\n", + "Template\n", + "#############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a8a109cd-fc8e-4b9e-9dc0-4fc8d4126ad8", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "class Template:\n", + " \"\"\"\n", + " Brief imperative description of what the class does in one line, if needed.\n", + " \"\"\"\n", + "\n", + " def __init__(self):\n", + " pass\n", + "\n", + " def method1(self, arg1: int) -> None:\n", + " \"\"\"\n", + " Brief imperative description of what the method does in one line.\n", + "\n", + " You can elaborate more in 
the method docstring in this section, for e.g. explaining\n", + " the formula/algorithm. Every method/function should have a docstring, typehints and include the\n", + " parameters and return as follows:\n", + "\n", + " :param arg1: description of arg1\n", + " :return: description of return\n", + " \"\"\"\n", + " # Code bloks go here.\n", + " # Make sure to include comments to explain what the code is doing.\n", + " # No empty lines between code blocks.\n", + " pass\n", + "\n", + "\n", + "def template_function(arg1: int) -> None:\n", + " \"\"\"\n", + " Brief imperative description of what the function does in one line.\n", + "\n", + " You can elaborate more in the function docstring in this section, for e.g. explaining\n", + " the formula/algorithm. Every function should have a docstring, typehints and include the\n", + " parameters and return as follows:\n", + "\n", + " :param arg1: description of arg1\n", + " :return: description of return\n", + " \"\"\"\n", + " # Code bloks go here.\n", + " # Make sure to include comments to explain what the code is doing.\n", + " # No empty lines between code blocks.\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "id": "00926523-ae59-497d-bba8-b22e58333849", + "metadata": {}, + "source": [ + "## The flow should be highlighted using headings in markdown\n", + "```\n", + "# Level 1\n", + "## Level 2\n", + "### Level 3\n", + "```" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/template.API.py b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/template.API.py new file mode 100644 index 000000000..4192ef8fe --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/template.API.py @@ -0,0 +1,129 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Template API Notebook +# +# This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a neo4j tutorial the heading should be `Neo4j API`. +# +# - Add description of what the notebook does. +# - Point to references, e.g. (neo4j.API.md) +# - Add citations. +# - Keep the notebook flow clear. +# - Comments should be imperative and have a period at the end. +# - Your code should be well commented. +# +# The name of this notebook should in the following format: +# - if the notebook is exploring `pycaret API`, then it is `pycaret.API.ipynb` +# +# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md + +# %% +# %load_ext autoreload +# %autoreload 2 +# %matplotlib inline + +# %% [markdown] +# ## Imports + +# %% +import logging +# Import libraries in this section. +# Avoid imports like import *, from ... import ..., from ... import *, etc. 
class Template:
    """
    Placeholder class that demonstrates the expected docstring layout.
    """

    def __init__(self):
        return None

    def method1(self, arg1: int) -> None:
        """
        Placeholder method; performs no work.

        Use this section of a real method docstring to explain the
        formula/algorithm. Every method should carry a docstring, type hints
        and the parameter/return entries shown below.

        :param arg1: description of arg1
        :return: description of return
        """
        # Code blocks go here.
        # Comment the code to explain what it is doing.
        # Do not leave empty lines between code blocks.
        return None


def template_function(arg1: int) -> None:
    """
    Placeholder function; performs no work.

    Use this section of a real function docstring to explain the
    formula/algorithm. Every function should carry a docstring, type hints
    and the parameter/return entries shown below.

    :param arg1: description of arg1
    :return: description of return
    """
    # Code blocks go here.
    # Comment the code to explain what it is doing.
    # Do not leave empty lines between code blocks.
    return None
(neo4j.example.md)\n", + "- Add citations.\n", + "- Keep the notebook flow clear.\n", + "- Comments should be imperative and have a period at the end.\n", + "- Your code should be well commented.\n", + "\n", + "The name of this notebook should in the following format:\n", + "- if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb`\n", + "\n", + "Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6226667e-cab5-479c-be6a-6b7d6f580a97", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8020901a-4bc7-4b73-95e8-aaa462b4fc19", + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "# Import libraries in this section.\n", + "# Avoid imports like import *, from ... import ..., from ... 
import *, etc.\n", + "\n", + "import helpers.hdbg as hdbg\n", + "import helpers.hnotebook as hnotebo" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4ecb72b2-b21d-4fb0-ac92-e7174da390e6", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0mWARNING: Running in Jupyter\n", + "INFO > cmd='/venv/lib/python3.12/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-783e0930-1631-4d64-8bb4-f3a98bb74fcd.json'\n" + ] + } + ], + "source": [ + "hdbg.init_logger(verbosity=logging.INFO)\n", + "\n", + "_LOG = logging.getLogger(__name__)\n", + "\n", + "hnotebo.config_notebook()" + ] + }, + { + "cell_type": "markdown", + "id": "1ede6422-bff2-4f0a-8d28-29a01d4786b2", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "## Make the notebook flow clear\n", + "Each notebook needs to follow a clear and logical flow, e.g:\n", + "- Load data\n", + "- Compute stats\n", + "- Clean data\n", + "- Compute stats\n", + "- Do analysis\n", + "- Show results\n", + "\n", + "\n", + "\n", + "\n", + "#############################################################################\n", + "Template\n", + "#############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8bbd660d-d22f-44fa-bf53-dd622dee0f53", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "class Template:\n", + " \"\"\"\n", + " Brief imperative description of what the class does in one line, if needed.\n", + " \"\"\"\n", + "\n", + " def __init__(self):\n", + " pass\n", + "\n", + " def method1(self, arg1: int) -> None:\n", + " \"\"\"\n", + " Brief imperative description of what the method does in one line.\n", + "\n", + " You can elaborate more in the method docstring in this section, for e.g. explaining\n", + " the formula/algorithm. 
Every method/function should have a docstring, typehints and include the\n", + " parameters and return as follows:\n", + "\n", + " :param arg1: description of arg1\n", + " :return: description of return\n", + " \"\"\"\n", + " # Code bloks go here.\n", + " # Make sure to include comments to explain what the code is doing.\n", + " # No empty lines between code blocks.\n", + " pass\n", + "\n", + "\n", + "def template_function(arg1: int) -> None:\n", + " \"\"\"\n", + " Brief imperative description of what the function does in one line.\n", + "\n", + " You can elaborate more in the function docstring in this section, for e.g. explaining\n", + " the formula/algorithm. Every function should have a docstring, typehints and include the\n", + " parameters and return as follows:\n", + "\n", + " :param arg1: description of arg1\n", + " :return: description of return\n", + " \"\"\"\n", + " # Code bloks go here.\n", + " # Make sure to include comments to explain what the code is doing.\n", + " # No empty lines between code blocks.\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "id": "103f6e36-54cf-442c-b137-8091d48805a7", + "metadata": {}, + "source": [ + "## The flow should be highlighted using headings in markdown\n", + "```\n", + "# Level 1\n", + "## Level 2\n", + "### Level 3\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d05d52af-67ba-4a4f-a561-af453e43854f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/template.example.py b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/template.example.py new file mode 100644 index 000000000..8566ff277 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/template.example.py @@ -0,0 +1,125 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Template Example Notebook +# +# This is a template notebook. The first heading should be the title of what notebook is about. For example, if it is a project on neo4j tutorial the heading should be `Project Title`. +# +# - Add description of what the notebook does. +# - Point to references, e.g. (neo4j.example.md) +# - Add citations. +# - Keep the notebook flow clear. +# - Comments should be imperative and have a period at the end. +# - Your code should be well commented. +# +# The name of this notebook should in the following format: +# - if the notebook is exploring `pycaret API`, then it is `pycaret.example.ipynb` +# +# Follow the reference to write notebooks in a clear manner: https://github.com/causify-ai/helpers/blob/master/docs/coding/all.jupyter_notebook.how_to_guide.md + +# %% +# %load_ext autoreload +# %autoreload 2 +# %matplotlib inline + +# %% +import logging +# Import libraries in this section. +# Avoid imports like import *, from ... import ..., from ... import *, etc. 
class Template:
    """
    Placeholder class that demonstrates the expected docstring layout.
    """

    def __init__(self):
        return None

    def method1(self, arg1: int) -> None:
        """
        Placeholder method; performs no work.

        Use this section of a real method docstring to explain the
        formula/algorithm. Every method should carry a docstring, type hints
        and the parameter/return entries shown below.

        :param arg1: description of arg1
        :return: description of return
        """
        # Code blocks go here.
        # Comment the code to explain what it is doing.
        # Do not leave empty lines between code blocks.
        return None


def template_function(arg1: int) -> None:
    """
    Placeholder function; performs no work.

    Use this section of a real function docstring to explain the
    formula/algorithm. Every function should carry a docstring, type hints
    and the parameter/return entries shown below.

    :param arg1: description of arg1
    :return: description of return
    """
    # Code blocks go here.
    # Comment the code to explain what it is doing.
    # Do not leave empty lines between code blocks.
    return None
def split_data(df: pd.DataFrame, target_column: str, test_size: float = 0.2):
    """
    Separate features from the target and split both into train and test parts.

    :param df: full dataset
    :param target_column: name of the target column
    :param test_size: proportion of test data (default = 0.2)

    :return: X_train, X_test, y_train, y_test (the result of
        `train_test_split` called with `random_state=42`)
    """
    logger.info("Splitting data into train and test sets")
    target = df[target_column]
    features = df.drop(columns=[target_column])
    return train_test_split(
        features, target, test_size=test_size, random_state=42
    )


def run_pycaret_classification(
    df: pd.DataFrame, target_column: str
) -> pd.DataFrame:
    """
    Skeleton of a PyCaret classification experiment.

    The `...` statements are placeholders left for the student to fill in;
    as written the function only logs and calls `compare_models()`.

    :param df: dataset containing features and target
    :param target_column: name of the target column

    :return: whatever `compare_models()` returns
    """
    logger.info("Initializing PyCaret classification setup")
    ...

    logger.info("Comparing models")
    comparison = compare_models()
    ...

    return comparison
class Test_docker(hdoctest.DockerTestCase):
    """
    Docker tests for the template notebooks.

    Each test hands one notebook file name to the `_helper` method inherited
    from `DockerTestCase`.
    """

    _test_file = __file__

    @pytest.mark.slow
    def test1(self) -> None:
        """
        Check template.example.ipynb through the Docker helper.
        """
        self._helper("template.example.ipynb")

    @pytest.mark.slow
    def test2(self) -> None:
        """
        Check template.API.ipynb through the Docker helper.
        """
        self._helper("template.API.ipynb")
Evaluation & results" + ] + }, + { + "cell_type": "markdown", + "id": "54592dad", + "metadata": {}, + "source": [ + "---\n", + "## Add project to Python path" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e5ce31d5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Project root: /app/project_files\n", + "Python path set up correctly.\n" + ] + } + ], + "source": [ + "import sys\n", + "import os\n", + "\n", + "# Make sure project_files is on the path so all imports work\n", + "PROJECT_ROOT = os.path.abspath('project_files')\n", + "if PROJECT_ROOT not in sys.path:\n", + " sys.path.insert(0, PROJECT_ROOT)\n", + "\n", + "print('Project root:', PROJECT_ROOT)\n", + "print('Python path set up correctly.')" + ] + }, + { + "cell_type": "markdown", + "id": "b58749d5", + "metadata": {}, + "source": [ + "---\n", + "## Running Config File\n", + "Central configuration. Every other module imports from here — nothing is hardcoded anywhere else." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2553f792", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#Dataset\n", + "DATASET_NAME = \"ag_news\"\n", + "NUM_LABELS = 4\n", + "LABEL_NAMES = [\"World\", \"Sports\", \"Business\", \"Sci/Tech\"]\n", + "LABEL2ID = {name: i for i, name in enumerate(LABEL_NAMES)}\n", + "ID2LABEL = {i: name for i, name in enumerate(LABEL_NAMES)}\n", + "\n", + "#Model\n", + "DEFAULT_MODEL = \"distilbert-base-uncased\"\n", + "BERT_MODEL = \"bert-base-uncased\" \n", + "ROBERTA_MODEL = \"roberta-base\" \n", + "\n", + "#Training\n", + "OUTPUT_DIR = \"models/distilbert-ag-news\"\n", + "EPOCHS = 2\n", + "BATCH_SIZE = 16\n", + "LEARNING_RATE = 2e-5\n", + "WEIGHT_DECAY = 0.01\n", + "WARMUP_STEPS = 500\n", + "MAX_LENGTH = 128 #Max token length per article\n", + "TRAIN_SUBSET = 100\n", + "EVAL_SUBSET = 50\n", + "\n", + "#Evaluation\n", + "RESULTS_DIR = \"results\"\n", + "\n", + "#Reproducibility\n", + "SEED = 42\n", + "\n" + ] + } + ], + "source": [ + "# Read and print config.py so we can walk through it on camera\n", + "with open('project_files/config.py') as f:\n", + " print(f.read())" + ] + }, + { + "cell_type": "markdown", + "id": "ec6d0446", + "metadata": {}, + "source": [ + "---\n", + "## Data Loading & Splits\n", + "Loads AG News from HuggingFace hub and applies the 90/10 train/validation split.\n", + "\n", + "**Full dataset sizes (when TRAIN_SUBSET = None):**\n", + "- Train: 108,000 (90% of original 120k)\n", + "- Validation: 12,000 (10% of original 120k)\n", + "- Test: 7,600 (original test split — untouched during training)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b31ee031", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using train subset: 100\n", + "Using validation subset: 10\n", + "Using test subset: 50\n", + "\n", + "Dataset Summary:\n", + "\n", + " Split: train (100 examples)\n", + " [0] World 28 \n", + " [1] Sports 26 \n", + " [2] Business 17 \n", + " [3] Sci/Tech 29 \n", + "\n", + " Split: test (50 examples)\n", + " [0] World 13 \n", + " [1] Sports 12 \n", + " [2] Business 16 \n", + " [3] Sci/Tech 9 \n", + "\n", + " Split: validation (10 examples)\n", + " [0] World 1 \n", + " [1] Sports 2 \n", + " [2] Business 5 \n", + " [3] Sci/Tech 2 \n", + "────────────────────────────────────────────────────\n", + "\n" + ] + } + ], + "source": [ + "from utils.dataset_loader import load_ag_news, get_subsets, summarize_dataset\n", + "\n", + "dataset = load_ag_news()\n", + "dataset = get_subsets(dataset)\n", + "summarize_dataset(dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d077dc04", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Actual split sizes after subsetting:\n", + " Train : 100\n", + " Validation : 10\n", + " Test : 50\n", + "\n", + "The test split is completely untouched during training.\n", + "Validation is used by the Trainer to pick the best checkpoint.\n" + ] + } + ], + "source": [ + "# Confirm exact split sizes\n", + "print('Actual split sizes after subsetting:')\n", + "print(f' Train : {len(dataset[\"train\"]):,}')\n", + "print(f' Validation : {len(dataset[\"validation\"]):,}')\n", + "print(f' Test : {len(dataset[\"test\"]):,}')\n", + "print()\n", + "print('The test split is completely untouched during training.')\n", + "print('Validation is used by 
the Trainer to pick the best checkpoint.')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "7fbc5e93", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "3 Sample Articles from 'train' split\n", + "\n", + " [1] Label: Sci/Tech\n", + " Text : Space Station Tinkering Works (CBS/AP) Space station commander Gennady Padalka and flight engineer Michael Fincke completed a five-hour 21-minute spacewalk, successfully installing a new coolant syste...\n", + "\n", + " [2] Label: World\n", + " Text : Kerry and Bush Congratulate Red Sox (AP) AP - Count Sen. John Kerry of Massachusetts among those Boston baseball fans who have waited a lifetime to see the Red Sox win the World Series. President Bush...\n", + "\n", + " [3] Label: Sci/Tech\n", + " Text : Getting Listed in Netscapes Open Directory Project Getting Listed in Netscape's Open Directory Project\\\\When you are deciding on the major search engine directories that you want your website to be li...\n", + "────────────────────────────────────────────────────\n", + "\n" + ] + } + ], + "source": [ + "# Show a few raw sample articles to see what the data looks like\n", + "from utils.dataset_loader import get_sample_articles\n", + "_ = get_sample_articles(dataset, n=3, split='train')" + ] + }, + { + "cell_type": "markdown", + "id": "3f887bbe", + "metadata": {}, + "source": [ + "---\n", + "## Preprocessing\n", + "Three stages: **clean → tokenize → format as PyTorch tensors**" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b76e8232", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Raw : ' NASA launches new satellite & rover into orbit. 
https://nasa.gov '\n", + "Clean : 'NASA launches new satellite rover into orbit.'\n" + ] + } + ], + "source": [ + "from utils.preprocessing import get_tokenizer, tokenize_dataset, clean_text\n", + "\n", + "raw = ' NASA launches new satellite & rover into orbit. https://nasa.gov '\n", + "print('Raw :', repr(raw))\n", + "print('Clean :', repr(clean_text(raw)))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5dbc8d54", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[preprocessing] Loading tokenizer: distilbert-base-uncased\n" + ] + } + ], + "source": [ + "# Load the tokenizer — downloads DistilBERT vocabulary\n", + "tokenizer = get_tokenizer()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "191df614", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Text : Apple reports record iPhone sales this quarter\n", + "Tokens : ['apple', 'reports', 'record', 'iphone', 'sales', 'this', 'quarter']\n", + "IDs : [101, 6207, 4311, 2501, 18059, 4341, 2023, 4284, 102]\n", + "\n", + "Note: [CLS]=101 prepended, [SEP]=102 appended automatically\n" + ] + } + ], + "source": [ + "# Show how a sentence is split into subword tokens\n", + "sample_text = 'Apple reports record iPhone sales this quarter'\n", + "tokens = tokenizer.tokenize(sample_text)\n", + "ids = tokenizer.encode(sample_text)\n", + "print('Text :', sample_text)\n", + "print('Tokens :', tokens)\n", + "print('IDs :', ids)\n", + "print()\n", + "print('Note: [CLS]=101 prepended, [SEP]=102 appended automatically')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "028d8e03", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[preprocessing] Tokenizing dataset (max_length=128)...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Tokenizing: 100%|██████████| 100/100 [00:00<00:00, 3948.70 
examples/s]\n", + "Tokenizing: 100%|██████████| 50/50 [00:00<00:00, 2319.76 examples/s]\n", + "Tokenizing: 100%|██████████| 10/10 [00:00<00:00, 1921.70 examples/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[preprocessing] Tokenization complete.\n", + "\n", + "Tokenized split sizes:\n", + " Train : 100\n", + " Validation : 10\n", + " Test : 50\n", + "\n", + "input_ids shape : torch.Size([128])\n", + "attention_mask : tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) ...\n", + "label : tensor(2)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "# Tokenize the full dataset — batched=True is 10-50x faster than row-by-row\n", + "tokenized = tokenize_dataset(dataset, tokenizer)\n", + "\n", + "print('\\nTokenized split sizes:')\n", + "print(f' Train : {len(tokenized[\"train\"]):,}')\n", + "print(f' Validation : {len(tokenized[\"validation\"]):,}')\n", + "print(f' Test : {len(tokenized[\"test\"]):,}')\n", + "print()\n", + "print('input_ids shape :', tokenized['train'][0]['input_ids'].shape)\n", + "print('attention_mask :', tokenized['train'][0]['attention_mask'][:10], '...')\n", + "print('label :', tokenized['train'][0]['label'])" + ] + }, + { + "cell_type": "markdown", + "id": "781384ac", + "metadata": {}, + "source": [ + "---\n", + "## Training\n", + "Fine-tunes DistilBERT on AG News using the HuggingFace Trainer API." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f4c87b9d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Warning: You are sending unauthenticated requests to the HF Hub. 
Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using train subset: 100\n", + "Using validation subset: 10\n", + "Using test subset: 50\n", + "[preprocessing] Loading tokenizer: distilbert-base-uncased\n", + "[preprocessing] Tokenizing dataset (max_length=128)...\n", + "[preprocessing] Tokenization complete.\n", + "\n", + "[train] Loading model: distilbert-base-uncased\n", + "[train] Total params : 66,956,548\n", + "[train] Trainable params: 66,956,548\n", + "\n", + "[train] Starting fine-tuning — 2 epoch(s)...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading weights: 100%|██████████| 100/100 [00:00<00:00, 4902.81it/s]\n", + "[transformers] \u001b[1mDistilBertForSequenceClassification LOAD REPORT\u001b[0m from: distilbert-base-uncased\n", + "Key | Status | \n", + "------------------------+------------+-\n", + "vocab_layer_norm.weight | UNEXPECTED | \n", + "vocab_transform.bias | UNEXPECTED | \n", + "vocab_layer_norm.bias | UNEXPECTED | \n", + "vocab_projector.bias | UNEXPECTED | \n", + "vocab_transform.weight | UNEXPECTED | \n", + "pre_classifier.weight | MISSING | \n", + "classifier.weight | MISSING | \n", + "pre_classifier.bias | MISSING | \n", + "classifier.bias | MISSING | \n", + "\n", + "Notes:\n", + "- UNEXPECTED:\tcan be ignored when loading from different task/architecture; not ok if you expect identical arch.\n", + "- MISSING:\tthose params were newly initialized because missing from the checkpoint. Consider training on your downstream task.\n", + "[transformers] `logging_dir` is deprecated and will be removed in v5.2. 
Please set `TENSORBOARD_LOGGING_DIR` instead.\n", + " 0%| | 0/14 [00:008,} KB')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "4db74f87", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: distilbert-base-uncased\n", + "Epochs: 2\n", + "Batch size: 16\n", + "Learning rate: 2e-05\n", + "\n", + "{'eval_loss': 1.399472951889038, 'eval_accuracy': 0.2, 'eval_f1_macro': 0.0833, 'eval_runtime': 0.8181, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 1.222, 'epoch': 1.0, 'step': 7}\n", + "{'eval_loss': 1.3993321657180786, 'eval_accuracy': 0.2, 'eval_f1_macro': 0.0833, 'eval_runtime': 0.7555, 'eval_samples_per_second': 13.237, 'eval_steps_per_second': 1.324, 'epoch': 2.0, 'step': 14}\n", + "{'train_runtime': 95.7502, 'train_samples_per_second': 2.089, 'train_steps_per_second': 0.146, 'total_flos': 6623606169600.0, 'train_loss': 1.3771130698067802, 'epoch': 2.0, 'step': 14}\n", + "\n" + ] + } + ], + "source": [ + "# Show the training log — accuracy and F1 per eval step\n", + "log_path = 'models/distilbert-ag-news/train_results.txt'\n", + "with open(log_path) as f:\n", + " print(f.read())" + ] + }, + { + "cell_type": "markdown", + "id": "edb0c85b", + "metadata": {}, + "source": [ + "---\n", + "## Predictions\n", + "Load the saved model and classify new articles." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "cfd5b1ec", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[predict] Loading model from: models/distilbert-ag-news/best\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading weights: 100%|██████████| 104/104 [00:00<00:00, 1723.50it/s]\n" + ] + } + ], + "source": [ + "from scripts.predict import load_model, predict, display_results\n", + "\n", + "tokenizer, model = load_model('models/distilbert-ag-news')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "37b85b81", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "── Result 1 ─────────────────────────────────────────────\n", + " Text : Apple reports record iPhone sales this quarter\n", + " Prediction : Sports (26.45% confidence)\n", + " All scores :\n", + " Sports 26.45% ▓▓▓▓▓\n", + " Sci/Tech 25.56% ▓▓▓▓▓\n", + " World 24.10% ▓▓▓▓\n", + " Business 23.88% ▓▓▓▓\n", + "\n", + "── Result 2 ─────────────────────────────────────────────\n", + " Text : Manchester United defeats Arsenal 3-1 in Premier League\n", + " Prediction : Sports (26.66% confidence)\n", + " All scores :\n", + " Sports 26.66% ▓▓▓▓▓\n", + " Sci/Tech 26.31% ▓▓▓▓▓\n", + " World 24.23% ▓▓▓▓\n", + " Business 22.80% ▓▓▓▓\n", + "\n", + "── Result 3 ─────────────────────────────────────────────\n", + " Text : NASA launches new satellite to study climate change\n", + " Prediction : Sports (27.96% confidence)\n", + " All scores :\n", + " Sports 27.96% ▓▓▓▓▓\n", + " Sci/Tech 25.51% ▓▓▓▓▓\n", + " Business 23.35% ▓▓▓▓\n", + " World 23.18% ▓▓▓▓\n", + "\n", + "── Result 4 ─────────────────────────────────────────────\n", + " Text : Senate passes new immigration reform bill\n", + " Prediction : Sports (27.79% confidence)\n", + " All scores :\n", + " Sports 27.79% ▓▓▓▓▓\n", + " World 25.38% ▓▓▓▓▓\n", + " Sci/Tech 24.54% ▓▓▓▓\n", + " Business 
22.29% ▓▓▓▓\n" + ] + } + ], + "source": [ + "# One article per class — none of these were seen during training\n", + "articles = [\n", + " 'Apple reports record iPhone sales this quarter', # Business\n", + " 'Manchester United defeats Arsenal 3-1 in Premier League', # Sports\n", + " 'NASA launches new satellite to study climate change', # Sci/Tech\n", + " 'Senate passes new immigration reform bill', # World\n", + "]\n", + "\n", + "results = predict(articles, tokenizer, model)\n", + "display_results(results)" + ] + }, + { + "cell_type": "markdown", + "id": "623b99b9", + "metadata": {}, + "source": [ + "---\n", + "## Best Model Evaluation\n", + "Run the full evaluation on the held-out test set (never seen during training)." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "be3253d5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[evaluate] Device: cpu\n", + "[evaluate] Loading model from: models/distilbert-ag-news/best\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading weights: 100%|██████████| 104/104 [00:00<00:00, 2588.29it/s]\n", + "Warning: You are sending unauthenticated requests to the HF Hub. 
Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using train subset: 100\n", + "Using validation subset: 10\n", + "Using test subset: 50\n", + "[preprocessing] Tokenizing dataset (max_length=128)...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Tokenizing: 100%|██████████| 100/100 [00:00<00:00, 2463.72 examples/s]\n", + "Tokenizing: 100%|██████████| 50/50 [00:00<00:00, 6135.07 examples/s]\n", + "Tokenizing: 100%|██████████| 10/10 [00:00<00:00, 1296.06 examples/s]\n", + "Evaluating: 0%| | 0/2 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Show the confusion matrix heatmap\n", + "from IPython.display import Image, display\n", + "display(Image('results/confusion_matrix.png'))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "5c16222e", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABUYAAALuCAYAAABiqcxsAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjksIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvJkbTWQAAAAlwSFlzAAAXEgAAFxIBZ5/SUgAAnidJREFUeJzs3X+81/P9P/7b6XcqKSVLSii2yK+YhML8/p2fMb+a/cDGe7aZ+TXfzT428+ttxsxvJkzY/P6d/FpIhFBERaIU6fePc17fP1zOeTs61Sl1jvW8Xnc5l53zfDyej+f9+Tqvc6qbx/PxKCuVSqUAAAAAABRIg/ouAAAAAACgrglGAQAAAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAFB46623XsrKytKvX7/6LoVvgGOPPTZlZWUpKyur71IAAFiJBKMAsAoaN25cVbDz1Y9GjRqlbdu22XzzzXPCCSdk2LBh9V3u1zZixIicc8452XHHHdO5c+esttpqad68eTp27Jh+/frl9NNPz0svvVTfZX7jffW9cumll9bqvKlTp6Zp06bVzr3mmmtWbrGsMm6++eZq750HH3xwuccaNmxYzjrrrOy4447p0qVLWrZsmWbNmqVDhw7p1atXjj/++AwaNCiff/7516773HPPrVb38ccfv9Rznnzyyar+Z5111teuAQD4egSjAFAw5eXl+fTTTzNy5Mj87W9/S+/evXPsscdmwYIF9V3aMhszZkz23nvvbLXVVvn973+fp59+Ou+//37mzJmTuXPnZtKkSRk6dGj+9Kc/pVevXtl6663z5JNP1nfZ/zWuv/76WvX7xz/+kfnz56/kav7Pl8OlG264oc6uy8px7bXXVvv6uuuuW+YxRowYkX79+qV37975wx/+kKeffjoTJkzIrFmzMm/evEyePDkvvfRSrr322hx55JFZa621csIJJ2TixIkr6jZyww035K233lph4wEAK1+j+i4AAFi5evXqVS3gKi8vz6RJk/Lggw/mb3/7W+bPn58bb7wxTZo0yd///vd6rHTZPPzwwzn88MPz2WefJUk6duyYQw89NDvssEPWXnvtNGnSJB9//HGGDx+ee+65JyNGjMjw4cNz7rnnCkeXonnz5pkzZ05effXVvPTSS9lqq62W2L/y/VV53n+7G264QeBaR8aOHZunnnoqSdKqVavMmDEj99xzTz755JO0a9euVmPcdNNN+fGPf5y5c+cmSbp27Zr+/ftnu+22S4cOHdK8efN88sknGTt2bJ544ok89NBDmTlzZv72t79lo402yv/8z/+skHspLy/PGWeckbvuumuFjAcArHyCUQBYxbVo0SKbbLJJtWObbbZZ9thjjxx44IHZdddds3DhwlxzzTX5xS9+kY022qieKq29V155JQceeGBVCHfGGWfk7LPPTrNmzRbpu/fee+e3v/1tHn/88fzqV7+q61L/K22xxRb58MMPM27cuFx//fVLDEZHjBiRkSNHJkn69++fW265pa7KZBVw3XXXpVQqpaysLFdddVWOOOKIzJ8/PzfffHN+/vOfL/X8Bx54IMcdd1wqKirSuHHjXHTRRTnhhBPSqFHN/8w54YQT8umnn+aKK67I+eefv8Luo3379pkyZUruvvvuPP/88/nud7+7wsYGAFYej9IDQIH169cvBx98cJKkVCrl/vvvr+eKlm7hwoU59NBDq0LRCy+8MH/4wx9qDEW/bJdddsl//vOf/OAHP6iLMv+rlZWV5dhjj02SDBo0KPPmzVts38rHnrt06ZJddtmlLspjFVFeXl41M3f
HHXfMgAEDsvHGGyep3eP0U6dOzZFHHpmKioqUlZXlzjvvzM9+9rPFhqKV2rRpkzPPPDOvvvpqNt988697G0mS3/zmN2nQ4It/Wv36179eIWMCACufYBQACq5Pnz5Vn7/33ns19nn00Udz7LHHplu3bmnZsmVWW221bLDBBjn66KPz9NNPL3H8r+74/v777+fXv/51Ntlkk6yxxhrLtMlP8sV6lm+//XaSL4LdX/ziF7U+t2nTpjnqqKNq3b/SwoUL88gjj+TUU09Nnz590qFDhzRp0iQtW7bMhhtumCOPPDKPPfZYrcZ69NFHc+SRR6Zbt25p0aJFmjRpkrXXXjs9evTIoYcemquvvjqffPJJjee++eabOeWUU7L55pundevWady4cdq1a5fu3btn9913zwUXXJDRo0cv8/3VpHJn9k8//TT/+te/auwzb968DBo0KElyzDHHLNMu7rNnz85ll12W3XbbLR07dkzTpk3Ttm3bbL311jn77LPz8ccfL3JO5aZiO+20U9Wx4447bpFNo9Zbb71q5/Xr16/a8WnTpuV3v/tdttpqq6y55popKyur9jj1suxKP2LEiPzsZz/LZpttlrZt26Zx48Zp27Ztvvvd7+bUU0/Nf/7znxrPe//993PGGWdkm222SZs2bdK4ceO0adMmG264Yfr165ff/e53GTFixNJfyP9iDz30UD788MMkqQriK///9ddfzwsvvLDE8y+99NKqpTSOP/747Lvvvst0/fXXX7/q99LXtemmm+b73/9+kmTo0KFfawOpSgsWLMj111+f/fbbL506dUqzZs3SunXr9OzZMz//+c8X+/v6qquuqnr/Lu7999vf/raqT48ePRZbwzrrrJOysrLstddei7QtXLgw119/ffbaa6+q+lZbbbWsu+662WKLLfKTn/wkd955Z52uPwwAy6wEAKxy3nvvvVKSUpJS3759l9j3yiuvrOp7wgknVGubNm1aaY899qhqX9zHD37wg9K8efNqHL9Lly5VdTz00EOl1q1bL3L+JZdcUut722677arOu//++2t93pJ8ucaanHLKKUt9DZKUDj/88NLcuXNrHKO8vLx09NFH12qcq6++epHz//a3v5UaNWq01HOPPPLI5X4dKsfo06dPqVQqlXbZZZdSktLuu+9eY//bbrutlKRUVlZWevfdd0vXX3/9Eu+h0pAhQ0prr732Eu+jVatWpbvvvrvaeV9+Xy/po0uXLtXO69u3b9Xxl156qdSxY8dFzjnllFOq+h9zzDFVxxdn9uzZtf5+ftU999xTWm211ZZ6XuX3YVXVv3//UpJSixYtSjNmzCiVSqXSxIkTSw0bNiwlKf3oRz9a4vnf+ta3ql6r0aNH10XJ1fz2t7+tuv6jjz5aGjduXKlp06alJKXNNtusVFFRscg5Q4YMqTrnzDPPXOzYr776aqlbt25LfH80bty4dOWVVy5y7ttvv13V57zzzqtx/D59+lQba9KkSYv0efPNN6va//znP1drmzx5cmmLLbao1fv/7bffXtpLCQD1xhqjAFBwr7zyStXn66yzTtXns2bNSr9+/fLqq68m+WKtzkMOOSTrrbdeWrRokbfeeitXXXVVnnnmmapdpa+55prFXuf999/PoYcemiQ566yzsvPOO2f11VfP2LFj0759+1rVOmvWrKpZZM2bN8+uu+66TPe6vBYuXJiOHTtmv/32y7bbbpsNNtggLVq0yOTJk/PWW2/liiuuyJgxY3LbbbelXbt2+ctf/rLIGH//+99z0003JUk23njj/PjHP86mm26atm3bZs6cOXn33Xfz/PPP55577lnk3Ndffz0nnXRSysvL07Zt2/zoRz9K37590759+5SXl+fDDz/MSy+9lAceeGCZZm0uzXHHHZfHH388jz76aD744IN06tSpWnvl4879+vVL165dM3To0KWO+cQTT2SPPfbIggULsvrqq+cnP/lJttlmm3Tp0iWzZ8/OM888k//93//
N5MmTc8ghh+TRRx+tmtW3zjrr5LXXXsuLL76YgQMHJknOO++87L///tWu0aRJkxqvPXPmzBxwwAGZOnVqTjnllOy1115p165dPvjgg5SXl9f6dVmwYEH22muvqk281l577Zxwwgnp06dP2rZtmxkzZuS1117Lgw8+mIcffrjauZMnT86RRx6Z2bNnp3nz5jn++OOz2267pUOHDikrK8vHH3+cV155ZZHzVjVTpkzJvffemyQ5+OCD07JlyyRfbKK222675cEHH8xtt92WSy65JKutttoi57/11luZNGlSkqR79+7p3r173RW/GF26dMkJJ5yQSy+9NCNHjsygQYNy5JFHLvM4b7zxRvr06ZMZM2akWbNmOe6449K3b9+st956KS8vzwsvvJDLLrss7733Xk444YS0bNmyarZqkmy44Ybp3LlzJkyYkMcffzxnnnlmtfFnzpy5yGzcJ554IkcccUS1Y48//njV519dJuNnP/tZXn755aq2o446Kl27ds3qq6+e6dOn56233spTTz2V++67b5nvHwDqVH0nswDAilfbGaNjxowptWjRoqrvsGHDqtp+/OMfV83meuKJJ2o8v6KionTyySdXnf/ss88u0qdyNmaSUrt27b7WzK7nnnuuaqxtt912ucdZXI2Le63eeeed0oIFCxZ7fnl5eemoo44qJSk1bNiwNG7cuEX67LDDDqUkpXXXXbc0ffr0JY41bdq0asfOPvvsqvt++eWXl3gvn3zyyRLbl6TyGpUzFWfPnl01w/erM8/ef//9UoMGDUpJSjfddFOpVCotdcbo9OnTS2uttVbVNb56n5UmTZpUNVtuo402KpWXl1dr//Ksu+uvv36p91U5YzRJqVmzZqXnnntuif2XNmP0nHPOqWrfaaedlvj9HD9+fLWvr7322qpzvzoj9qu+zvfym+7CCy+seh2GDBlSre3222+varvxxhtrPH/QoEFVfY444og6qHhRX50xWiqVSlOmTCm1atWqlKTUtWvXRWbSL23GaHl5ealHjx5V7/0JEybUeO0ZM2aUevfuXUpSatu2benzzz+v1n7ccceVkpSaNm1amj17drW2+++/v6ptr732KiUpDRw4cJFrHHjggVXjf3n265w5c0qNGzcuJSkdcMABNc6MrTRz5szSnDlzFtsOAPXNGqMAUDDl5eWZOHFirr766uywww6ZNWtWkmTXXXet2kl54sSJVbMBzz777GprOn5ZWVlZ/vznP2fttddO8sXadkty/vnnf62ZXV9ee7NDhw7LPc6y2mCDDZa4oUuDBg1yySWXpGHDhikvL69xTc6PPvooSbLVVltl9dVXX+JYbdq0qfHcNdZYY6mbxay55ppLbF8WzZs3z+GHH54kVZvkVLrhhhtSUVGR1VdfPQcddFCtxrvqqqsyefLkNG7cOLfddtsi91lp7bXXzkUXXZQkGT16dK1motbWL37xi/Tu3Xu5z58xY0bVmrjt2rXLHXfcscTvZ+fOnat9Xfm9TLLYn6tKK/J7+U1T+fula9eu6du3b7W2/fffv+q9UTkb/au+/LtgrbXWWuK1Pvroo7z++uuL/ViR2rVrl1/96ldJvliz+W9/+9synX/nnXdm1KhRSZIbb7wx6667bo39WrZsWfX7dtq0aRk8eHC19p133jnJF+sAP/vss9XaKmeC9u7dO/vss0+1Y5UqKiqqZkTvtNNO1WaiT5s2LQsWLEjyf+v3Lk6LFi2WujEeANQnwSgArOKGDh1abWOaRo0apVOnTvnRj35UtcFNr169cuutt1adc++991b9w/erj1d+VZMmTao2cPrqP8C/rHHjxksda2k+//zzqs8rH72tD7Nmzcr48ePzxhtvVIUrkyZNqgqyato0p/Ix9KeeemqZN0iqPPezzz7LHXfc8TWrXzbHHXdckuSdd97JU089lSQplUpVQelhhx1W46PONbnzzjuTJDvssMMij+V/1ZdDwyW9r5bVMccc87XOHzJkSNX
78Ac/+MEyh5dfvu+rr776a9Xy32rYsGF54403kiRHH330IsFa06ZNM2DAgCTJ008/nXfeeWeRMZbld8GFF16YTTfddLEfK9qpp55a9R9uzjvvvMyYMaPW51b+jKy//vpV/6FqcTbddNOq999Xf0Yqg9Fk0dCz8utddtml6hH58ePHZ+zYsVV9RowYkU8//XSRsZIvwt/KsPPWW2/NzJkza3dzAPANZI1RACiopk2bZquttsrRRx+dH/zgB9VmRH55/bmvznhbkso1/2rSrVu3xQZoS5q11aZNm6q1T788M6+u/zH+3nvv5dJLL819992X9957L6VSabF9a9pV/kc/+lGGDBmSadOmpWfPntl7772z++67Z9ttt02PHj2WOCP1+9//fv74xz9mzpw5OfTQQ9OnT5/su+++6dOnT7bYYou0aNFihdxjTb773e/mO9/5Tt54441cf/312XHHHfPUU09VhSiVa30uTXl5eV566aUkX6xnuCxroS7pfbUsWrRokW7dun2tMSrvIVn6jM+a7L///llrrbUyefLk/OpXv8o//vGPHHjggdlhhx2y5ZZbZo011vha9X322Wf54IMPvtYYtdGkSZPlnv1dOQu0rKxssUH1sccemyuuuCKlUinXXXdd/t//+3/V2lu1alX1+bIEj3WhRYsWOfvss/PTn/40U6ZMyUUXXZRzzz23VudW/u599913v9bPSMeOHfPtb387b775ZrVg9JNPPqlaN3qXXXZJ9+7d06lTp3zwwQd5/PHHs8EGGyRZ8vqiTZo0yTHHHJOrrroqzz//fLp06ZKDDjoou+yyS7beeuusv/76ta4bAOpdPT/KDwCsBF9eY7RXr16l1157rerjjTfeKL3//vul+fPnL/b8vffeu1a7Ddf08VWV63duv/32i73eksY75phjqvrV1xqjt99+e9Vu07X56NevX43jXHTRRTXuRt6yZcvS3nvvXbrlllsWu5bpY489VurUqdMi5zZq1Ki07bbblv785z+XPv3006/1OlSO+dXd0P/85z9XrTc7Y8aMqt3Yv/3tb1frt6Q1RqdMmbLc76ljjz222ljLu8Zop06davU6LGmN0RNOOKGq7bXXXqvVeF/18ssvl7797W8vcp9lZWWlzTbbrPTb3/629OGHHy7X2F/+HqzMjy5duixXfTNnzqxag3NJ6x+XSqWqtTY7duxYWrhwYbW2W265paqWAQMGLHMdX177eHnVtMZopfnz55c22GCDqp/vjz/+uFQqLX2N0S+v+bwsHzX9zvnpT39aSr5Y97jyd0Pl+q2tWrWq+l1T+X4/9NBDq87dddddS0lK66yzTo33Pnv27NIxxxxTKisrW6SWDh06lI455pjS0KFDl+t1BYC6ZMYoAKziWrRokU022WSZzlm4cGHV5y+88EKaN2/+teto2LDh1x5j0003TaNGjbJw4cKMHDkyCxYsSOPGjb/2uEvyzjvv5Oijj868efOy2mqr5ZRTTskee+yRDTfcMGuuuWaaNGlSNbOrc+fOef/99xc7m/TUU0/N0UcfnX/+85957LHH8txzz+Xjjz/OzJkzc//99+f+++/P+eefn3vuuSddu3atdu4uu+ySd955J/fcc08efPDBPPPMM3n77bezcOHCDBs2LMOGDcv555+fQYMGZffdd1+hr8FRRx2V3/zmN5k1a1auvfbaqvUMKx+zr40vv6d22223qjVEa2Nxa5EuqxXxHlwRNt9887z22mt5+OGHc++99+aZZ57JG2+8kYqKiowcOTIjR47MBRdckL/97W85+uij67vcFeqOO+6omuFZuczH0nz44Yd56KGHsvfee1cd+/Jau8OHD1/hdX5djRs3znnnnZcBAwZk5syZOe+883LZZZct9bzKn5MePXrktttuq/X1apo1vvPOO+fyyy9PeXl5nnzyyRxwwAFVM0H79u1bNUt9l112yY033pghQ4akVCpl/vz5eeaZZ6rGqEnz5s1zww035Iw
zzsjtt9+eJ598Mi+88EJmzpyZjz/+ODfeeGNuvPHG9O/fP7fccot1RgH4xhKMAgCLaN++fdXn7dq1WySkW9EWFyR+VcuWLbP11lvnP//5T+bMmZNHHnmkWliyMlx33XWZN29ekmTw4MHZc889F9u3ck2+JWnXrl1OPPHEnHjiiUm+CF4ffvjhXHXVVXnttdfy+uuv5+CDD672yHalpk2b5pBDDskhhxySJJkyZUqeeOKJ3HTTTXnggQcybdq0HHTQQXnnnXeqNsRaETp06JC99tor99xzT04//fTMnTs3jRo1ylFHHVXrMdZcc82UlZWlVCpl7ty5yxzWf1N8+Wfjww8/XO77aNiwYfbaa6/stddeSZLp06dn6NChue2223L77bdnzpw5Oe6449KzZ8+lbrj1Zccee2yOPfbY5aqpLixuM6XanPfln/XvfOc76dChQz7++OO8/fbbGTNmzNfa2G1lOOyww3LBBRfk5ZdfzlVXXZWf//znSz2nffv2+eCDDzJjxoyv/TPSr1+/NGjQIBUVFXn88cerBaPf+973qvpVPio/ZcqUvPrqq/n0008zZ86cJIsPRit17949Z599ds4+++yUl5fn5Zdfzn333Zcrr7wykydPzl133ZUzzzxzmf5DCADUJZsvAQCL2Gqrrao+r9xw55vihz/8YdXnF1544Uq/3muvvZbki1mLSwpF33jjjeVa93TDDTfMSSedlOHDh1cFYCNGjKjVBk3t27fPYYcdlvvvv78qaJ01a1b+/e9/L3MdS1O5lujcuXOTJHvuuecyha+NGzdOz549k3wxw68yeFkey7L24orWq1evqs+HDBmywsZt3bp19ttvvwwaNCh//OMfk3yxM/jtt9++wq5R38aMGVM1E3H33XfPrbfeutSPyh3r77vvvkyePLnaeD/4wQ+qPq+L3wXLqqysrOp7OX/+/Jx99tlLPafyd++ECRMyfvz4r3X9Nm3aZMstt0zyxZqhX95g6cvrhnbs2DEbbbRRVb8lrS+6JA0bNkyvXr1y7rnn5vnnn69aU3rQoEFf6z4AYGUSjAIAi9h///2rHju+9NJLqz0GXd+OOuqobLjhhkmSJ598cplmIs2fPz//+Mc/lul6lfc+b968lJeXL7ZfbR6TXZImTZpU28xnypQpy3T+lx+fX9Zza2PvvfdOp06d0rRp0zRt2rTWmy592UEHHZQkmT17dv76178udy1fXtqhcjZvXdlpp53SunXrJMk111yTadOmrfBrrOzvZX257rrrqj4/+eSTc/jhhy/144QTTkiSLFiwIDfffHO18X7+859Xbch2zTXX5N577627m6ml3XbbrWrW5aBBgzJy5Mgl9q/8GUmSP//5z1/7+pXB5ptvvln1+nXo0GGR2aiV/b4cjG644YZZd911l+u66623XlXYuiq9hwFY9QhGAYBFdO3atWr9yFdeeSUDBw7M/Pnzl3jOo48+mmeffXal19aoUaP885//rArHfvnLX+bss8+umsm4OEOHDk3v3r1zzTXXLNP1Kv9xP3v27Nx666019vnHP/6Rv//970sc54YbblhiiDd37tyqGYhlZWXVdna+6667atzp/ssefPDBqs8rd5ZekRo1apT3338/c+fOzdy5c3PAAQcs8xgnn3xy2rVrlyQ588wzlzqzdebMmbn44otTUVFR7XjHjh2rPq/NzNoVqWXLljn11FOTfLHD98EHH5zPP/98sf0nTJhQ7esHH3xwkWNf9cADD1R9vjK+l/WhvLw8N910U5Kkbdu22XXXXWt13r777lu1fuZXH8Nv165dbr755qolGg466KBcfvnlS/0PObNmzarTQL1y1mipVMof/vCHJfYdMGBANt544yTJX//611x55ZVL7D9//vxce+21+fjjj2ts//Kj8JWzamt6PL4yGB06dGhefPHFase+6t13381jjz22xLrGjRuXN998M8mq8x4GYNVkjVEAoEaXXnppRowYkREjRuTmm2/
Os88+m+OPPz7f/e5307Zt28yePTvvv/9+XnjhhfzrX//Ku+++m6uvvjp9+vRZ6bVtscUWufvuu3PYYYdl+vTpOe+883Ldddfl8MMPzw477JC11147jRo1yuTJkzNixIjce++9eeGFF5Kk6tHc2jr22GNz2WWXpVQq5fjjj89LL72UPfbYI+3bt8/48eNz6623ZvDgwdloo40ybdq0RR73rXTcccflF7/4RfbZZ59sv/326d69e1q3bp3p06fnzTffzN///ve88sorSb6YNfbl8O+yyy7LEUccke9973vZZZdd0qNHj7Rr1y7z5s2rquGee+5JknTp0iX77bffcryqK1/r1q0zePDg7Lbbbpk/f34OOOCA7L333jn44IOz0UYbpVmzZvnss8/y5ptv5qmnnsp9992XWbNm5eSTT06DBv/33/M7deqUrl275r333su1116bb3/729l6662rNnhp3LjxSg1jzjjjjDzxxBMZOnRohgwZko033jgnnHBC+vTpk7Zt22bGjBl544038sADD+SBBx7IggULqs69/fbb849//CM77LBDdt999/Ts2TNrrbVWKioqMnHixPzrX/+qmtnXunXrHHPMMSvtPurSAw88kEmTJiVJ+vfvX+tN01ZbbbXss88+uf322/Pmm2/mP//5T3r37l3Vvt9+++Xaa6/NCSeckHnz5uVnP/tZLr744hx88MHZbrvtstZaa6VFixaZOXNm3nvvvTz77LMZPHhw1X9oWBEbyy3N1ltvnYMPPjiDBw9e6uzJRo0a5a677sp2222Xzz77LCeeeGIGDRqU73//+9lkk03SokWLfP755xkzZkyee+65/Pvf/860adPy9ttvp0OHDouMt/3226dJkyaZP39+pk+fnqTmwHOnnXZKgwYNMmvWrKpji1tfdMKECdl1112z/vrrZ7/99ss222yTLl26pHnz5vnkk08ybNiwXHnllVX/serkk0+u9WsFAHVu5W98DwDUtffee6+UpJSk1Ldv3+Ue5/PPPy8NGDCgaqwlfZSVlZVuv/32Rcbo0qXL165jcUaPHl3ac889a1VfktK2225bevrpp5e5xosvvrhUVla22HG7d+9eevvtt5c4Tm1r3GOPPUrTp0+vdm7fvn1rde6GG25YGjVq1HK/npXj9OnTZ7nOv/7666vGuPrqqxfb7/nnny9tsMEGtbqnVq1alRYuXLjIGDfddNNiz+nSpUu1vpWv31ePL84xxxxTNdbizJo1q3TEEUfU6h4WN/aSPjp06FDje/W/1f777191b4899tgynXv33XdXnXv88cfX2OfFF18sbb/99rX+OWvatGnpuOOOK33wwQfLfU+//e1vq8Z79NFHl9h39OjRpUaNGlWr4cwzz1xs/zFjxpS23HLLWt/LhAkTFjvWV39/vPfeezX222qrrar9Pp88eXKN/YYMGVKruho0aFA6/fTTSxUVFUt8bQCgPpkxCgAsVqtWrTJo0KD86le/yg033JCnnnoqEyZMyPTp09OsWbN07Ngx3/nOd9K3b98ccMABK333+q/q3r17HnjggYwYMSJ33313nnzyyYwbNy5Tp05NRUVF2rRpk4022ii9e/fOIYccUrURybL6+c9/nl69euXSSy/Ns88+m6lTp2aNNdbIhhtumIMOOignnnhi1UYji/PGG2/koYceynPPPZcxY8Zk8uTJ+eSTT9KkSZN06tQpW2+9dY488sgaN3i67bbb8tBDD+Wpp57K66+/no8++ihTpkxJqVRK+/bts9lmm+WAAw7IUUcdlaZNmy7XPdalbbbZJm+99VZuv/32/Pvf/86LL76YKVOmZN68eVl99dXTtWvXbLnllvne976XffbZp2q92y876qij8q1vfStXXHFFhg8fnsmTJ9fp49GrrbZabrnllpxyyim59tpr89RTT2XixImZPXt2Wrdune7du2eHHXbI4YcfXu28Sy65JHvuuWeefPLJjBw5MpMmTcrkyZOzcOH
CtG3bNj169Mg+++yTgQMHVq2f+d/u448/zv3335/ki/Ut+/Xrt0zn77nnnlWzq2+//fZceumlVY/XV+rVq1eefvrp/Oc//8m9996bp556KuPHj8+0adOycOHCtG7dOh07dswWW2yR7bffPgcddFDWWGONFXSHS9e9e/cMHDhwqUtuVOrWrVuGDx+ee++9N4MHD86wYcPy0UcfZfbs2WnVqlU6d+6czTbbLLvsskv233//Jd7LLrvskqFDhyZJ1l9//ay33nqL7ffSSy8lSTbddNO0b9++xn477LBDnn766Tz66KN5/vnnM2HChHz88ceZPn16WrRokfXXXz877rhjjj/++Gy66aa1ul8AqC9lpVKpVN9FAAAAAADUJZsvAQAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQAAAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUTqP6LoDq1l577cyaNSudO3eu71IAAAAAoF5NmDAhLVq0yEcffbTCxzZj9Btm1qxZWbBgQX2XAQAAAAD1bsGCBZk1a9ZKGduM0W+Yypmio0aNqudKAAAAAKB+9ejRY6WNbcYoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQAAAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwVvlg9KWXXsof//jH9O/fP506dUpZWVnKysqWe7xPP/00p5xySrp06ZKmTZumS5cu+Z//+Z989tlnK65oAAAAAGClKiuVSqX6LmJlOuCAA/Lvf/97kePLc9uffPJJevfunXfeeSfrr79+evXqlVGjRmXUqFHp3r17/vOf/6Rt27Zfq94ePXokSUaNGvW1xgEAAACA/3YrMytb5WeM9u7dO2effXbuueeeTJo0KU2bNl3usf7nf/4n77zzTvr375/Ro0fn9ttvz+uvv56f/exnGTNmTE499dQVWDkAAAAAsLKs8jNGv6pZs2aZN2/eMs8YnTRpUjp16pRGjRplwoQJ6dChQ1XbvHnzsu6662batGn58MMPs9Zaay13fWaMAgAAAMAXzBj9BnjooYdSUVGRHXbYoVoomiRNmzbNvvvum/Ly8jzwwAP1VCEAAAAAUFuC0VoaOXJkkmTLLbessb3y+KuvvlpnNQEAAAAAy0cwWksTJkxIknTq1KnG9srj48ePr7OaAAAAAIDl06i+C/hvMXPmzCTJaqutVmN7ixYtkiQzZsyo1XiV6yN81dixY9OlS5dMnDhxOaoEAAAAgFXHwoUL06jRyokwzRgFAAAAAArHjNFaatmyZZJk9uzZNbbPmjUrSdKqVatajbe4nbQqZ5Kus846y1oiAHwj3PXOD7KwYl59l1GvGjVomv4bXlvfZQBAvfnlsDszr3xhfZdRr5o2bJQLtz2ovsuA/3ora7ZoIhittc6dOydJPvjggxrbK4936dKlzmoCgG+ihRXzUl4qdjCaivouAADq17zyhZlfUV7fZQAskUfpa2mzzTZLkowYMaLG9srjPXv2rLOaAAAAAIDlIxitpT322CMNGjTI008/ncmTJ1drmzdvXu699940bNgwe+21Vz1VCAAAAADUlmD0Ky6//PJsvPHG+c1vflPt+Le+9a0MGDAg8+fPz4knnpiFC/9vrZTTTjstU6ZMyfe///2stdZadV0yAAAAALCMVvk1Ru+///78/ve/r/p6/vz5SZJtt9226tjZZ5+dvffeO0nyySefZPTo0Zk0adIiY1166aUZNmxY7rzzzmy88cbp1atXRo0alddffz3dunXLxRdfvJLvBgAAAABYEVb5YHTKlCl5/vnnFzn+5WNTpkyp1Vjt2rXLCy+8kHPPPTf
/+te/cvfdd6dDhw45+eST8//9f/9f1lhjjRVVNgAAAACwEpWVSqVSfRfB/+nRo0eSZNSoUfVcCQAsn3+O+X7hd6VvWNY0h3b/R32XAQD15mfP3l74XembNGiYv/Q5rL7LgP96KzMrs8YoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQAAAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQAAAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQAAAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQAAAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAAoHAKEYzOmTMn55xzTrp3755mzZqlY8eOGThwYCZOnLjMYz366KPZe++90759+zRu3Dhrrrlmdtttt9x9990roXIAAAAAYGVY5YPRuXPnZuedd87vf//7zJw5M/vvv3/WXXfdXH/99dliiy3y7rvv1nqsSy+9NLvttlsefPDBdO/ePQcddFA23njjPPbYY+nfv3/OPPPMlXgnAAAAAMCKssoHo+edd16GDRuW3r17Z8yYMbn99tvz/PPP56KLLsqUKVMycODAWo0zZcqUnH766WncuHGGDBmSZ599NrfddlueffbZPPnkk2natGnOP//8ZQpaAQAAAID6sUoHo/Pnz8/ll1+eJPnrX/+ali1bVrWdeuqp6dmzZ4YOHZqXXnppqWM9//zzmTdvXnbeeef07du3WtuOO+6Y3XffPaVSKcOHD1+xNwEAAAAArHCrdDD67LPPZvr06dlggw2yxRZbLNJ+8MEHJ0nuvffepY7VtGnTWl1zzTXXXLYiAQAAAIA6t0oHoyNHjkySbLnlljW2Vx5/9dVXlzrWNttskzXWWCNPPPFEhg4dWq3tqaeeysMPP5xu3bplhx12+JpVAwAAAAAr2yodjE6YMCFJ0qlTpxrbK4+PHz9+qWO1bt061157bRo0aJCddtop22+/fQ4//PBsv/326devX7beeus8/PDDadKkyYq7AQAAAABgpWhU3wWsTDNnzkySrLbaajW2t2jRIkkyY8aMWo3Xv3//PPjggzn00EPz7LPPVh1fffXVs9tuu2WdddapdW09evSo8fjYsWPTpUuXTJw
4sdZjAcA3SYPPOyWlBfVdRr1qUNbYn+UAFNpa8xpkYam+q6hfjcoa+PsArAALFy5Mo0YrJ8JcpWeMrmgXXXRRvve972XHHXfMq6++mpkzZ+bVV1/NzjvvnHPOOSf9+/ev7xIBAAAAgFpYpWeMVu5CP3v27BrbZ82alSRp1arVUsd68skn88tf/jJbbrll7rjjjjRo8EWmvOmmm2bw4MHp1atX7r///jz44IPZc889lzreqFGjajxeOZN0WWafAsA3ScWsD1JemlffZdSvsqb+LAeg0CaPq8j8ivL6LqNeNWng3/awIqys2aLJKj5jtHPnzkmSDz74oMb2yuNdunRZ6lg333xzkuTAAw+sCkUrNWzYsGq26FNPPbXc9QIAAAAAdWOVDkY322yzJMmIESNqbK883rNnz6WOVRmitm7dusb2yuOffvrpMtcJAAAAANStVToY7dOnT1q3bp2xY8fmlVdeWaR98ODBSZJ99913qWOtvfbaSZLhw4fX2P7iiy8mSdZbb73lKxYAAAAAqDOrdDDapEmT/PSnP02SnHTSSVVriibJxRdfnFdffTV9+/bNVlttVXX88ssvz8Ybb5zf/OY31cY64IADkiS33HJL7rvvvmpt//73vzNo0KA0aNAgBx544Eq6GwAAAABgRVmlN19KkrPOOiuPPfZYnnvuuXTr1i077LBDxo8fn+effz7t27fPddddV63/J598ktGjR2fSpEnVjh9wwAE55JBDcscdd2TfffdNr1690rVr17z33ntVs0j/8Ic/ZKONNqqzewMAAAAAls8qPWM0SZo1a5YhQ4bk7LPPzmqrrZZ//etfGT9+fI499tiMGDEi66+/fq3GKSsry+23355rr702O+64Y955553cfffdGTduXPbaa688+OCDOeOMM1by3QAAAAAAK0JZqVQq1XcR/J8ePXokSUaNGlXPlQDA8vnnmO+nvDSvvsuoVw3LmubQ7v+o7zIAoN787NnbM7+ivL7LqFdNGjTMX/ocVt9lwH+9lZmVrfIzRgEAAAAAvkowCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQAAAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQAAAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQAAAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAA
AFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQAAAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUziofjM6ZMyfnnHNOunfvnmbNmqVjx44ZOHBgJk6cuFzjjRs3Lj/5yU/StWvXNG3aNO3atUvv3r3z5z//eQVXDgAAAACsLKt0MDp37tzsvPPO+f3vf5+ZM2dm//33z7rrrpvrr78+W2yxRd59991lGu/BBx9Mjx498ve//z1rrrlm+vfvny233DLjxo3LVVddtZLuAgAAAABY0RrVdwEr03nnnZdhw4ald+/eeeSRR9KyZcskycUXX5xf/OIXGThwYJ588slajfXWW2+lf//+adWqVR599NFst912VW0VFRUZMWLEyrgFAAAAAGAlWGVnjM6fPz+XX355kuSvf/1rVSiaJKeeemp69uyZoUOH5qWXXqrVeKeeemrmzp2bG264oVoomiQNGjRIr169VlzxAAAAAMBKtcoGo88++2ymT5+eDTbYIFtsscUi7QcffHCS5N57713qWO+//34efvjhrL/++tlrr71WeK0AAAAAQN1aZR+lHzlyZJJkyy23rLG98virr7661LGefPLJVFRUZLvttsvChQtz11135dlnn015eXk22WSTHHbYYWnTps2KKx4AAAAAWKlW2WB0woQJSZJOnTrV2F55fPz48Usd64033kiStGzZMjvssEOGDRtWrf3MM8/M4MGDs9NOO32dkgEAAACAOrLKBqMzZ85Mkqy22mo1trdo0SJJMmPGjKWO9emnnyZJrrnmmrRs2TKDBg3KHnvskSlTpuT3v/99/vGPf+TAAw/MqFGjss4669Sqvh49etR4fOzYsenSpUsmTpxYq3EA4JumweedktKC+i6jXjUoa+zPcgAKba15DbKwVN9V1K9GZQ38fQBWgIULF6ZRo5UTYa6ya4yuSBUVFUm++EZcddVVGTBgQNq0aZPu3bvn5ptvztZbb53p06fniiuuqOdKAQAAAIDaWGVnjFbuQj979uwa22fNmpUkadWqVa3HatmyZQ455JBF2o877ri8+OKLGTp0aK3rGzVqVI3HK2eS1nbmKQB801TM+iDlpXn1XUb9Kmvqz3IACm3yuIrMryiv7zLqVZMG/m0PK8LKmi2arMIzRjt37pwk+eCDD2psrzzepUuXpY5V2adz584pKytbpH299dZLkkyePHl5SgUAAAAA6tgqG4xuttlmSZIRI0bU2F55vGfPnksda4sttkjyf2uNftW0adOS/N/MUgAAAADgm22VDUb79OmT1q1bZ+zYsXnllVcWaR88eHCSZN99913qWNttt13WXHPNfPTRRxk9evQi7ZWP0FcGqAAAAADAN9sqG4w2adIkP/3pT5MkJ510UtWaokly8cUX59VXX03fvn2z1VZbVR2//PLLs/HGG+c3v/lNtbEaNWqUU089NaVSKSeddFI+//zzqrbHHnssN9xwQ8rKyvLjH/94Jd8VAAAAALAirLKbLyXJWWedlcceeyzPPfdcunXrlh122CHjx4/P888/n/bt2+e6666r1v+TTz7J6NGjM2nSpEXG+tWvfpUhQ4bkscceS/fu3bPtttvmk08+ybBhw1JeXp4//OEP2Wabberq1gAAAACAr2GVnTGaJM2aNcuQIUNy9tlnZ7XVVsu//vWvjB8/Pscee2xGjBiR9ddfv9ZjNW7cOA888ED+9Kc/pV27dnn44Yfz2muvpW/fvrn33ntzxhlnrMQ7AQAAAABWpLJSqVSq7yL4Pz169EiSjBo1qp4rAYDl888x3095aV59l1GvGpY1zaHd/1HfZQBAvfnZs7dnfkV5fZdRr5o0aJi/9DmsvsuA/3orMytbpWeMAgAAAADURDAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOE0qo+LTp06NVdffXWefPLJfPDBB5kzZ07Gjh1b1X7//fdn6tSpOfzww9O
kSZP6KBEAAAAAWIXVeTD68MMP58gjj8ynn36aUqmUJCkrK6vW58UXX8zvf//7tG3bNvvss09dlwgAAAAArOLq9FH60aNHp3///pk2bVoOOOCA3HjjjenRo8ci/Q4//PCUSqXcfffddVkeAAAAAFAQdRqMnn/++ZkzZ05++9vf5s4778xRRx2VNdZYY5F+G2+8cdq2bZvnnnuuLssDAAAAAAqiToPRxx9/PK1atcpZZ5211L5dunTJ+++/XwdVAQAAAABFU6fB6OTJk7PhhhumYcOGS+3buHHjLFy4sA6qAgAAAACKpk6D0dVXXz2TJ0+uVd/x48enffv2K7kiAAAAAKCI6jQY3WKLLfLhhx9m1KhRS+z31FNP5eOPP853v/vdOqoMAAAAACiSOg1GjznmmJRKpfzwhz/M1KlTa+zzwQcf5Pjjj09ZWVmOO+64uiwPAAAAACiIRnV5sSOOOCK33HJLHnroofTo0SP7779/Jk6cmCS56KKL8vrrr2fw4MGZNWtWDj744Oy99951WR4AAAAAUBB1GoyWlZXlzjvvzIknnpibbropV199dVXbaaedllKplCQ59thjc+WVV9ZlaQAAAABAgdRpMJokzZs3z/XXX5/TTjstd955Z1599dV89tlnadmyZTbZZJMcfPDB6dmzZ12XBQAAAAAUSJ0Go5dddlnKysryox/9KN/+9rdz1lln1eXlAQAAAACS1PHmS6eeemouv/zyNG3atC4vCwAAAABQTZ0Go+3bt0/r1q3r8pIAAAAAAIuo02B0hx12yJtvvpk5c+bU5WUBAAAAAKqp02D0rLPOysKFC3PKKadU7UAPAAAAAFDX6nTzpc8++yxnnHFGfve732X48OH5/ve/n29/+9tp0aLFYs/Zcccd67BCAAAAAKAI6jQY7devX8rKylIqlTJy5MiMHDlyif3LysqycOHCOqoOAAAAACiKOg1GO3funLKysrq8JAAAAADAIuo0GB03blxdXg4AAAAAoEZ1uvkSAAAAAMA3gWAUAAAAACicOn2U/sueeeaZPPTQQ3nrrbcyY8aMtGrVKhtvvHH23HPP9OnTp77KAgAAAAAKoM6D0Q8//DDf//73M3To0CRJqVSqaisrK8v555+ffv365eabb07Hjh3rujwAAAAAoADqNBidOXNmdtlll4wePTplZWXZdddds+mmm2bttdfORx99lNdffz2PPPJIhgwZku9973t58cUX06JFi7osEQAAAAAogDoNRi+55JKMHj06m2yySQYNGpRNNtlkkT6jRo3KgAEDMmrUqFx66aU588wz67JEAAAAAKAA6nTzpbvuuisNGzbM3XffXWMomiQ9evTI3XffnbKystxxxx11WR4AAAAAUBB1Goy+88476dGjRzbYYIMl9ttggw3So0ePjB07to4qAwAAAACKpE6D0S9vtLQ0ZWVlK7ESAAAAAKDI6jQY3XDDDTNq1KiMGzduif3ee++9vP7661l//fXrpjAAAAAAoFDqNBg98MADU15enoMOOihvv/12jX3GjBmTgw46KKVSKQcddFBdlgcAAAAAFESd7kp/6qmn5pZbbsnLL7+c73znO9l9992zySabpEOHDvn444/z+uuv5+GHH055eXm6deuWU089tS7LAwAAAAAKok6D0VatWuXxxx/PgAED8txzz+WBBx7Igw8+WNVeuQbp9ttvn1tuuSUtW7asy/IAAAAAgIKo02A0SdZdd90888wzeeqpp/Lggw9m9OjRmTFjRlq1apWNNtooe+65Z3bccce6LgsAAAAAKJA6D0Yr7bjjjgJQAAAAAKBe1OnmSwAAAAAA3wR1Goy+8sorGThwYAYNGrTEfrfccksGDhyY1157rY4qAwAAAACKpE6D0WuuuSY33nhjOnXqtMR+6667bm644YZce+21dVQZAAAAAFAkdRqMPvnkk2nVqtVS1xbdcccd06pVqzzxxBN1VBkAAAAAUCR1Goy+//776dq1a636du3
aNR988MFKrggAAAAAKKI6DUbLy8tTKpVq3X/evHkrsRoAAAAAoKjqNBjt3Llz3nrrrXz22WdL7PfZZ5/lzTffXOpapAAAAAAAy6NOg9Fdd9018+fPz+mnn77EfmeccUYWLlyY3XbbrY4qAwAAAACKpE6D0VNPPTWrrbZarr766hx88MF54YUXUlFRkSSpqKjICy+8kEMOOSRXXXVVmjdvnl/84hd1WR4AAAAAUBCN6vJiXbp0yc0335wjjjgid999d+6+++40atQoLVq0yKxZs7Jw4cKUSqU0bdo0N998c9Zbb726LA8AAAAAKIg6nTGaJAceeGCef/757LPPPmncuHEWLFiQzz77LAsWLEiTJk1ywAEH5IUXXsiBBx5Y16UBAAAAAAVRpzNGK/Xs2TP//ve/M3fu3Lzzzjv5/PPPs/rqq6dbt25p2rRpfZQEAAAAABRInc8Y/bJmzZplk002yTbbbJOKioq8+uqrmTZtWn2WBAAAAAAUwEoNRmfPnp3nnnsuL7744mL7XHjhhWnXrl222GKLbLvttunQoUMOPfTQTJ06dWWWBgAAAAAU2EoNRu+6667ssMMOueSSS2ps/9Of/pTTTjstn3/+eUqlUkqlUsrLy3PnnXdmr732qtqxHgAAAABgRVqpwejQoUOTJMcee+wibZ988kl+97vfpaysLD169Mh9992XN998M1dffXVWX331DB8+PLfccsvKLA8AAAAAKKiVuvnS8OHD06RJk+y0006LtP3zn//MnDlz0rx589x///3p3LlzkmSjjTZKw4YNM3DgwAwePDhHHXXUyiwRAAAAACiglTpj9OOPP86GG26Yxo0bL9L25JNPJkn22GOPqlC00pFHHpkWLVrklVdeWZnlAQAAAAAFtVKD0alTp6ZFixY1tg0fPjxlZWXZbbfdFmlr3LhxOnfunClTpqzM8gAAAACAglqpwWizZs0yadKkRY5PnTo148aNS5JstdVWNZ7bsmXLlJeXr8zyAAAAAICCWqnB6IYbbpiJEydm7Nix1Y4/+uijSZLmzZtniy22qPHcjz76KG3btl2Z5QEAAAAABbVSg9E99tgjFRUV+elPf5o5c+YkST777LNccMEFKSsry5577pmGDRsuct7kyZPz/vvvp1u3biuzPAAAAACgoFZqMHryySdnjTXWyCOPPJK1114722yzTbp27ZqRI0emrKwsv/jFL2o876677kqS9O3bd2WWBwAAAAAU1EoNRjt06JB77rkn7du3z4wZMzJ8+PBMnz49ZWVl+dOf/pRtt922xvOuuOKKxW7MBAAAAADwdTVa2RfYfvvtM3bs2Nx///159913s/rqq2f33XfPBhtsUGP/Tz75JD/4wQ9SVlaW7bbbbmWXBwAAAAAU0EoPRpOkRYsWOfTQQ2vVt127djnllFNWckUAAAAAQJGt1EfpAQAAAAC+iQSjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQAAAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABROIYLROXPm5Jxzzkn37t3TrFmzdOzYMQMHDszEiRO/1rhvv/12mjdvnrKysnzve99bQdUCAAAAACvbKh+Mzp07NzvvvHN+//vfZ+bMmdl///2z7rrr5vrrr88WW2yRd999d7nH/tGPfpR58+atwGoBAAAAgLqwygej5513XoYNG5bevXtnzJgxuf322/P888/noosuypQpUzJw4MDlGvfaa6/Nk08+mR/+8IcruGIAAAA
AYGVbpYPR+fPn5/LLL0+S/PWvf03Lli2r2k499dT07NkzQ4cOzUsvvbRM43788cf51a9+lV133TUDBgxYoTUDAAAAACvfKh2MPvvss5k+fXo22GCDbLHFFou0H3zwwUmSe++9d5nGPeWUUzJnzpxcccUVK6ROAAAAAKBurdLB6MiRI5MkW265ZY3tlcdfffXVWo/5wAMP5Pbbb88ZZ5yRDTfc8OsXCQAAAADUuVU6GJ0wYUKSpFOnTjW2Vx4fP358rcabNWtWTjzxxGy00Ub59a9/vWKKBAAAAADqXKP6LmBlmjlzZpJktdVWq7G9RYsWSZIZM2bUaryzzjor48ePz5AhQ9KkSZOvVVuPHj1qPD527Nh06dIlEydO/FrjA0B9afB5p6S0oL7LqFcNyhr7sxyAQltrXoMsLNV3FfWrUVkDfx+AFWDhwoVp1GjlRJir9IzRFWn48OG57LLLcvTRR6dfv371XQ4AAAAA8DWs0jNGK3ehnz17do3ts2bNSpK0atVqieMsXLgwP/zhD7PGGmvkwgsvXCG1jRo1qsbjlTNJ11lnnRVyHQCoaxWzPkh5aV59l1G/ypr6sxyAQps8riLzK8rru4x61aSBf9vDirCyZosmq3gw2rlz5yTJBx98UGN75fEuXboscZwPPvggr7zyStZee+0ccsgh1do+++yzJMlLL71UNZP0ySefXP6iAQAAAICVbpUORjfbbLMkyYgRI2psrzzes2fPWo330Ucf5aOPPqqx7bPPPsvQoUOXo0oAYFXTsKxpfZcAAAAsxSq9xmifPn3SunXrjB07Nq+88soi7YMHD06S7LvvvkscZ7311kupVKrxY8iQIUmSXXbZpeoYAAAAAPDNtkrPGG3SpEl++tOf5g9/+ENOOumkPPLII1U70V988cV59dVX07dv32y11VZV51x++eW5/PLLc+CBB+b888+vr9IBgFXEI488kvLyYq+x1rBhw+y22271XQYAAFSzSgejSXLWWWflsccey3PPPZdu3bplhx12yPjx4/P888+nffv2ue6666r1/+STTzJ69OhMmjSpnioGAFYl5eXlhQ9GAQDgm2iVfpQ+SZo1a5YhQ4bk7LPPzmqrrZZ//etfGT9+fI499tiMGDEi66+/fn2XCAAAAADUsVV+xmiSNG/ePL/73e/yu9/9bql9zz333Jx77rm1Hrtfv37WFQUAAACA/zKr/IxRAAAAAICvEowCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQAAAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQAAAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQA
AAIDCEYwCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMJpVN8FAMB/u18OuzPzyhfWdxn1qlXjpvl/2xxQ32UAAADUmmAUAL6meeULM7+ivL7LqFdFD4YBAID/Ph6lBwAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAAoHAEowAAAABA4QhGAQAAAIDCKUQwOmfOnJxzzjnp3r17mjVrlo4dO2bgwIGZOHFircf47LPPMmjQoAwYMCBdu3ZNkyZN0qpVq3z3u9/N//7v/2bBggUr8Q4AAAAAgBWpUX0XsLLNnTs3O++8c4YNG5Zvfetb2X///TNu3Lhcf/31ue+++zJs2LCsv/76Sx3nwgsvzB/+8IeUlZVl8803z3e/+91MmTIlzz77bF544YUMHjw4Dz/8cFZbbbU6uCsAAAAA4OtY5WeMnnfeeRk2bFh69+6dMWPG5Pbbb8/zzz+fiy66KFOmTMnAgQNrNU6LFi1y2mmnZdy4cRkxYkRuu+22PP7443nttdfSuXPnPPPMMznvvPNW8t0AAAAAACvCKh2Mzp8/P5dffnmS5K9//WtatmxZ1XbqqaemZ8+eGTp0aF566aWljvWb3/wmf/rTn9K5c+dqx7t165Y//vGPSZJbb711BVYPAAAAAKwsq3Qw+uyzz2b69OnZYIMNssUWWyzSfvDBBydJ7r333q91nc022yxJ8uGHH36tcQAAAACAurFKB6MjR45Mkmy55ZY1tlcef/XVV7/Wdd59990kydprr/21xgEAAAAA6sYqHYxOmDAhSdKpU6ca2yuPjx8//mtd53//93+TJPvvv//XGgcAAAAAqBur9K70M2fOTJLF7hTfokWLJMmMGTOW+xp/+9vf8thjj2WNNdbI6aefXuvzevToUePxsWPHpkuXLpk4ceJy1wRA3VprXoMsLNV3FfWr2cKyqj+7GnzeKSktqOeK6ldZg+ZVr0dFRUVKpWK/QSoqKvzdBqBg/P0oaVTWwJ9/sAIsXLgwjRqtnAhzlZ4xurI9/fTTOeWUU1JWVpbrrrsuHTt2rO+SAAAAAIBaWKVnjFbuQj979uwa22fNmpUkadWq1TKP/frrr2f//ffP/Pnzc9lll+XAAw9cpvNHjRpV4/HKmaTrrLPOMtcEQP2YPK4i8yvK67uMetWyUcOqP7sqZn2Q8tK8eq6ofjVssHrV6/Hqq68WfsZogwYN/N0GoGD8/Shp0sC/7RfnkUceSXl5sd8fDRs2zG677VbfZfxXWFmzRZNVPBjt3LlzkuSDDz6osb3yeJcuXZZp3Pfeey+77bZbPv3005x77rn52c9+9vUKBQAAACiI8vLywgejfDOs0o/Sb7bZZkmSESNG1Nheebxnz561HnPSpEnZddddM2nSpJxyyin57W9/+/ULBQAAAADq1CodjPbp0yetW7fO2LFj88orryzSPnjw4CTJvvvuW6vxPv300+y+++4ZO3ZsjjvuuFxyySUrslwAAAAAoI6s0sFokyZN8tOf/jRJctJJJ1WtKZokF198cV599dX07ds3W221VdXxyy+/PBtvvHF+85vfVBtr9uzZ2XvvvfPaa6/l0EMPzdVXX52ysrK6uREAAACAr6FUKqWiouIb8cEX6vv7UNcf38R191fpNUaT5Kyzzspjjz2W5557Lt26dcsOO+yQ8ePH5/nnn0/79u1z3XXXVev/ySefZPTo0Zk0aVK142eeeWb+85//pGHDhmnUqFF+8IMf1Hi9G264YWXdCgAAAECtlZeXZ+rUqZkxY0bmz59f3+VUadOmzTcyJKtLZWVlGT16dH2XUeeaNGmSVq1aZc0110zDhg3ru5xVPxht1qxZhgwZkvPPPz+
DBg3Kv/71r7Rt2zbHHntsfv/736dTp061GufTTz9N8sUvlUGDBi22n2AUAAAAqG/l5eWZMGFC5s6dW9+lLKJly5b1XQL1ZP78+Zk6dWpmzZqVzp0713s4usoHo0nSvHnz/O53v8vvfve7pfY999xzc+655y5y/IYbbhB6AgAAAP8Vpk6dmrlz56Zhw4bp0KFDWrRokQYNvhkrKk6fPt2M0bKytG7dur7LqFMVFRWZNWtWPv7448ydOzdTp07NWmutVa81FSIYBQAAACiSGTNmJEk6dOjwjQvgGjRoIBgtK/vGBNV1pUGDBlXvxQ8//DAzZsyo92C0WN8BAAAAgFVcqVSqWlO0RYsW9VwNVFf5npw/f369B+SCUQAAAIBVyJfDpqLNSuSb78vvScEoAAAAAEAdE4wCAAAAAIUjGAUAAAAACkcwCgAAAAAUjmAUAAAAgDrTunXrrLHGGsv0saK0adOm2kfbtm3TpUuX7LnnnrnpppvqfTOgr7rhhhtSVlaWc889d7nHOPbYY1NWVpYnn3xyhdW1qmhU3wUAAAAAQF0aMGBAkqS8vDzvvfdenn/++QwbNixDhw7NtddeW8/VUVcEowAAAAAF9cthd2Ze+cL6LqNGTRs2yoXbHrRSxr7iiiuqfT1kyJAceuihueuuu3LIIYdkjz32WCnXXVYHHnhgtt1227Rr1265xzj//PNz+umnp3PnziuwslWDYBQAAACgoOaVL8z8ivL6LqPe7bTTTjnssMNyyy235IEHHvjGBKOtW7dO69atv9YY3/rWt/Ktb31rBVW0arHGKAAAAACF17NnzyTJxIkTq461adMmPXv2zPz583PBBRdkm222SYcOHXLkkUdW9Zk9e3Yuvvji7LjjjunUqVM6deqUXXfdNbfeeutirzVt2rSceeaZ2XTTTdOiRYusvvrq2XTTTXPaaadl0qRJVf0Wt8bo/Pnzc8UVV2TrrbfOmmuumdVWWy3rrbde9tlnn9x2223V+i5pjdH3338/P/7xj9OlS5c0bdo0a621Vvr3758XX3xxkb7jxo1LWVlZ+vXrlzlz5uT000+vOm/DDTfMn/70p2/cGq1LY8YoAAAAAIU3Y8aMJEmTJk2qHa+oqMiRRx6Z//znP9luu+3So0ePtGnTJkkyZcqUHHjggRk1alQ6dOiQ7bbbLqVSKS+88EJOPPHEvPzyy7nggguqjTd69OgcdNBBmThxYtZee+3svvvuSZIxY8bkz3/+c7bbbrsccMABS6z1yCOPzODBg9OqVavssMMOWX311TNx4sQ888wzmTlzZg4//PCl3u9rr72WnXfeOZ988kk22mij9O/fPxMmTMjdd9+de++9N4MGDcohhxyyyHnz58/PbrvtljfeeCP9+vXLrFmzMnTo0Jx++umZMWNGzjvvvKVe+5tCMAoAAABAoZVKpTz88MNJkh49elRrmzhxYpo2bZoXXnghHTt2rNZ20kknZdSoUfnJT36Sc889N02bNk2STJ48OYcffniuvvrq7Lbbbvne976XJFm4cGGOOuqoTJw4Mf/zP/+TP/3pT9WC2FGjRqVZs2ZLrPW9997L4MGD06VLl7z00ktZc801q9rmzp2bl19+uVb3e+SRR+aTTz7Jaaedlj/+8Y8pKytLktx555059NBDM3DgwGy//faLPIb/n//8J3379s17772X1VdfPUkyfPjwbLvttrnkkkty+umnp2XLlkut4ZvAo/QAAAAAFFJ5eXnGjh2bn/70p3nxxRfTtGnTao/JVzrnnHMWCUVfe+21PProo9lyyy3zhz/8oSoUTZK11lorl156aZLkuuuuqzp+77335u233863v/3tXHjhhYvMTu3Ro0c22GCDJdY8ZcqUJMkWW2xRLRRNkmbNmqV3795Lve8nn3wyr732Wjp37pzzzjuvKhRNkoMOOigHHHBAZs6cWa32Sg0aNMhVV11VFYomSa9evbLnnntm9uzZGT58+FKv/00hGAUAAACgUNq
0aZM2bdqkXbt26dWrVwYNGpRWrVrlmmuuSdeuXav1LSsrq3EzpieeeCJJstdee6VBg0Ujtp49e6Zly5YZMWJE1bGhQ4cmSY466qg0bNhwuWrfeOON06JFi9x///3585//nA8//HCZx3j66aeTJIceemgaN268SPtRRx1Vrd+XdenSJRtttNEix7t3754k1dZI/abzKD0AAAAAhTJgwIAkX8x+bNWqVb7zne9k3333zRprrLFI3/bt21ebDVppwoQJSZLzzjtvietqzp07t+rzDz74IEkWCV+Xxeqrr56rr746P/rRj3LaaafltNNOS/fu3bPTTjvlqKOOSp8+fZY6RmWYut5669XYXnn8yxtRVerUqVON57Rq1SpJMm/evFrcxTeDYBQAAACAQrniiitq3bemUDT5YlOmJNl2222/VtC5PAYMGJDvfe97+fe//51HHnkkQ4cOzVVXXZWrrroqp556ai666KKvNf6XH63/qppmx/63EowCAAAAwDJaZ511kiR77713fvrTn9bqnMrZlu+9997Xvn779u1z/PHH5/jjj6/aPOqwww7LxRdfnIEDBy6yidSXVa6XOn78+Brbx40bl+T/7nFVtepEvAAAAABQR/r165ckue+++2p9Tt++fZMk//jHP6pmnK4Ileug7r333km+2N1+SXbYYYckyR133JHy8vJF2v/xj39U67eqEowCAAAAwDLq1atXdtpppzz//PP55S9/mc8//3yRPq+99loee+yxqq/33XffbLjhhnnjjTdy2mmnZcGCBdX6jxo1Ku++++4Sr/vyyy/nrrvuyvz586sdnzZtWp5//vkkybrrrrvEMfr165dNN90048aNyznnnJNSqVTVdvfdd+euu+5Ky5YtM3DgwCWO89/Oo/QAAAAABdW04Tc3Gvom11bpqquuysEHH5xrr702gwcPzqabbpq11147n3/+eUaNGpWJEyfmJz/5Sb73ve8lSRo1apQbb7wx/fv3z0UXXZRBgwald+/eKZVKefvtt/P666/n7rvvzvrrr7/Ya44fPz4HHXRQWrdunV69emXttdfOZ599lqeeeiozZszIvvvum969ey+x7rKystxyyy3Zaaed8v/+3//L3Xffnc033zwTJkzIs88+m0aNGuXaa6/Nt771rRX6en3TfPPfYQAAAACsFBdue1B9l/BfrX379nn44Ydz00035c4778yrr76aF154Ie3bt896662XH//4x+nfv3+1c77zne/kmWeeyVVXXZV77rknDzzwQJo2bZrOnTvn17/+dbbddtslXnPbbbfNeeedlyeeeCKjR4/O008/nTZt2qRnz575wQ9+kO9///u1qn3TTTfNiBEjct555+Whhx7K4MGD07p16xxwwAH5zW9+k2222Wa5X5f/FmWlL8+Vpd5VLoy7tLUgAPjm+Nmzt2d+xaLr8hRJy0ZNclHvg5Mk/xzz/ZSX5tVzRfWrSYPVc1C3a5MkDz74YI3rNhVJw4YNs+eee9Z3GQDUIX8/Spo0aJi/9DmsXq5dUVGR0aNHJ0k22mijVWIX8c8++6y+S1ihysrK0rp16/ouo14s6/tzZWZlZowCAAAAUGemT58e8/T4Jvjv/08GAAAAAADLSDAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACFIxgFAAAAAApHMAoAAAAAFI5gFAAAAAAoHMEoAAAAAFA4glEAAAAA6kzr1q2zxhprLNPHitKmTZtqH23btk3nzp2z66675sorr8yCBQtW2LVWphtuuCFlZWU599xzqx0/99xzU1ZWlhtuuKFe6vpv06i+CwAAAACAujRgwIAkSXl5eSZMmJAXXnghw4cPz8MPP5zBgwenUSORWRH4LgMAAAAU1F3v/CALK+bVdxk1atSgafpveO1KGfuKK66o9vXw4cOz7777ZujQobnzzjtz2GGHrZTr8s3iUXoAAACAglp
YMS/lpW/mR10Gtr169aqaRfrEE0/U2XWpX4JRAAAAgBWsSYOG9V0Cy2jjjTdOknzyySeLtJVKpQwePDj77bdf1ltvvay99tr57ne/mz/+8Y+ZPXt2jeMtWLAg1113XfbYY4906dIl3/rWt7LlllvmpJNOyiuvvFJt7FtvvTWHH354unfvnhYtWqRVq1bZZpttcsUVV6SiomKl3C8epQcAAACAzJw5M0nSrl27ascrKiryox/9KHfeeWdatmyZzTffPGussUZefvnl/OlPf8pjjz2We++9N82bN686Z9asWTn00EPz3HPPpUWLFtl2222z+uqr5/33388dd9yR1VdfPX379k2SzJs3L0cccUTWXHPNfOc738mWW26ZqVOn5rnnnstJJ52UF154wWZKK4lgFAAAAGAlqut1PMtKDdNx7mFZvUnHTJv7Xho0KPtqj7Rt1rXO6vlv8fjjjydJdtlll2rHL7/88tx5553Zfvvtc80116RDhw5Jkvnz5+eXv/xlbr755vzpT3+qtkP8b37zmzz33HPZbrvtcuONN1YLWydPnpz333+/6utGjRrl7rvvzt57753GjRtXHZ8yZUr22muv3HjjjRk4cGB23HHHlXHbheZRegAAAICVqO7X8Zyf5IvHr0uL+R9fqKioyHvvvZdTTz01zz33XPbaa6/079+/qn3hwoW57LLL0qJFi1x77bVVoWiSNGnSJH/605/SoUOH3HjjjVWPvE+aNCmDBg1K06ZNc+WVVy4yA3WttdZKr169qr5u1KhRDjjggGqhaJK0b98+559/fpLk3//+9wq/d8wYBQAAAKBg2rRps8ixY445JpdccknKyv5vhu3IkSMzderU7LTTTllrrbUWOad58+bZbLPN8sgjj2Ts2LHp1q1bnnnmmZSXl2f33XdP586da13TK6+8kkceeSTjx4/P7NmzUyqVMmPGjCTJ22+/vRx3ydIIRgEAAAAolMod6OfOnZtRo0ZlzJgxufHGG7PNNtvkiCOOqOo3YcKEJMmQIUNqDFO/bOrUqenWrVsmTpyYJOnatXbLFcyfPz/HHntsbr311sX2qQxIWbEEowAAAAAUyhVXXFHt68suuyy//e1v86tf/Srbb7991UzPysfj119//Xz3u99d4pht27Zdrlouvvji3Hrrrdl0001zwQUXZMstt0ybNm3SuHHjjBkzJhtttFFKJcsfrAyCUQAAAAAK7eSTT87QoUPzxBNP5IILLsjll1+eJOnYsWOSpFu3bouEqYuzzjrrJEnee++9WvW/++67kyS33nprevToUa3t3XffrdUYLB+bLwEAAABQeL/97W+TJLfffnvVI/RbbrllVl999Tz33HP59NNPazXO9ttvn4YNG+aJJ57IBx98sNT+leN26tRpkbZ//vOftS2f5SAYBQAAAKDwevbsmb333rtqJ/okadq0aU4++eTMmDEjRx11VMaNG7fIeR9++GFuu+22qq+/9a1v5fDDD8/cuXNz4oknZtq0adX6T5kyJcOHD6/6unv37kmSv/3tb9X6DR48ODfddNOKuj1q4FF6AAAAgIJq1KBpUlHfVdSsUYOmdX7NX//613nggQdyyy235Fe/+lU6dOiQn//853n77bdz++2357vf/W569uyZzp07Z8GCBXn77bczevTo9OjRI4cffnjVOOeff37efvvtPP300+nZs2d69+6dVq1a5YMPPsjIkSMzcODA7LLLLkmS0047LQ899FBOP/303HHHHenevXvefvvtDB8+PL/85S9z4YUX1vnrUBSCUQAAAICC6r/htfVdwjfKpptumn322Sf33ntv/vrXv+Z3v/tdGjRokL/97W/Zb7/9cuONN+bll1/OyJEjs8Yaa2SdddbJz372s/Tv37/aOK1atcq9996b66+/PnfccUeGDRuW8vLyrL322jnkkEOqhag77rhjnnnmmZx55pl5+eWXM2bMmGy66aa58847s+WWWwpGVyLBKAAAAACFUJt
1Qhf3+Ppee+2Vvfbaq9bXatKkSX784x/nxz/+8SJtZWVl1b7edttt8/jjj9c4Tk070h977LE59thjFzl+7rnn5txzz611jUUnGAUAAAAolFKmzq3b3c4bpGHaNOuSJJk+fXqNYR/UNcEoAAAAQOHUdTApCOWbx670AAAAAEDhCEYBAAAAgMIRjAIAAAAAhSMYBQAAAAAKRzAKAAAAABSOYBQAAAAAKBzBKAAAAABQOIJRAAAAAKBwBKMAAAAAQOEIRgEAAACAwhGMAgAAAACF06i+CwAAAACgrpWt4terWZs2bZbY3qdPn9x3331VX7/yyisZMmRIRowYkREjRuTDDz9Mknz66afLXUNFRUUGDRqUO++8MyNHjsyMGTPStm3brL322tlmm23Sr1+/HHnkkcs9PrUnGAUAAAAolLKs2Wz9ert669atl/mczz77bIXWMGDAgBqPd+vWrdrXf/7zn/PAAw+ssOvOnz8/RxxxRB5//PE0aNAg22yzTbp06ZJ58+Zl5MiRueaaa3LzzTcLRuuIYBQAAACAQrniiitq1W/rrbdOjx49ssUWW2TLLbfMZpttlnnz5i33da+++uo8/vjj6dSpUx599NFsvPHG1dpHjRqVG2+8cbnHZ9kIRgEAAAAK6pFHHkl5eXl9l1Gjhg0bZrfddqvXGv7nf/5nhY53zz33JElOO+20RULRJOnRo0cuuOCCFXpNFs/mSwAAAAAFVV5e/o3+WNVMnTo1SbLmmmsu87nvv/9+Tj755HTv3j3NmzdP27Zt06tXr/x//9//l88//7xa39mzZ+f3v/99NtlkkzRv3jytW7fOjjvumNtuu63Gsddbb72UlZWlVCrlL3/5SzbbbLOsttpq2Xzzzav6LFy4MFdeeWV69+6d1VdfPc2bN8/mm2+eSy+9NAsXLlzm+/kmEIwCAAAAQB1YZ511kiQ33XRTFixYUOvznn766fTs2TN/+ctfsmDBguy7777p06dPpk+fnnPPPTfvvvtuVd8ZM2Zkxx13zDnnnJPJkydnn332SZ8+ffLCCy9kwIABOeWUUxZ7nZ/85Cf5xS9+kbXWWiv77bdf1l//i7Vo58yZk9122y0nnnhixowZk2233Ta77rprJk2alJ///Oc56KCDUlFRsZyvSv3xKD0AAAAA1IGjjz46Tz31VB5++OFsuOGG6d+/f3r37p2tttoqG2ywQY3nTJs2LQcddFA+++yz/PnPf86pp56aBg3+b67jf/7zn3Ts2LHq6zPOOCMvvfRSdtppp/z73/9Oq1atkiRvvfVW+vbtm8suuyy77rpr9tlnn0Wuddddd+Xll19Ojx49qh3/5S9/mSFDhuSwww7LVVddVbWB1owZM3L44Yfnnnvuyd///vf85Cc/+dqvUV0yYxQAAACAQmnTpk2NHxMmTFip1z3ooIPyhz/8IS1atMiECRNy6aWX5rDDDsuGG26Yrl275o9//GPmzp1b7ZxrrrkmU6ZMyR577JFf/vKX1ULRJOndu3fWWmutJMmsWbNy7bXXpkGDBrniiiuqQtEk2XjjjXPWWWclSf73f/+3xvp+/etfLxKKTp48OVdffXXWXXfdXH/99VWhaJK0atUq1157bZo0aZIrr7xy+V+YemLGKAAAAACFMmDAgBqPt2jRYqVf+8QTT8wRRxyRRx99NE8++WRefPHFvP322xk3blx+85vf5N///neeeOKJNG/ePEny2GOPJUl+/OMfL3Xsl156KXPmzEmvXr1q3NzpqKOOysknn5xnn302FRUVi4Ss++233yLnPPnkk1mwYEH22GOPqpq+bO211063bt3y2muvZc6cOTX2+aYSjAIAAABQKFdcccVKGfeSSy7J22+/Xe1Yt27d8vOf/7zasTZt2uSHP/xhfvjDHyZJxo8fn7/+9a+5+OKLM2zYsFx88cU588wzk3yx6VKSxT5q/2Uffvhhki82U6rJGmuskdatW2f69On59NNPF9kEqnPnzoucM27cuCT
J1VdfnauvvnqJ1582bVrVOqr/DQSjAAAAALACPP7443n22WerHevTp88iwehXdenSJRdccEEWLlyYSy65JPfff39VMLqilZWVLbatWbNmixyr3FRp8803z2abbbbEsZs2bfr1iqtjglEAAAAAWAHuu+++r3X+zjvvnEsuuSSffPJJ1bF11103b731VsaOHZtNN910iedXbsI0fvz4GtunT5+ezz77LM2bN0+bNm1qVVOnTp2SJNtvv33+8pe/1Oqc/xY2XwIAAACAOlAqlZbY/s477yRJtcfRv/e97yVJ/v73vy91/K222irNmzfPSy+9tMgj/Unyj3/8I8kXs1i/ur7o4uy0005p2LBh7rvvvixYsKBW5/y3EIwCAAAAQB0YMGBArrrqqnz66aeLtD3//PP5/e9/nyQ5+OCDq44ff/zxadeuXR588MFceumli4Srw4YNy+TJk5N8sXnUwIEDU1FRkZNOOimzZs2q6jdmzJicd955SZKTTz651jWvs846GThwYMaNG5cBAwbk448/XqTPO++8kzvvvLPWY35TeJQeAAAAoKAaNmxY3yUs1jehtocffjgXXnhh1dfz589Pkuy6665Vx375y19m9913r9V4EydOzOmnn56zzjorm2++ebp27ZqKioqMHTs2r7zySpJk3333rbYDfdu2bXPHHXdkv/32y89//vNcdtll2XrrrTNnzpy8+eabeeedd/Lyyy9nrbXWSpKcf/75GTZsWB599NGsv/766du3b2bNmpUnnngic+fOzcknn5x99913mV6H//3f/824ceNy55135qGHHsrmm2+ezp07Z9asWXnjjTfyzjvvZP/9989BBx20TOPWN8EoAAAAQEHttttu9V3CN9rUqVMzfPjwRY5/+djUqVNrPd6NN96Yxx57LEOGDMm7776bBx98MPPmzUu7du2y995758gjj8zhhx++yAZJ/fr1y8iRI3PBBRfkoYceyr/+9a+0bNkyXbt2ze9+97tqO9a3atUqQ4cOzUUXXZTbb78999xzT5o0aZJevXrlxBNPzIABA5b5dWjevHkefPDB3HLLLbnxxhvzyiuv5IUXXkj79u3TpUuXHHXUUTn88MOXedz6JhgFAAAAoBBqeoR9SY444ogcccQRK+z666+/fn70ox/lxz/+cVq3br1M53bt2jVXXnllrfq2aNEi55xzTs4555xa9R83btxS+zRs2DBHH310jj766FqN+d9AMAoAAABQKKVMnftunV6xQRqmTbMuSb7YGX1pmxBBXRCMAgAAABROXQeTglC+eexKDwAAAAAUjmAUAAAAACgcwSgAAAAAUDiCUQAAAACgcASjAAAAAEDhCEYBAAAAgMIRjAIAAACsQkopr/qsVFGq11rgqyoqKqo+Lysrq8dKBKMAAAAAq5ayUhaUfZ7y0vzMm1O+9P5Qh2bNmpUkadKkSb0Ho43q9eoAAAAArHCzG41L8/IO+fyTpkmSps0bpqxBfYZQpaqZghUVFSmVij2TtaysrNrMySKoqKjIrFmz8vHHHydJWrVqVc8VCUYBAAAAVjmfNxqV5uXrJPOTBR+3TMOyJknqd3be5AajkyTl5WaxJslHH31U3yXUm2bNmmXNNdes7zL+//buPS7n+/8f+OPdVVfnk0MSKUUiOhGmUs1sPo5twxc/5rCNbTmEjzFmmvl8jOU0GzazDNvYnKYNsUklZEq1zLmEFGVKB5W6Xr8/3K7r0+W6oqKDetxvt+v20ev0fr3f+3h5X89eBwZGiYiIiIiIiIgaGyGV4pb+IZiVucCozB56wqxe+yNBBksDewBAQUEBZ4xKEszNzeu7G3VOLpfD1NQUzZs3h0wmq+/uMDBKRERERERERNQYCakUeXpnkKd3BhASJNRfIEquY4reHTYAACIiIpr8rFGZTIaePXvWdzfqlCRJ9b6n6KOaxOFL9+/fx0cffQQnJycYGBjAxsYGkyZNQkZGRrXbunv3LmbMmAE7Ozvo6+v
Dzs4OwcHByM3NffYdJyIiIiIiIiJ6FiQBIZXV46ccOjo60NFpEqGoKlE+j6byaWhBUaAJBEaLi4vx4osv4pNPPkFBQQGGDRsGW1tbhIWFwcPDA6mpqVVuKycnBz179sTnn38OXV1dBAYGwtTUFGvWrEGvXr3wzz//1OKdEBERERERERER0bPS6AOjS5YswcmTJ/HCCy/g4sWL2LFjB+Li4rBixQpkZ2dj0qRJVW4rODgYly9fxmuvvYYLFy5gx44dSElJwbRp03Dx4kXMmjWrFu+EiIiIiIiIiIiInpVGHRgtLS3FF198AQD48ssvYWJiosqbNWsWXF1dERUVhfj4+Ce2lZmZiR9//BFyuRzr1q2Dru7/tmf97LPP0LJlS2zbtg23b99+9jdCREREREREREREz1SjDozGxsYiLy8Pjo6O8PDw0MgfPnw4ACA8PPyJbR08eBAKhQK+vr5o1aqVWp6+vj6GDBmC8vJy7N+//9l0noiIiIiIiIiIiGpNow6MJiUlAQA8PT215ivTk5OT67QtIiIiIiIiIiIiql+6Ty7y/Lp27RoAoG3btlrzlenp6el12hYRETUu+rJG/c9plVR8Bro6+oCiHjvTAOjqyFV/lslk9diThoHPgIio6eH7Ed+PHsX3I3V8Bg1Dox6pCgoKAABGRkZa842NjQEA+fn5ddoWALi4uGhNP3/+PHR1ddGpU6cqtUNERNRQ7MLc+u5Cg/I++G85ERFRU8f3I3V8P6KauHr1KuRy+ZML1kCjDow+jyRJglwuVzvcieh5cOXKFQCAo6NjPfeEiJo6jkdE1FBwPCKihoLjET3P5HK5akLis9aoo2/KU+iLioq05hcWFgIATE1N67QtADh79myVyhE9L5SzoPn/bSKqbxyPiKih4HhERA0FxyMi7Rr14Uvt2rUDANy4cUNrvjLdzs6uTtsiIiIiIiIiIiKi+tWoA6Nubm4AgISEBK35ynRXV9c6bYuIiIiIiIiIiIjqV6MOjHp7e8Pc3BxXrlxBYmKiRv7OnTsBAEOGDHliWwMGDICOjg5iYmJw+/ZttbySkhKEh4dDJpNh4MCBz6TvREREREREREREVHsadWBULpdj6tSpAICgoCDVPqAAsHLlSiQnJ8PPzw/du3dXpX/xxRdwdnbGBx98oNZW69atMXr0aJSWluK9995DWVmZKu/9999HdnY2xo4dCysrq1q+KyIiIiIiIiIiInpajfrwJQD48MMP8fvvv+P48ePo2LEjfH19kZ6ejri4OLRs2RLffvutWvmcnBxcuHABmZmZGm2tXr0aJ0+exK5du+Ds7IwePXrg7NmzSElJQceOHbFy5cq6ui0iIiIiIiIiIiJ6Co16xigAGBgYIDIyEgsXLoSRkRH27t2L9PR0TJgwAQkJCXBwcKhyWy1atMCpU6cwbdo0lJaWYs+ePcjLy8P06dNx6tQpNGvWrBbvhKhhO3v2LE84JKIGgeMRETUUHI+IqKHgeESknSSEEPXdCSIiIiIiIiIiIqK61OhnjBIRERERERERERE9ioFRIiIiIiIiIiIianIYGCUiIiIiIiIiIqImh4FRIiIiIiIiIiIianIYGCUiIiIiIiIiIqImh4FRIiIiIiIiIiIianIYGCVqpM6cOQNJktCmTRut+QqFAhYWFpAkCZMnT9ZaJjo6GpIkwcXFpVb6ePToUUiShAkTJlSrnr29PSRJqpU+EdGzFRkZiddffx1t2rSBXC6HpaUlOnXqhBEjRuCLL75AXl5efXeRiJ4zkiRpfPT09GBjY4PXX38dx48fr5d++fv7Q5IkXL16tV6uT0QNQ229+1Tlu1NsbCwkSYKdnZ3WsfJxn5CQkJrdcBVwfKSGTLe+O0BEtcPNzQ1mZma4efMmUlNT4eDgoJb/119/qf5RPnbsmNY2YmJiAAC+vr6121kiapQWL16MRYsWAQA6d+6MXr16QU9PDxcuXMDu3buxc+dO9OjRA717967nnqo7evQoAgI
CMH78eGzevLm+u0NElRg/frzqz/n5+UhKSsLu3buxZ88ebNu2DWPGjKnH3hFRU1Tf7z7h4eEAgC1btiAsLEwj/7vvvgMAvP766zAxMVHLc3d3r5U+ETV0DIwSNVI6Ojro06cPDh48iGPHjmkERpVBTzc3NyQnJ+POnTto3ry51jIMjBJRdcXHxyMkJAR6enr46aefEBgYqJaflZWFbdu2wcLCol76R0TPv0d/caFQKDB//nwsW7YM06dPx4gRI6Cnp1dn/dmyZQuKiooqXa1DRI1bbb/79OzZE+fOnYO5uXmlZfbt24c2bdqgb9++8PPz08hXBkZDQ0Nhb29fo34QNTZcSk/UiCkDmtpmhB47dgx6enoIDg6GEAKxsbFq+QqFAidOnFBrh4ioqnbv3g0hBEaOHKnxxQAArK2t8e9//xvOzs513zkiapR0dHSwePFi6Orq4s6dOzh79mydXr9du3Zwdnau02AsETUctf3uY2RkBGdnZ7Ru3Vpr/pUrV3Du3DkMHjyY244RVQMDo0SNmDKgqZz5WVFMTAw8PT3Rv39/rWWSkpJw7949tGvXDu3atQMA3LlzB3PmzEHHjh1hYGCAZs2aYcCAATh06JDW60uSBHt7e5SWlmLx4sVwdnaGvr6+1heFR92/fx8LFixA+/btYWBgAEdHRyxatAilpaXVeQREVE+ys7MBAC1btqxyHeX+wUIIrFmzBl26dIGBgQHatGmD6dOnIzc3V2u9oqIifPLJJ+jatSsMDQ1hbm6Ovn37Yvv27U+8ztq1a+Hm5gYjIyO4u7tjwoQJCAgIAPBwVkVle2+lpKRg7NixcHBwgIGBAVq2bAl3d3cEBwcjMzOzyvdMRM+WXC5XzaYqKytTy1O+l2izefNmrXvsFRQUYOnSpXBzc4O5uTlMTEzg6OiIESNGICIiQq1sZXvoKa9bXl6OZcuWwcnJCfr6+rC1tcXcuXNRUlKitU9FRUVYunQpPDw8YGJiAhMTE/Tu3Vs14+tR6enpePfdd+Hk5AQjIyM0a9YMLi4umDJlCi5cuKBWlmMY0bNXk3cfACgsLMSyZcvQo0cPmJmZwdjYGM7OzggKCsLFixdV5Z60x+i+ffsAAEOGDKlR/69fv46pU6fC0dFR9V1v8ODBj923+dy5c3jzzTdhb28PfX19WFlZwdvbG6GhoRpjsNLevXvRu3dvGBsbo1mzZhg9ejRu3LhRoz4TPQtcSk/UiPXs2RP6+vq4cOECcnJy0KJFCwBAamoqbt68idGjR6NNmzaws7PTmFX66DL6jIwM9O3bF6mpqWjXrh0CAwORnZ2N33//HREREVi5ciVmzpyp0QeFQoHAwEBER0fDz88Prq6uGkv2H1VaWopXXnkFMTExsLS0xKBBg1BSUoLPPvsMZ86cgRDiWTweIqpFtra2AIBdu3bhgw8+gJWVVZXrTps2DV9//TX8/f3RrVs3REVFYe3atYiKikJMTAzMzMxUZfPz8xEQEID4+Hi0bNkSgwcPRmFhIY4cOYKYmBicOHECa9as0Xqdd955B2FhYfDz80Pnzp1RWloKHx8fZGVlISIiAo6OjvDx8VGVV+69FR8fDx8fHxQXF8PV1RXDhg1DUVERUlNTsWbNGgQGBlY6m4OIaldaWhru3LkDPT09dOjQ4anaKi8vx0svvYS4uDi0aNEC/v7+MDAwwI0bN7B//34YGxvjlVdeqXJ7Y8aMwf79++Hv749OnTohJiYGy5cvR0ZGBrZt26ZW9vbt2+jfvz+Sk5NhbW0NPz8/CCFw/PhxTJgwAadPn8batWtV5a9fvw5PT0/8888/6NixIwYOHIjy8nKkp6dj48aNeOGFF9CpUycAHMOIaktN3n0yMzPRv39/nD17FpaWlvD394e+vj5SU1OxYcMGdOzYEU5OTlW6fnh4OIyMjNCvX79q9/3EiRMYNGgQ7t69i06dOmHQoEHIzs5GREQEDh48iO+//x7/93//p1bn559/xrhx41BSUoL
OnTvj1VdfRV5eHs6ePYs5c+bgrbfe0tg2YN26dVi5ciV8fX0xcOBAxMXFYfv27YiPj0dSUhIMDQ2r3XeipyaIqFHz8fERAMTevXtVad99950AIPbs2SOEEGLMmDFCLpeLoqIiVZkRI0YIAGLDhg1CCCEGDx4sAIgxY8aIkpISVbmYmBhhZGQkZDKZOHPmjNq1AQgAokOHDuLGjRsafYuMjBQAxPjx49XSP/30UwFAeHh4iJycHFX6pUuXhI2NjapdImq4rly5IgwNDQUAYWpqKsaPHy82btwoEhISRFlZmdY6dnZ2AoAwMzMTp0+fVqXn5+eLF198UQAQM2bMUKszdepUAUAEBASIe/fuqdLPnTsnrKysBAARHh6u9TotWrQQKSkpGv2obGxSeuONNwQAERoaqpF37tw5cfPmzcoeCxE9A9reA/Lz80VMTIzo0aOHACCmT5+utZ6dnZ3WNsPCwgQAsWjRIlXakSNHBADh5eUl7t+/r1Y+Ly9PbZwSQgg/Pz8BQKSlpWntb+fOnUVmZqYqPTU1VVhYWAgA4vLly2p1Bg4cqBrziouLVelZWVmqezxw4IAq/aOPPhIAxNSpUzXuLT09Xa19jmFEtaMm7z79+vUTAMTIkSNFfn6+Wl5aWppISkpS/fy495O7d+8KXV1dMWzYsMf2UTkeVRyn8vLyROvWrYVMJhPbtm1TK//nn38KS0tLYWJiIm7fvq1Kv3jxojAwMBC6urri+++/V6ujUChERESE2tilHB+NjIzE8ePHVemFhYWiT58+AoDYtGnTY/tOVFu4lJ6okdO2nF75Z29vb9X/lpaWIi4uTlVGOYPU19cXqamp+PXXX2FiYoK1a9dCLperyvn4+OCdd95BeXk5vvzyS619WLp0abUOIli3bh0AYMWKFWqzSzt06ICFCxdWuR0iqj8ODg4IDw+Hra0t8vPz8d133+Htt9+Gp6cnWrRogffee6/S5ZpTp05F9+7dVT8rxx5JkrBp0yYUFxcDeLj0bNOmTdDR0cG6detgamqqquPs7IwPP/wQACqdMTp37ly4uLhU+96US+VeeukljbzH7f1FRM9Wxa0uTE1N4evriwsXLmDt2rVYvXr1U7ev/Lvu7e0NAwMDtTwzMzO1caoqPv/8c1hbW6t+bt++PcaOHQtA/T0tMTER+/fvh5eXF1auXAl9fX1VXqtWrfD1118DANavX6/RV23jUrt27eDo6FilshzDiGquuu8+p06dwh9//AErKyt88803GqfE29vbw9XVtUrXPnDgAMrKyjB06NBq9/vbb79FZmYmgoOD8f/+3/9Ty+vRowcWLlyIgoICtZntq1atQnFxMd566y2MGTNGrY4kSXj55ZfVxi6lmTNn4oUXXlD9bGRkhFmzZgEAoqOjq913omeBgVGiRk7bAUzHjh2Dk5OTav8bZYBUWebKlSvIzMxE8+bN0blzZ1X6gAED0KxZM41rjBs3DoD2vUwlSarWPjfXrl3DtWvXYGVlpdrnr6LRo0dXuS0iql/9+vXD5cuXsXv3brzzzjvw9PSErq4ucnNzsX79eri7u2vsewcAo0aN0kjr0qUL3NzcUFBQgDNnzgB4uBz0/v378PT01HqQgXJsio2NhUKh0MivyZcHAKpgSFBQEI4ePVrpHlpEVLvGjx+v+owaNQovvPACCgsLsXjxYhw8ePCp23d3d4eOjg7CwsKwceNG3Llzp8Zt6enpaX2vUS6RrRgsUe7dHhgYCB0dza9ryj1HT506pUpTjkvz58/Hr7/+qvoFkjYcw4hqT3XefX7//XcAD7/fVPzlbk3s27cPOjo6GDRoULXrKsec1157TWu+8vtkxTFH2fcpU6ZU61ovv/yyRpq2cZCoLjEwStTI9enTBzo6OkhISMD9+/eRnZ2N8+fPq+2b161bN5iZmakCoMoAp4+PDyRJws2bNwGg0gMLlOkZGRkaeVZWVlp/W1gZ5bXs7Oy05pubm2vsVUNEDZdcLserr76
K9evXIz4+HtnZ2Vi/fj0sLS1x+/ZtTJ06VaNOZX//lWONcpx40thkYWEBc3Nz3L9/H3fv3tXIVx4sV11z5syBv78/YmNjERAQAEtLS7z88stYs2YN8vLyatQmEVXf5s2bVZ8ff/wRx48fx+nTp1FcXIyhQ4dq/cVLdTg5OWH58uUoKirC5MmTYWVlBTc3N8yaNQvJycnVasva2hoymUwjXRkMqXgAk/LwpgULFqjNiq34KSgoQE5OjqrOhAkTMHLkSPz9998YMmQILC0t0bdvX/z3v/9FVlaW2jU5hhHVrqq++1y/fh0A1GZ010RZWRkOHjyInj17olWrVtWurxxzvL29tY43Xl5eAKA25tS0723bttVI0zYOEtUlHr5E1MiZm5vD1dUViYmJOHnypOpU54qBUR0dHfTu3RsnTpxAeXm5xsFLTyJJUqV5jy49I6KmzcLCAu+88w5sbGwwbNgwREZGoqioCEZGRrVyvdoYn8zMzHDkyBHExsYiPDwcR48exZEjR3D48GEsXboUMTEx6NixY027TERPwcPDA1OmTEFoaCjWr19f5SX12maVA8Ds2bMxcuRI7N27F4cPH0ZMTAxWrVqF1atXY9WqVZgxY0aV2tc28/NJffHx8aly0EEmk2HHjh2YN28efvnlFxw5cgRxcXGIiYnBp59+ioMHD6JPnz4AOIYR1bXK3n2elejoaOTm5tb4NHrlmDN8+HAYGxtXWk7b6pzqqs5YSFRXGBglagJ8fX2RmJiIY8eOaQ2MAg9/Q3jo0CEkJyer7S8KADY2NgCA9PR0re0rf8tYnX1EK6Pc16qya927d091D0T0/HrxxRcBPDz1OTc3Vy0wmp6ejm7dumnUUY4LyjHpSWNTXl4ecnNzYWhoCEtLy2faf0mS4OPjoxpLb9++jeDgYPz4449YsGABfvrpp2d6PSKquvbt2wMALl26pJaup6eHgoICrXWUs5+0sbW1xbRp0zBt2jSUlZVh+/btmDhxIt5//3288cYbz3x8Uc6oCgwMxOzZs6tV18PDAx4eHggJCcG9e/cQEhKCVatWITg4WG0ZLMcworr36LuP8hT7K1euPFW74eHhAGq+RVDbtm1x4cIFzJs3r8p7J9va2uLSpUu4cuUK3N3da3RdooaC4XqiJqDiPqMxMTFo1aqVxkwA5T6ju3fvxsWLF2FsbAxPT08A/wuiHjx4UGtQUrkRd1VnmD6OnZ0dbG1tcfv2bURFRWnkb9++/amvQUS1Twjx2PzLly8DeLjcrEWLFmp52r6Qnz9/HomJiTAxMVG9gHfv3h2GhoaIj4/XCIAA/xubvL29qzVDQXnAXHX23bOyskJISAgAICUlpcr1iOjZS01NBQCNg0xat26NO3fuaN0rVLlf3pPo6upi7Nix8PLyQmlpqdax52n1798fALBnz56nasfMzAxLly6FJElPHJc4hhE9veq++ygPQPvxxx8r/aVNVYSHh6N9+/bo2rVrjerXZMxR9l15GBzR84yBUaImQBmwPH78OM6cOaMKglbUq1cvyGQy1cnyvXv3hq7uw0nlDg4OGDRoEPLz8zFjxgw8ePBAVe/EiRNYv349ZDIZgoKCnkl/3333XQAPl6/9888/qvTU1FQsXrz4mVyDiGrXwoULMWfOHK2zIDIyMlSb9Q8dOlQViFRau3at6oAlACgqKsK0adMghMDEiRNhaGgIADA2NsakSZOgUCgQFBSEwsJCVZ2LFy9iyZIlAIDp06dXq+/KmaiV7U+4YcMGpKWlaaTv378fAFQzQIio7p05c0b1RX3gwIFqeX5+fgCgGhuUli9frnZIpVJkZCR+//13jWX2aWlpOHfuHCRJ0rpf3tPq1asX+vfvj9jYWAQFBeHevXsaZZKSktQOmNq6davWgOaBAwcghFAblziGEdWO6r779OzZEwEBAbh9+zYmT56s9h4DPFyV99dffz32mn///TeuXLlS42X0wMMDlKysrLB8+XJ8/fXXGmNeWVk
ZIiIi1MaY4OBgGBgYYOPGjdixY4daeSEEDh8+zD1D6bnBpfRETYC1tTU6dOig+i3lo8vogYezKtzc3JCQkABAc/bnV199BV9fX2zZsgVRUVF44YUXkJ2djaNHj6K8vBwrVqx4ZssoZs+ejd9++w2xsbHo0KEDXnzxRZSUlOCPP/5Av379IJPJcO3atWdyLSKqHQUFBVizZg1CQ0Ph5OSELl26wMDAADdu3EBcXBwePHiADh06aN3/b+zYsejVqxdefPFFmJubIzo6GllZWXBxccEnn3yiVnbp0qU4efIkDh8+DAcHB/j5+aGwsBBHjhxBcXExpk+fXu0vC/b29nB1dcXp06fRs2dPuLi4QCaTYejQoRg6dCg2bNiAd999F126dEHnzp2hq6uL8+fPIykpCQYGBvjoo4+e5tERURVNmDBB9efS0lKkp6fj5MmTUCgUGDJkCMaNG6dWfu7cudi5cydWr16No0ePwtHREX/99ReuX7+O9957D+vWrVMrn5SUhJkzZ6Jly5bo3r07mjdvjuzsbERFRaGkpATTpk1T/SLlWdu2bRsGDBiAdevW4YcffoC7uztsbGyQl5eH5ORkXL9+HTNmzMCAAQMAALt27cIbb7wBR0dHdOvWDYaGhkhLS0NcXBx0dHTUgsEcw4hqR03efbZu3Yp+/frhxx9/REREBHx8fKCvr48rV64gMTERK1as0Lq9kNK+ffsA4KkCoxYWFvjll18wZMgQTJkyBUuWLEHXrl1haWmJrKwsJCQkIDc3F3v27FHNSnVyckJYWBjeeOMNjBo1CosXL4arqyvy8vKQkpKC69ev4+7du9U6hJeo3ggiahImTpwoAAgA4tSpU1rLTJs2TVXmjz/+0MjPyckRs2fPFo6OjkIulwsLCwvx8ssvi4iICK3tARB2dnaV9ikyMlIAEOPHj9fIKywsFB988IFo166dkMvlwt7eXsyfP1+UlJQIOzs7weGLqGHLzs4WW7duFWPHjhXdunUTzZs3F7q6uqJZs2bC29tbLF++XBQUFKjVUf7dLi8vF6GhocLZ2Vno6+uL1q1bi6CgIPHPP/9ovVZBQYH4+OOPRZcuXYS+vr4wNTUVPj4+4ocfftBavipjyKVLl0RgYKBo3ry50NHREQDEokWLhBBC7Nu3T0yaNEm4uLgICwsLYWRkJJycnMRbb70lzp8/X/2HRUTVonxXqfjR0dERzZo1E/7+/mLTpk2ivLxca90TJ04If39/YWRkJMzMzMS//vUvkZiYKMLCwtT+ngvxcBz48MMPhbe3t2jdurWQy+WiTZs2ol+/fmLXrl1CoVCote3n5ycAiLS0NI3+VvY+pO26Svfv3xeff/656NOnjzA3NxdyuVzY2toKPz8/8dlnn4nr16+rykZFRYmgoCDh7u4umjdvLgwMDISDg4MYNWqU+PPPP9Xa5RhGVDtq8u4jhBD37t0TixcvFq6ursLQ0FCYmJgIZ2dnMXXqVHHp0iVVOW3fnfr06SPMzMxEaWlplfqoHDMfHaeEECIzM1O8//77wsXFRRgZGQkjIyPh6Ogohg0bJjZv3izy8/M16iQlJYmxY8eKNm3aCD09PWFlZSW8vb3FihUrxIMHD1TlKhsfhRAiLS1NABB+fn5VugeiZ00S4gkbYRARERHVAXt7e6Snpz9xjy4iIiKipi47OxvW1tYYPny4xnJ2Iqo6LqUnIiIiIiIiInqO3L17FwsXLsS//vWv+u4K0XONM0aJiIioQeCMUSIiIiIiqks8lZ6IiIiIiIiIiIiaHM4YJSIiIiIiIiIioiaHM0aJiIiIiIiIiIioyWFglIiIiIiIiIiIiJocBkaJiIiIiIiIiIioyWFglIiIiIiIiIiIiJocBkaJiIiIiIiIiIioyWFglIiIiIiIiIiIiJocBkaJiIiIiIiIiIioyWFglIiIiIiIiIiIiJocBkaJiIiIGgF/f39IkgRJkjB//vxKy/3666+QJAn29vZ117kG4OrVq5g
/fz569eoFKysryOVyWFpawsvLC7Nnz0ZycvIzu9bq1asREhKCq1evPrM2iYiIiOjZY2CUiIiIqJH5/PPPcevWrfruRoMghMCiRYvg5OSEpUuX4tSpUzAxMYG7uzusrKyQlJSElStXws3NDdOmTXsm11y9ejU+/vhjBkaJiIiIGjgGRomIiIgaEZlMhsLCQvznP/+p7640CFOmTMHixYuhUCgwd+5c3Lx5E6mpqTh16hQuXLiAnJwchIWFwdHRETExMfXdXSIiIiKqQwyMEhERETUiY8eOBQB89dVXuHbtWj33pn59//332LhxIyRJwk8//YRPP/0UrVu3VitjZmaGCRMmICUlBePHj6+nnhIRERFRfWBglIiIiKgR6dGjB1599VWUlpYiJCSkRm389ttvGDZsGKytrSGXy2FtbY3hw4cjLi5Oo+yoUaMgSRK++eYbjbyXXnoJkiShWbNmUCgUanmJiYmQJAnt27dXS8/IyMC0adPg5OQEAwMDGBkZoV27dujXrx+WLVuGBw8eVOkeFAoFPv74YwDAxIkT8dprrz22vIGBAWbOnKmWlpKSgo8//hg+Pj5o27Yt5HI5WrRogZdffhm7du3SaGPz5s2QJAnp6ekAgICAANW+r5IkYfPmzWrlS0tLsW7dOvj6+qJZs2bQ19eHg4MDgoKCcOPGjUr7mpGRgTfffBM2NjYwMDBAx44dsXDhQhQXF2PChAlar6UUFRWFV199Ve2/7WuvvVbpbFnlPfn7+6OsrAyhoaFwc3ODsbExLCwskJycDEmSYGZmhqKiokr7PGPGDEiShMmTJ1dahoiIiKiuMTBKRERE1MgsWbIEOjo62LJlCy5cuFDlegqFApMmTcLgwYOxb98+KBQKdO3aFSUlJdi1axe8vb3x7bffqtXx9/cHABw9elQtvbS0FMePHwcA3L17F0lJSWr5kZGRavUB4Nq1a/D09MQXX3yBq1evwsHBAS4uLigrK0NkZCTmzZuHwsLCKt3Ln3/+iUuXLgFAjfcODQ4ORkhICP766y+YmJjAzc0NBgYGOHz4MIYPH445c+aolW/VqhW8vb2hr68PAOjatSu8vb1Vn1atWqnK3r59G97e3ggKCsLx48dhZmYGZ2dnZGZmYt26dXB3d0d8fLxGny5evAhPT098++23yMnJQZcuXaCrq4slS5YgICAApaWlld7P0qVL4e/vj71790KhUMDNzQ3l5eXYs2cP+vbti88++6zSukIIBAYGYs6cOcjPz0eXLl1gZmYGV1dX9OzZE/n5+fj555+11i0tLcX3338PAHjzzTcrf+BEREREdU0QERER0XPPz89PABBr164VQggxbtw4AUCMGDFCrVx4eLgAIOzs7DTaCAkJEQBEx44dRVRUlCpdoVCI9evXC5lMJuRyuTh79qwq79y5cwKAsLGxUWsrKipKABBt2rQRAMSKFSvU8ocOHSoAiM2bN6vSpk+fLgCIl156SWRnZ6uVz8rKEqtWrRKFhYVVeh6hoaECgLCwsBAKhaJKdR71888/izNnzmikx8fHCycnJwFAxMbGauTb2dkJACIyMrLStgMCAgQAMWDAAJGamqpKLygoEG+//bYAINq3by9KSkpUeQqFQvTo0UMAEH369BEZGRmqvISEBGFjYyP09PQEABEWFqZ2vUOHDgkAQpIkERoaKsrLy4UQQpSVlYlPP/1UlffHH3+o1QsLCxMAhEwmEy1atBDR0dGqvKKiIiGEEBs3bhQARN++fbXe608//SQACBcXl0qfBxEREVF94IxRIiIiokYoJCQEenp62LlzJ86cOfPE8nfu3MHy5cuhr6+PX375BX379lXlSZKEd955B9OnT0dpaSlWr16tynN2doa1tTVu3ryJixcvqtKVM0Lnz5+v9jPwcGaqcul2QECAKl05u3Xq1Klo0aKFWv9atWqF4OBgGBkZVen+MzIyAAD29vaQJKlKdR41fPhwuLu7a6R7enriyy+/BABs3bq12u0
eOHAAkZGRcHZ2xu7du9W2EzA2NsaGDRvQo0cPpKWlYefOnaq8yMhInD59GkZGRti5cydsbGxUeR4eHti8eXOlWw3897//BfBw64PZs2dDR+fh1wCZTIa5c+fi9ddfhxACS5Ys0Vq/vLwc69evh6+vryrN0NBQ1aaJiQliYmJw+fJljbrKWcacLUpEREQNDQOjRERERI2Qg4MD3nzzTQghsGDBgieW379/P4qKiuDr64vOnTtrLRMYGAhAc9m8tuX0R48ehY6ODkaPHg0nJydER0ejvLwcwMP9Re/evYv27dujXbt2qjrKP+/evbvKe4lWJj8/HwBgYmLyVO1kZWVh1apVGDNmDF566SX4+PjAx8cHH3zwAQBUKej8KGWwc+zYsargYkU6OjoYMmQIAPVnevDgQQDAwIEDNQ6RAoD+/fvDzs5OI72wsFAViA4ODtbap9mzZwMAYmJitO4VamZmhldffVVrXRMTE4waNQpCCI2tFjIyMnDo0CHI5XKMGzdOa30iIiKi+qJb3x0gIiIiotqxcOFCfPfddzhw4ACOHTsGHx+fSssmJycDAM6ePVtpueLiYgDQOBjI398f27dvR2RkJCZPnozi4mKcOHECbm5usLS0REBAAL766iskJCTAy8tLFeyruL8oAEyfPh1btmzBli1bcODAAQwYMADe3t7w8/ODs7Nzte7d1NQUAFBQUFCtehXt2LEDb7755mP3Nb1z506121U+623btuHAgQNay9y6dQuA+rNWzsh1c3OrtG03NzfV4U9Kly9fVgWlu3btqrVet27dAABlZWW4fPkyXF1d1fKdnJwgk8kqve7bb7+Nb775Blu2bMEnn3yiKrt582YoFAoMGTJEYxYwERERUX1jYJSIiIiokbKxsUFQUBBCQ0OxYMECREVFVVo2NzcXAJCZmYnMzMzHtnv//n21n5XL4ZUBzxMnTqCkpESV7u/vj6+++gqRkZFqgdGKy+iBh0G72NhYhISE4PDhw9i6datqqXrXrl2xbNkyDBw4sEr33qZNGwDA1atXIYSo9nL6tLQ0vPHGGygtLUVQUBDGjx+Pjh07wtTUFDKZDKmpqXB0dKzRzFblsz5//vwTy1acvakM8iqDvtpoy6s4e7ayrQhMTExgbGyMwsJCVfmKjI2NH9vPnj17wtXVFcnJyYiIiFD9d9q8eTMALqMnIiKiholL6YmIiIgasXnz5sHMzAzR0dGIiIiotJxyyfl7770HIcQTPxU5OTnBxsYGWVlZOH/+vEbgUzkzNDIyEgqFAtHR0WrpFXXv3h3h4eG4e/cuIiMjsXjxYnTr1g0pKSkYOnQoTp48WaX7Vs56zc3NRVJSUpXqVLRjxw6UlpZi+PDh+OKLL+Dl5QULCwvVTMiazBRVUj7rn3766YnPueJSemU9bYFLJW15FWfPalsmr8xTzox9XOD1cd5++20A/9tTNDo6GpcvX0bbtm3xyiuv1KhNIiIiotrEwCgRERFRI9a8eXPMmjULAPDhhx9WWk65xDolJaVG1/Hz8wPwMPgZGRkJmUymOsDJ2toazs7OOHbsGP7880/k5eXBwcEBtra2lbZnaGgIf39/LFy4EElJSRg0aBDKy8uxcePGKvXHy8sLHTp0AACsXbu22veTlpYGAGqHUFX0uADtk2an1vRZOzk5AfjfUnxttOU5OjqqArqVXVOZrqurq3pu1aXcM3Xfvn3IyclRBUgnTJigOuyJiIiIqCHhGwoRERFRIzdr1iy0aNECp0+fxq5du7SWGTx4MAwMDBATE4M///yz2tdQzg7dv38/4uLi4OnpCTMzM7X8goICfPbZZ2rlq0KSJPTu3RsAcPPmzSrV0dHRwUcffQQACAsLw+7dux9bvri4GKtXr1b9rFxyrm1bgeLi4scGW5UHKj265YDSiBEjAACbNm1CXl7eY/tV0YABAwA8fMbKPUgr+uOPP3D16lWNdBMTE9Vp8hXvsaKVK1cCeBgIrmy5/ZNYWFjg9dd
fx4MHD7Bu3Trs3LkTkiRh4sSJNWqPiIiIqLYxMEpERETUyJmammLevHkAoNqz81GtWrXCvHnzIITA4MGDsXfvXo0l8+np6QgNDcWmTZs06iuXxf/2228oLS3VCHwq85UBSm3L6KdMmYIffvhBYzn4xYsXVXtVdu/e/bH3WtG4ceMwceJECCEwcuRIfPDBB8jKylIrU1BQgG3btsHV1VV1DeB/M0XXrVunFii+ffs2hg8fjuvXr1d6XUdHRwCodE/XIUOGICAgABkZGejfvz8SExPV8oUQSEhIwMyZM9WuHRAQAC8vLxQWFmL48OFqQdvExERMmDABenp6Wq85f/58AMD27duxevVqKBQKAIBCoUBoaCh+/vlnSJL02FnFVaFcTv/JJ5+gsLAQ/v7+cHBweKo2iYiIiGqNICIiIqLnnp+fnwAg1q5dqzX//v37ok2bNgKAACDs7Ow0yigUCjF16lRVGUtLS9GjRw/RvXt3YW1trUpftGiR1mtUbP/AgQNqebdu3VLlARDXr1/XqO/m5iYACJlMJpycnESvXr1Ex44dhSRJAoDo1q2byM3NrdZzKS8vFwsWLBC6uroCgJAkSTg6OoqePXsKZ2dnIZfLVekzZsxQ1SsrKxO+vr6qPCcnJ+Hh4SH09PSEXC4XX3/9daXP8fvvv1fdp5OTk+jbt6/w8/NTeyY5OTmq9gEIW1tb0atXL+Hm5iZMTU1V6ZGRkWptX7hwQVhZWQkAQk9PT3h4eIguXboIAKJXr15i9OjRAoDYsmWLRr/+85//qNq1srISXl5eomXLlqq0ZcuWadQJCwsTAISfn1+Vn3mnTp1UbW7btq3K9YiIiIjqGmeMEhERETUBBgYGWLhw4WPLSJKEtWvXIjo6GmPGjIGpqSn++usvpKSkwNDQECNHjsQPP/yg2rP0UcpZoLq6uqrDj5SsrKzg4uIC4OGMyrZt22rUX7VqFWbOnAkPDw/cu3cP8fHxyMrKgpeXFz799FOcPHkS5ubm1bpvHR0dLFmyBBcvXsS8efPQvXt35ObmIiEhAVlZWXB1dcWcOXOQkpKitsxcJpPhwIED+Pe//w1bW1ukpaXh5s2bGDJkCE6cOIH+/ftXes0xY8bgyy+/hIeHB27cuIHo6GhERUWpzVZt3rw5IiMjsXXrVgwYMADFxcWIj4/HlStX0K5dO7z77rs4dOiQxnN0cnJCfHw8Jk2ahObNm+Pvv/9GcXEx5s2bhyNHjuDBgwcAoLaNgdL8+fMRGRmJwMBAAMCZM2cgSRICAwMRFRWF999/v1rPtjKTJk0C8L+l9UREREQNlSTEI2ukiIiIiIjoudS1a1ecPXsWiYmJcHNzq5c+zJo1C6tWrcJ7772HL7/8sl76QERERFQVDIwSERERETUCcXFx6N27N5o1a4Zbt25BV1e3zvtQXFwMW1tb5OTkICEhAR4eHnXeByIiIqKq4lJ6IiIiIqLnxKVLl/D5558jNzdXLT02NhYjR44EAEyePLlegqIAsHz5cuTk5KBPnz4MihIREVGDxxmjRERERETPidOnT8PLywsymQxOTk4wMzNDRkYGbty4AQDo06cPfv/9dxgaGtZZnxITExEcHIxbt27h/PnzkCQJR48eRd++feusD0REREQ1wRmjRERERETPCUdHRyxYsACenp64c+cOEhIScO/ePfTu3RurV6/GkSNH6jQoCgC5ubmIiopCamoqXF1dsXPnTgZFiYiI6LnAGaNERERERERERETU5HDGKBERERERERERETU5DIwSERERERERERFRk8PAKBERERERERERETU5DIwSERERERERERFRk8PAKBERERERERERETU5DIwSERERERERERFRk8PAKBERERERERERETU5DIwSERERERERERFRk8PAKBERERERERERETU5DIwSERERERERERFRk8PAKBERERERERERETU5DIwSERERERERERFRk8PAKBERERERERERETU5DIwSERERERE
RERFRk8PAKBERERERERERETU5DIwSERERERERERFRk8PAKBERERERERERETU5DIwSERERERERERFRk8PAKBERERERERERETU5DIwSERERERERERFRk8PAKBERERERERERETU5DIwSERERERERERFRk/P/AdxyHQE450rEAAAAAElFTkSuQmCC", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Show the per-class metrics bar chart\n", + "from IPython.display import Image, display\n", + "display(Image('results/per_class_metrics.png'))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "cbaa4e56", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total test examples : 50\n", + "Correct predictions : 13\n", + "Overall accuracy : 0.2600\n", + "\n", + "Sample predictions:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
true_label_idpred_label_idtrue_labelpred_labelcorrect
011SportsSportsTrue
121BusinessSportsFalse
211SportsSportsTrue
321BusinessSportsFalse
431Sci/TechSportsFalse
501WorldSportsFalse
621BusinessSportsFalse
721BusinessSportsFalse
831Sci/TechSportsFalse
921BusinessSportsFalse
\n", + "
" + ], + "text/plain": [ + " true_label_id pred_label_id true_label pred_label correct\n", + "0 1 1 Sports Sports True\n", + "1 2 1 Business Sports False\n", + "2 1 1 Sports Sports True\n", + "3 2 1 Business Sports False\n", + "4 3 1 Sci/Tech Sports False\n", + "5 0 1 World Sports False\n", + "6 2 1 Business Sports False\n", + "7 2 1 Business Sports False\n", + "8 3 1 Sci/Tech Sports False\n", + "9 2 1 Business Sports False" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Peek at the raw predictions CSV — every test example with true and predicted label\n", + "import pandas as pd\n", + "pred_df = pd.read_csv('results/predictions.csv')\n", + "print(f'Total test examples : {len(pred_df):,}')\n", + "print(f'Correct predictions : {pred_df[\"correct\"].sum():,}')\n", + "print(f'Overall accuracy : {pred_df[\"correct\"].mean():.4f}')\n", + "print()\n", + "print('Sample predictions:')\n", + "pred_df.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "c461384a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total errors: 37\n", + "\n", + "Most common misclassification pairs:\n", + "true_label pred_label count\n", + " Business Sports 16\n", + " World Sports 13\n", + " Sci/Tech Sports 8\n" + ] + } + ], + "source": [ + "# Show which class pairs the model confuses most\n", + "errors = pred_df[~pred_df['correct']]\n", + "print(f'Total errors: {len(errors):,}')\n", + "print()\n", + "print('Most common misclassification pairs:')\n", + "confusion_pairs = (\n", + " errors.groupby(['true_label', 'pred_label'])\n", + " .size()\n", + " .reset_index(name='count')\n", + " .sort_values('count', ascending=False)\n", + " .head(5)\n", + ")\n", + "print(confusion_pairs.to_string(index=False))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": 
{ + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/utils.sh b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/utils.sh new file mode 100755 index 000000000..ce5eac693 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/utils.sh @@ -0,0 +1,571 @@ +#!/bin/bash + +# General utilities +run() { + cmd="$*" + echo "> $cmd" + eval "$cmd" +} + + +enable_verbose_mode() { + if [[ $VERBOSE == 1 ]]; then + set -x + fi +} + + +# Argument parsing +_print_default_help() { + echo "Usage: $(basename $0) [options]" + echo "" + echo "Options:" + echo " -f Force kill existing container with same name before starting" + echo " -h Print this help message and exit" + echo " -v Enable verbose output (set -x)" +} + + +parse_default_args() { + VERBOSE=0 + FORCE=0 + while getopts "fhv" flag; do + case "${flag}" in + f) FORCE=1;; + h) _print_default_help; exit 0;; + v) VERBOSE=1;; + *) _print_default_help; exit 1;; + esac + done + enable_verbose_mode +} + + +_print_docker_jupyter_help() { + # """ + # Print usage information and available options for docker_jupyter.sh. + # """ + echo "Usage: $(basename $0) [options]" + echo "" + echo "Launch Jupyter Lab inside a Docker container." 
+ echo "" + echo "Options:" + echo " -f Force kill existing container with same name before starting" + echo " -h Print this help message and exit" + echo " -p PORT Host port to forward to Jupyter Lab (default: 8888)" + echo " -u Enable vim keybindings in Jupyter Lab" + echo " -v Enable verbose output (set -x)" +} + + +parse_docker_jupyter_args() { + # """ + # Parse command-line arguments for docker_jupyter.sh. + # + # Sets JUPYTER_HOST_PORT, JUPYTER_USE_VIM, TARGET_DIR, VERBOSE, FORCE, and + # OLD_CMD_OPTS in the caller's scope. Enables set -x when -v is passed. + # Prints help and exits when -h is passed. + # + # :param @: command-line arguments forwarded from the calling script + # """ + # Set defaults. + JUPYTER_HOST_PORT=8888 + JUPYTER_USE_VIM=0 + VERBOSE=0 + FORCE=0 + # Save original args to pass through to run_jupyter.sh. + OLD_CMD_OPTS="$*" + # Parse options. + while getopts "fhp:uv" flag; do + case "${flag}" in + f) FORCE=1;; + h) _print_docker_jupyter_help; exit 0;; + p) JUPYTER_HOST_PORT=${OPTARG};; # Port for Jupyter Lab. + u) JUPYTER_USE_VIM=1;; # Enable vim bindings. + v) VERBOSE=1;; # Enable verbose output. + *) _print_docker_jupyter_help; exit 1;; + esac + done + # Enable command tracing if verbose mode is requested. + enable_verbose_mode +} + + +# ############################################################################# +# Docker image management +# ############################################################################# + + +get_docker_vars_script() { + # """ + # Load Docker variables from docker_name.sh script. + # + # :param script_path: Path to the script to determine the Docker configuration directory + # :return: Sources REPO_NAME, IMAGE_NAME, and FULL_IMAGE_NAME variables + # """ + local script_path=$1 + # Find the name of the container. + SCRIPT_DIR=$(dirname $script_path) + DOCKER_NAME="$SCRIPT_DIR/docker_name.sh" + if [[ ! 
-e $SCRIPT_DIR ]]; then + echo "Can't find $DOCKER_NAME" + exit -1 + fi; + source $DOCKER_NAME +} + + +print_docker_vars() { + # """ + # Print current Docker variables to stdout. + # """ + echo "REPO_NAME=$REPO_NAME" + echo "IMAGE_NAME=$IMAGE_NAME" + echo "FULL_IMAGE_NAME=$FULL_IMAGE_NAME" +} + + +build_container_image() { + # """ + # Build a Docker container image. + # + # Supports both single-architecture and multi-architecture builds. + # Creates temporary build directory, copies files, and builds the image. + # + # :param @: Additional options to pass to docker build/buildx build + # """ + echo "# ${FUNCNAME[0]} ..." + FULL_IMAGE_NAME=$REPO_NAME/$IMAGE_NAME + echo "FULL_IMAGE_NAME=$FULL_IMAGE_NAME" + # Prepare build area. + #tar -czh . | docker build $OPTS -t $IMAGE_NAME - + DIR="../tmp.build" + if [[ -d $DIR ]]; then + rm -rf $DIR + fi; + cp -Lr . $DIR || true + # Build container. + echo "DOCKER_BUILDKIT=$DOCKER_BUILDKIT" + echo "DOCKER_BUILD_MULTI_ARCH=$DOCKER_BUILD_MULTI_ARCH" + if [[ $DOCKER_BUILD_MULTI_ARCH != 1 ]]; then + # Build for a single architecture. + echo "Building for current architecture..." + OPTS="--progress plain $@" + (cd $DIR; docker build $OPTS -t $FULL_IMAGE_NAME . 2>&1 | tee ../docker_build.log; exit ${PIPESTATUS[0]}) + else + # Build for multiple architectures. + echo "Building for multiple architectures..." + OPTS="$@" + export DOCKER_CLI_EXPERIMENTAL=enabled + # Create a new builder. + #docker buildx rm --all-inactive --force + #docker buildx create --name mybuilder + #docker buildx use mybuilder + # Use the default builder. + docker buildx use multiarch + docker buildx inspect --bootstrap + # Note that one needs to push to the repo since otherwise it is not + # possible to keep multiple. + (cd $DIR; docker buildx build --push --platform linux/arm64,linux/amd64 $OPTS --tag $FULL_IMAGE_NAME . 2>&1 | tee ../docker_build.log; exit ${PIPESTATUS[0]}) + # Report the status. 
+ docker buildx imagetools inspect $FULL_IMAGE_NAME + fi; + # Report build version. + if [ -f docker_build.version.log ]; then + rm docker_build.version.log + fi + (cd $DIR; docker run --rm -it -v $(pwd):/data $FULL_IMAGE_NAME bash -c "/data/version.sh") 2>&1 | tee docker_build.version.log + # + docker image ls $REPO_NAME/$IMAGE_NAME + rm -rf $DIR + echo "*****************************" + echo "SUCCESS" + echo "*****************************" +} + + +remove_container_image() { + # """ + # Remove Docker container image(s) matching the current configuration. + # """ + echo "# ${FUNCNAME[0]} ..." + FULL_IMAGE_NAME=$REPO_NAME/$IMAGE_NAME + echo "FULL_IMAGE_NAME=$FULL_IMAGE_NAME" + docker image ls | grep $FULL_IMAGE_NAME + docker image ls | grep $FULL_IMAGE_NAME | awk '{print $1}' | xargs -n 1 -t docker image rm -f + docker image ls + echo "${FUNCNAME[0]} ... done" +} + + +push_container_image() { + # """ + # Push Docker container image to registry. + # + # Authenticates using credentials from ~/.docker/passwd.$REPO_NAME.txt. + # """ + echo "# ${FUNCNAME[0]} ..." + FULL_IMAGE_NAME=$REPO_NAME/$IMAGE_NAME + echo "FULL_IMAGE_NAME=$FULL_IMAGE_NAME" + docker login --username $REPO_NAME --password-stdin <~/.docker/passwd.$REPO_NAME.txt + docker images $FULL_IMAGE_NAME + docker push $FULL_IMAGE_NAME + echo "${FUNCNAME[0]} ... done" +} + + +pull_container_image() { + # """ + # Pull Docker container image from registry. + # """ + echo "# ${FUNCNAME[0]} ..." + FULL_IMAGE_NAME=$REPO_NAME/$IMAGE_NAME + echo "FULL_IMAGE_NAME=$FULL_IMAGE_NAME" + docker pull $FULL_IMAGE_NAME + echo "${FUNCNAME[0]} ... done" +} + + +# ############################################################################# +# Docker container management +# ############################################################################# + + +kill_container() { + # """ + # Kill and remove Docker container(s) matching the current configuration. + # """ + echo "# ${FUNCNAME[0]} ..." 
+ FULL_IMAGE_NAME=$REPO_NAME/$IMAGE_NAME + echo "FULL_IMAGE_NAME=$FULL_IMAGE_NAME" + docker container ls + # + CONTAINER_ID=$(docker container ls -a | grep $FULL_IMAGE_NAME | awk '{print $1}') + echo "CONTAINER_ID=$CONTAINER_ID" + if [[ ! -z $CONTAINER_ID ]]; then + docker container rm -f $CONTAINER_ID + docker container ls + fi; + echo "${FUNCNAME[0]} ... done" +} + + +kill_container_by_name() { + # """ + # Kill and remove a Docker container by its name. + # + # :param container_name: Name of the container to kill + # """ + local container_name=$1 + echo "# ${FUNCNAME[0]}: $container_name" + # Check if container exists (running or stopped). + local container_id=$(docker container ls -a --filter "name=^${container_name}$" --format "{{.ID}}") + if [[ -n $container_id ]]; then + echo "Killing container: $container_name (ID: $container_id)" + docker container rm -f $container_id + else + echo "Container '$container_name' not found" + fi + echo "${FUNCNAME[0]} ... done" +} + + +exec_container() { + # """ + # Execute bash shell in running Docker container. + # + # Opens an interactive bash session in the first container matching the + # current configuration. + # """ + echo "# ${FUNCNAME[0]} ..." + FULL_IMAGE_NAME=$REPO_NAME/$IMAGE_NAME + echo "FULL_IMAGE_NAME=$FULL_IMAGE_NAME" + docker container ls + # + CONTAINER_ID=$(docker container ls -a | grep $FULL_IMAGE_NAME | awk '{print $1}') + echo "CONTAINER_ID=$CONTAINER_ID" + docker exec -it $CONTAINER_ID bash + echo "${FUNCNAME[0]} ... done" +} + + +# ############################################################################# +# Docker common options +# ############################################################################# + + +get_docker_common_options() { + # """ + # Return docker run options common to all container types. + # + # Includes volume mount for the git root, plus environment variables for + # PYTHONPATH and host OS name. 
+ # + # :return: docker run options string with volume mounts and env vars + # """ + echo "-v $GIT_ROOT:/git_root \ + -e PYTHONPATH=/git_root:/git_root/helpers_root:/git_root/msml610/tutorials \ + -e CSFY_GIT_ROOT_PATH=/git_root \ + -e CSFY_HOST_OS_NAME=$(uname -s) \ + -e CSFY_HOST_NAME=$(uname -n)" +} + + +# ############################################################################# +# Docker bash +# ############################################################################# + + +get_docker_bash_command() { + # """ + # Return the base docker run command for an interactive bash shell. + # + # :return: docker run command string with --rm and -ti flags + # """ + if [ -t 0 ]; then + echo "docker run --rm -ti" + else + echo "docker run --rm -i" + fi +} + + +get_docker_bash_options() { + # """ + # Return docker run options for a Docker container. + # + # :param container_name: Name for the Docker container + # :param port: Port number to forward (optional, skipped if empty) + # :param extra_opts: Additional docker run options (optional) + # :return: docker run options string with name, volume mounts, and env vars + # """ + local container_name=$1 + local port=$2 + local extra_opts=$3 + local port_opt="" + if [[ -n $port ]]; then + port_opt="-p $port:$port" + fi + echo "--name $container_name \ + $port_opt \ + $extra_opts \ + $(get_docker_common_options)" +} + + +# ############################################################################# +# Docker cmd +# ############################################################################# + + +get_docker_cmd_command() { + # """ + # Return the base docker run command for executing a non-interactive command. 
+ # + # :return: docker run command string with --rm and -i flags + # """ + echo "docker run --rm -i" +} + + +# ############################################################################# +# Docker Jupyter +# ############################################################################# + + +get_docker_jupyter_command() { + # """ + # Return the base docker run command for running Jupyter Lab interactively. + # + # :return: docker run command string with --rm and -ti flags (if TTY available) + # """ + local docker_cmd="docker run --rm" + # Add interactive and TTY flags only if stdin is a TTY. + if [[ -t 0 ]]; then + docker_cmd="$docker_cmd -ti" + fi + echo "$docker_cmd" +} + + +get_docker_jupyter_options() { + # """ + # Return docker run options for a Jupyter Lab container. + # + # :param container_name: Name for the Docker container + # :param host_port: Host port to forward to container port 8888 + # :param jupyter_use_vim: 0 or 1 to enable vim bindings + # :return: docker run options string + # """ + local container_name=$1 + local host_port=$2 + local jupyter_use_vim=$3 + # Run as the current user when user is saggese. + if [[ "$(whoami)" == "saggese" ]]; then + echo "Overwriting jupyter_use_vim since user='saggese'" >&2 + jupyter_use_vim=1 + fi + echo "--name $container_name \ + -p $host_port:8888 \ + $(get_docker_common_options) \ + -e JUPYTER_USE_VIM=$jupyter_use_vim" +} + + +configure_jupyter_vim_keybindings() { + # """ + # Configure JupyterLab vim keybindings based on JUPYTER_USE_VIM env var. + # + # Reads JUPYTER_USE_VIM; if 1, verifies jupyterlab_vim is installed and + # writes enabled settings; otherwise writes disabled settings. + # """ + mkdir -p ~/.jupyter/lab/user-settings/@axlair/jupyterlab_vim + if [[ $JUPYTER_USE_VIM == 1 ]]; then + # Check that jupyterlab_vim is installed before trying to enable it. + if ! pip show jupyterlab_vim > /dev/null 2>&1; then + echo "ERROR: jupyterlab_vim is not installed but vim bindings were requested." 
+ echo "Install it with: pip install jupyterlab_vim" + exit 1 + fi + echo "Enabling vim." + cat < ~/.jupyter/lab/user-settings/\@axlair/jupyterlab_vim/plugin.jupyterlab-settings +{ + "enabled": true, + "enabledInEditors": true, + "extraKeybindings": [], + "autosaveInterval": 6 +} +EOF + else + echo "Disabling vim." + cat < ~/.jupyter/lab/user-settings/\@axlair/jupyterlab_vim/plugin.jupyterlab-settings +{ + "enabled": false, + "enabledInEditors": false, + "extraKeybindings": [], + "autosaveInterval": 6 +} +EOF + fi; +} + + +configure_jupyter_notifications() { + # """ + # Disable JupyterLab news fetching and update checks. + # """ + mkdir -p ~/.jupyter/lab/user-settings/@jupyterlab/apputils-extension + cat < ~/.jupyter/lab/user-settings/\@jupyterlab/apputils-extension/notification.jupyterlab-settings +{ + // Notifications + // @jupyterlab/apputils-extension:notification + // Notifications settings. + + // Fetch official Jupyter news + // Whether to fetch news from the Jupyter news feed. If Always (`true`), it will make a request to a website. + "fetchNews": "false", + "checkForUpdates": false +} +EOF +} + + +configure_jupyter_autosave() { + # """ + # Configure JupyterLab global autosave interval to 6 seconds. + # """ + mkdir -p ~/.jupyter/lab/user-settings/@jupyterlab/docmanager-extension + cat < ~/.jupyter/lab/user-settings/\@jupyterlab/docmanager-extension/plugin.jupyterlab-settings +{ + "autosaveInterval": 6 +} +EOF +} + + +check_jupytext_installed() { + # """ + # Verify that jupytext is installed before starting Jupyter Lab. + # + # Jupytext is required for pair notebook/Python file functionality. + # Exits with error if jupytext is not installed. + # """ + if ! pip show jupytext > /dev/null 2>&1; then + echo "ERROR: jupytext is not installed but is required to run Jupyter Lab." + echo "Install it with: pip install jupytext" + exit 1 + fi +} + + +setup_jupyter_environment() { + # """ + # Configure Jupyter Lab environment before launching. 
+ # + # Performs all necessary setup steps: + # - Configure vim keybindings + # - Disable notifications + # - Configure autosave interval + # - Verify jupytext is installed + # """ + configure_jupyter_vim_keybindings + configure_jupyter_notifications + configure_jupyter_autosave + check_jupytext_installed +} + + +get_jupyter_args() { + # """ + # Print the standard Jupyter Lab command-line arguments. + # + # :return: space-separated Jupyter Lab args for port 8888 with no browser, + # allow root, and no authentication + # """ + echo "--port=8888 --no-browser --ip=0.0.0.0 --allow-root --ServerApp.token='' --ServerApp.password=''" +} + + +get_run_jupyter_cmd() { + # """ + # Return the command to run run_jupyter.sh inside a container. + # + # Computes the script's path relative to GIT_ROOT and builds the + # corresponding /git_root/... path used inside the container. + # + # :param script_path: path of the calling script (pass ${BASH_SOURCE[0]}) + # :param cmd_opts: options to forward to run_jupyter.sh + # :return: full command string to run run_jupyter.sh + # """ + local script_path=$1 + local cmd_opts=$2 + local script_dir + script_dir=$(cd "$(dirname "$script_path")" && pwd) + local rel_dir="${script_dir#${GIT_ROOT}/}" + echo "/git_root/${rel_dir}/run_jupyter.sh $cmd_opts" +} + + +list_and_inspect_docker_image() { + # """ + # List available Docker images and inspect their architecture. + # + # Lists all images matching FULL_IMAGE_NAME and attempts to inspect + # their architecture using docker manifest inspect. + # """ + run "docker image ls $FULL_IMAGE_NAME" + (docker manifest inspect $FULL_IMAGE_NAME | grep arch) || true +} + + +kill_existing_container_if_forced() { + # """ + # Kill existing container if FORCE flag is set. + # + # If FORCE is set to 1, kills and removes the container with name + # CONTAINER_NAME. This is typically set by the -f flag. 
+ # """ + if [[ $FORCE == 1 ]]; then + kill_container_by_name $CONTAINER_NAME + fi +} diff --git a/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/version.sh b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/version.sh new file mode 100755 index 000000000..3a4117d7e --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask443_DATA605_Spring2026_HuggingFace_Text_Classification_Model/version.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Report key package versions baked into this image. + +echo "============================================================" +echo " Image version report — $(date -u '+%Y-%m-%d %H:%M UTC')" +echo "============================================================" +echo "Python : $(python --version 2>&1)" +echo "pip : $(pip --version 2>&1)" +echo "------------------------------------------------------------" + +packages=( + torch + transformers + datasets + accelerate + evaluate + scikit-learn + pandas + numpy + optuna + jupyterlab +) + +for pkg in "${packages[@]}"; do + version=$(python -c "import importlib.metadata; print(importlib.metadata.version('$pkg'))" 2>/dev/null || echo "not installed") + printf "%-20s %s\n" "$pkg" "$version" +done + +echo "============================================================"