aigatech · Rxr2664 · Sep 4, 2025 · Sep 4, 2025 · Sep 4, 2025 · Sep 4, 2025
diff --git a/submissions/Rishabh_Raman/.gitignore b/submissions/Rishabh_Raman/.gitignore
@@ -0,0 +1,6 @@
+venv/
+.env
+*.pyc
+__pycache__/
+.ipynb_checkpoints/
+.DS_Store
diff --git a/submissions/Rishabh_Raman/README.md b/submissions/Rishabh_Raman/README.md
@@ -0,0 +1,38 @@
+# NFL Play Tagger (Zero-Shot)
+
+
+
+**What it does**  
+
+Classifies short NFL play descriptions into one of:
+
+`touchdown, field goal, turnover, penalty, pass, rush, timeout, injury`.
+
+
+
+Uses a Hugging Face zero-shot model, so no training is needed—runs quickly on CPU. Includes a tiny heuristic (e.g., “sack”) to show how simple rules can complement ML.
+
+
+
+**Why NFL?**  
+
+As a Georgia Tech student, I’m interested in sports analytics and all sorts of games, including Clash Royale, and this project will help me on the Research Position team, as it helps me model games! I chose the NFL in honor of the season starting tomorrow, and I cannot wait! Football play text is structured and event-driven, so labeling lines is a realistic first step toward drive summaries or dashboards.
+
+
+
+---
+
+
+
+## How to run
+
+
+
+1. Install:
+
+```bash
+
+pip install -r requirements.txt
+```
+
+
diff --git a/submissions/Rishabh_Raman/metrics.py b/submissions/Rishabh_Raman/metrics.py
@@ -0,0 +1,31 @@
+"""
+Quick metrics on a predictions CSV: row count, label distribution, mean confidence.
+
+Usage:
+    python metrics.py [path/to/predictions.csv]
+
+The CSV must contain the columns ``pred_label`` and ``pred_score``.
+"""
+import sys
+
+import pandas as pd
+
+# Columns every report below reads.
+REQUIRED_COLUMNS = ("pred_label", "pred_score")
+
+# Fall back to predictions.csv in the working directory when no path is given.
+path = sys.argv[1] if len(sys.argv) > 1 else "predictions.csv"
+df = pd.read_csv(path)
+
+print("Rows:", len(df))
+missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
+if missing:
+    # Report the file actually read (not always predictions.csv) and what it lacks.
+    raise SystemExit(f"{path} missing required columns: {', '.join(missing)}")
+
+print("\nLabel counts:")
+print(df["pred_label"].value_counts())
+
+print("\nMean confidence by label:")
+mean_conf = df.groupby("pred_label")["pred_score"].mean()
+print(mean_conf.round(3).sort_values(ascending=False))
diff --git a/submissions/Rishabh_Raman/play_tagger.py b/submissions/Rishabh_Raman/play_tagger.py
@@ -0,0 +1,158 @@
+"""
+NFL Play Tagger (Zero-Shot)
+Author: Rishabh Raman
+
+Classifies short NFL play descriptions into one of:
+  touchdown, field goal, turnover, penalty, pass, rush, timeout, injury
+
+Usage:
+  python play_tagger.py --mode single --text "Henry rushes up the middle for 6 yards."
+  python play_tagger.py --mode csv --in_csv sample_data.csv --out_csv predictions.csv
+"""
+
+import argparse
+
+import pandas as pd
+from transformers import pipeline
+
+# Default zero-shot model, shared by build_classifier() and the --model flag.
+DEFAULT_MODEL = "typeform/distilbert-base-uncased-mnli"
+
+# NFL-specific labels (kept small on purpose)
+NFL_LABELS = [
+    "touchdown",
+    "field goal",
+    "turnover",
+    "penalty",
+    "pass",
+    "rush",
+    "timeout",
+    "injury",
+]
+
+
+def build_classifier(model_name: str = DEFAULT_MODEL):
+    """Create a zero-shot classification pipeline on CPU.
+
+    Args:
+        model_name: Model identifier handed to ``transformers.pipeline``.
+
+    Returns:
+        The zero-shot-classification pipeline object.
+    """
+    # device=-1 keeps inference on CPU.
+    return pipeline("zero-shot-classification", model=model_name, device=-1)
+
+
+def classify_texts(clf, texts, labels, multi_label=False):
+    """Run zero-shot classification and normalize the output to a list.
+
+    Args:
+        clf: Pipeline returned by build_classifier().
+        texts: Text (or list of texts) to classify.
+        labels: Candidate labels to score each text against.
+        multi_label: Passed through to the pipeline's ``multi_label`` option.
+
+    Returns:
+        A list of pipeline results; callers read each result's "labels" and
+        "scores" entries, taking index 0 as the top prediction.
+    """
+    out = clf(texts, candidate_labels=labels, multi_label=multi_label)
+    # A single input may come back as a bare result rather than a list.
+    return out if isinstance(out, list) else [out]
+
+
+def heuristic_hint(text: str):
+    """Return a short domain hint for the play text, or None if none applies.
+
+    The hint is informational only and never overrides the model's prediction.
+    Matching is case-insensitive; "sack" is checked before "extra point".
+    """
+    t = text.lower()
+    if "sack" in t:
+        return "Hint: contains 'sack' → negative play; turnover only if fumble/lost ball."
+    if "extra point" in t:
+        return "Hint: extra point attempt → usually follows a touchdown."
+    return None
+
+
+def run_single(clf, text, labels, multi_label):
+    """Classify one play description and print its top-1/top-3 labels and any hint."""
+    res = classify_texts(clf, [text], labels, multi_label)[0]
+    print(f"\nText: {text}")
+    print(f"Top-1: {res['labels'][0]} ({res['scores'][0]:.3f})")
+    top3 = [
+        (label, round(score, 3))
+        for label, score in zip(res["labels"][:3], res["scores"][:3])
+    ]
+    print("Top-3:", top3)
+    hint = heuristic_hint(text)
+    if hint:
+        print(hint)
+
+
+def run_csv(clf, in_csv, out_csv, text_col, labels, multi_label):
+    """Classify every row of a CSV and write the predictions to a new CSV.
+
+    Adds ``pred_label`` (top label) and ``pred_score`` (its score) columns to
+    the input rows and saves the result to ``out_csv``.
+
+    Raises:
+        ValueError: If ``text_col`` is absent, the file has no rows, or the
+            text column contains missing values.
+    """
+    df = pd.read_csv(in_csv)
+    if text_col not in df.columns:
+        raise ValueError(f"Column '{text_col}' not found in {in_csv}. Columns: {list(df.columns)}")
+    if df.empty:
+        raise ValueError(f"{in_csv} is empty.")
+    texts = df[text_col]
+    n_missing = int(texts.isna().sum())
+    if n_missing:
+        # Blank cells load as NaN, which is not text the classifier can score.
+        raise ValueError(f"Column '{text_col}' in {in_csv} has {n_missing} missing value(s).")
+    preds = classify_texts(clf, texts.astype(str).tolist(), labels, multi_label)
+    df["pred_label"] = [p["labels"][0] for p in preds]
+    df["pred_score"] = [float(p["scores"][0]) for p in preds]
+    df.to_csv(out_csv, index=False)
+    print(f"Saved predictions → {out_csv}")
+
+
+def main():
+    """Parse command-line arguments and run the tagger in single or csv mode."""
+    ap = argparse.ArgumentParser(description="NFL Play Tagger (Zero-Shot)")
+    ap.add_argument("--model", default=DEFAULT_MODEL)
+    # nargs="+" so a bare --labels cannot produce an empty candidate list.
+    ap.add_argument("--labels", nargs="+", default=NFL_LABELS)
+    ap.add_argument("--multi_label", action="store_true")
+    ap.add_argument("--mode", choices=["single", "csv"], required=True)
+    ap.add_argument("--text", help="Play text (single mode)")
+    ap.add_argument("--in_csv", help="Input CSV path (csv mode)")
+    ap.add_argument("--text_col", default="description")
+    ap.add_argument("--out_csv", default="predictions.csv")
+    args = ap.parse_args()
+
+    # Check mode-specific arguments before loading the model so that bad
+    # invocations fail immediately.
+    if args.mode == "single" and not args.text:
+        raise SystemExit("Provide --text for single mode")
+    if args.mode == "csv" and not args.in_csv:
+        raise SystemExit("Provide --in_csv for csv mode")
+
+    clf = build_classifier(args.model)
+
+    if args.mode == "single":
+        run_single(clf, args.text, args.labels, args.multi_label)
+    else:
+        run_csv(
+            clf,
+            in_csv=args.in_csv,
+            out_csv=args.out_csv,
+            text_col=args.text_col,
+            labels=args.labels,
+            multi_label=args.multi_label,
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/submissions/Rishabh_Raman/requirements.txt b/submissions/Rishabh_Raman/requirements.txt
@@ -0,0 +1,3 @@
+transformers>=4.41.0
+torch
+pandas
diff --git a/submissions/Rishabh_Raman/sample_data.csv b/submissions/Rishabh_Raman/sample_data.csv
@@ -0,0 +1,9 @@
+description
+Mahomes throws a 25-yard touchdown to Kelce.
+Field goal attempt is good from 42 yards.
+Intercepted at midfield, turnover!
+Holding penalty on the offensive line.
+Henry rushes up the middle for 6 yards.
+Allen completes a short pass to Cook.
+Timeout called by the Cowboys.
+Medical staff checking on injured player.