From 3b87eb01f1e949626344a35092391bde93d3ac2f Mon Sep 17 00:00:00 2001
From: Nannan Aravazhi
Date: Wed, 3 Sep 2025 23:58:56 -0400
Subject: [PATCH] Submission: Nannan Aravazhi

---
 .gitignore                                   |   1 +
 submissions/Nannan_Aravazhi/README.md        |  44 +++++++
 submissions/Nannan_Aravazhi/requirements.txt |   5 +
 submissions/Nannan_Aravazhi/summarizer_qa.py | 119 +++++++++++++++++++
 4 files changed, 169 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 submissions/Nannan_Aravazhi/README.md
 create mode 100644 submissions/Nannan_Aravazhi/requirements.txt
 create mode 100644 submissions/Nannan_Aravazhi/summarizer_qa.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..cebf2e1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+submissions/Nannan_Aravazhi/venv/
diff --git a/submissions/Nannan_Aravazhi/README.md b/submissions/Nannan_Aravazhi/README.md
new file mode 100644
index 0000000..8a05a8a
--- /dev/null
+++ b/submissions/Nannan_Aravazhi/README.md
@@ -0,0 +1,44 @@
+Wikipedia Reading-Check
+
+This project takes a Wikipedia topic as input and fetches the article text.
+
+It then summarizes it using a lightweight model and generates one short reading-check question based on the summary.
+
+Runs fully on CPU. No GPU required.
+
+How to Run
+1. Create and activate a virtual environment
+python3 -m venv submissions/<your_name>/venv
+source submissions/<your_name>/venv/bin/activate
+
+2. Install dependencies
+pip install -r submissions/<your_name>/requirements.txt
+
+3. Run the program
+python submissions/<your_name>/summarizer_qa.py
+
+
+Then enter a topic, e.g.:
+
+Enter a Wikipedia topic: cristiano ronaldo
+or
+lionel messi
+
+Example Output
+=== Summary ===
+Cristiano Ronaldo dos Santos Aveiro is a Portuguese international footballer...
+
+=== Reading Check ===
+1. How many goals has Cristiano Ronaldo scored in the Champions League?
+
+Models Used
+
+Summarizer: sshleifer/distilbart-cnn-12-6
+
+Question Generator: iarfmoose/t5-base-question-generator
+
+Model Info:
+First run downloads the model weights (~300MB summarizer, ~900MB QG).
+After the first run, everything is cached locally in:
+
+~/.cache/huggingface/hub/
diff --git a/submissions/Nannan_Aravazhi/requirements.txt b/submissions/Nannan_Aravazhi/requirements.txt
new file mode 100644
index 0000000..e74a229
--- /dev/null
+++ b/submissions/Nannan_Aravazhi/requirements.txt
@@ -0,0 +1,5 @@
+transformers
+wikipedia-api
+torch
+sentencepiece
+protobuf<5
\ No newline at end of file
diff --git a/submissions/Nannan_Aravazhi/summarizer_qa.py b/submissions/Nannan_Aravazhi/summarizer_qa.py
new file mode 100644
index 0000000..8cf1e0f
--- /dev/null
+++ b/submissions/Nannan_Aravazhi/summarizer_qa.py
@@ -0,0 +1,119 @@
+import wikipediaapi
+from transformers import pipeline
+from typing import List, Dict
+
+
+# constants
+WIKI_CHAR_LIMIT = 1000  # limiting input size to avoid model overflow
+SUM_MODEL = "sshleifer/distilbart-cnn-12-6"  # lightweight CPU-friendly summarizer
+SUM_MAX_LEN = 200
+SUM_MIN_LEN = 50
+QG_MODEL = "iarfmoose/t5-base-question-generator"  # model for question generation
+QG_NUM_QUESTIONS = 1  # one reading-check question
+USER_AGENT = "NannanAravazhi-AIatGT-AR-Fall2025/1.0 (contact: naravazhi3@gatech.edu)"
+
+
+
+# initializing pipelines here to use huggingface locally (downloads weights on first run)
+summarizer_pipeline = pipeline("summarization", model=SUM_MODEL)
+qg_pipeline = pipeline("text2text-generation", model=QG_MODEL)
+
+
+
+
+# core functions
+def fetch_wikipedia_content(topic: str) -> str:
+    """
+    Fetch article text for *topic*, truncated to WIKI_CHAR_LIMIT chars; raises ValueError if missing/empty.
+    """
+    wiki = wikipediaapi.Wikipedia(
+        language="en",
+        user_agent=USER_AGENT  # Wikipedia API requires an identifying user agent
+    )
+    page = wiki.page(topic)
+    if not page.exists():
+        raise ValueError(f"Can't find this topic: '{topic}'.")
+
+    text = (page.summary or "").strip()
+    if not text:
+        text = (page.text or "").strip()
+    if not text:
+        raise ValueError(f"No readable content on this topic: '{topic}'.")
+    return text[:WIKI_CHAR_LIMIT]
+
+
+def summarize_text(content: str) -> str:
+    """
+    Return a short abstractive summary of *content*; raises ValueError on empty input.
+    """
+    if not content or not content.strip():
+        raise ValueError("Empty content; cannot summarize.")
+    result = summarizer_pipeline(content, max_length=SUM_MAX_LEN, min_length=SUM_MIN_LEN, do_sample=False)
+    return result[0]["summary_text"].strip()
+
+
+def generate_questions(summary: str) -> List[str]:
+    """
+    Generate up to QG_NUM_QUESTIONS deduplicated quiz-style questions from *summary*.
+    """
+    if not summary or not summary.strip():
+        return []
+
+    prompt = f"Generate {QG_NUM_QUESTIONS} unique, short quiz-style questions based on this summary:\n{summary}"
+
+    out = qg_pipeline(
+        prompt,
+        max_length=64,
+        do_sample=True,  # sampling for varied questions
+        top_p=0.92,
+        top_k=50,
+        temperature=0.9,
+        num_return_sequences=QG_NUM_QUESTIONS,
+        num_beams=QG_NUM_QUESTIONS,
+    )
+    qs = [it["generated_text"].strip() for it in out if it["generated_text"].strip()]
+    seen, deduped = set(), []
+    for q in qs:
+        if q not in seen:
+            seen.add(q)
+            deduped.append(q)
+        if len(deduped) >= QG_NUM_QUESTIONS:
+            break
+    return deduped
+
+
+
+
+# main method: prompt for a topic, then fetch -> summarize -> question-gen
+def main() -> None:
+    print("=== AI@GT: Wikipedia Summarizer and QGen ===")
+    topic = input("Enter a Wikipedia topic: ").strip() or "Georgia Institute of Technology"
+    try:
+        content = fetch_wikipedia_content(topic)
+        summary = summarize_text(content)
+        questions = generate_questions(summary)
+
+
+        # dict holding this run's results
+        result: Dict[str, object] = {
+            "topic": topic,
+            "summary": summary,
+            "questions": questions
+        }
+
+
+        print("\n=== Summary ===\n" + result["summary"])
+        print("\n=== Questions ===")
+        if questions:
+            for i, q in enumerate(questions, 1):
+                print(f"{i}. {q}")
+        else:
+            print("(No questions generated)")
+
+
+    except Exception as e:
+        print(f"\n[Error] {e}")
+
+
+if __name__ == "__main__":
+    main()