From a94db03394d02742f40ae751effcf4b8783779a8 Mon Sep 17 00:00:00 2001 From: rohan-konanki <105895943+rohan-konanki@users.noreply.github.com> Date: Wed, 3 Sep 2025 22:43:47 -0400 Subject: [PATCH 1/5] Create README.md --- submissions/Submission: Rohan Konanki/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 submissions/Submission: Rohan Konanki/README.md diff --git a/submissions/Submission: Rohan Konanki/README.md b/submissions/Submission: Rohan Konanki/README.md new file mode 100644 index 0000000..48cdce8 --- /dev/null +++ b/submissions/Submission: Rohan Konanki/README.md @@ -0,0 +1 @@ +placeholder From ba291a3a46c6a5fa6bd7ee9acd4f194c28c5d3a7 Mon Sep 17 00:00:00 2001 From: rohan-konanki <105895943+rohan-konanki@users.noreply.github.com> Date: Wed, 3 Sep 2025 23:10:19 -0400 Subject: [PATCH 2/5] Add files via upload --- .../Research_Application.ipynb | 679 ++++++++++++++++++ 1 file changed, 679 insertions(+) create mode 100644 submissions/Submission: Rohan Konanki/Research_Application.ipynb diff --git a/submissions/Submission: Rohan Konanki/Research_Application.ipynb b/submissions/Submission: Rohan Konanki/Research_Application.ipynb new file mode 100644 index 0000000..4067216 --- /dev/null +++ b/submissions/Submission: Rohan Konanki/Research_Application.ipynb @@ -0,0 +1,679 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Qe_spw0enTcU", + "outputId": "d7aa0dbc-1692-4732-e38d-3cdaea81d639" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: datasets in /usr/local/lib/python3.12/dist-packages (4.0.0)\n", + "Requirement already satisfied: filelock in 
/usr/local/lib/python3.12/dist-packages (from datasets) (3.19.1)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from datasets) (2.0.2)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.12/dist-packages (from datasets) (18.1.0)\n", + "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.12/dist-packages (from datasets) (0.3.8)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (from datasets) (2.2.2)\n", + "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.12/dist-packages (from datasets) (2.32.4)\n", + "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.12/dist-packages (from datasets) (4.67.1)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.12/dist-packages (from datasets) (3.5.0)\n", + "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.12/dist-packages (from datasets) (0.70.16)\n", + "Requirement already satisfied: fsspec<=2025.3.0,>=2023.1.0 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (2025.3.0)\n", + "Requirement already satisfied: huggingface-hub>=0.24.0 in /usr/local/lib/python3.12/dist-packages (from datasets) (0.34.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.12/dist-packages (from datasets) (25.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from datasets) (6.0.2)\n", + "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (3.12.15)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.24.0->datasets) (4.14.1)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from 
huggingface-hub>=0.24.0->datasets) (1.1.8)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests>=2.32.2->datasets) (3.4.3)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests>=2.32.2->datasets) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests>=2.32.2->datasets) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests>=2.32.2->datasets) (2025.8.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2025.2)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (2.6.1)\n", + "Requirement already satisfied: aiosignal>=1.4.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.4.0)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (25.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.7.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (6.6.4)\n", + "Requirement already satisfied: propcache>=0.2.0 
in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (0.3.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.20.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n", + "Collecting bs4\n", + " Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.12/dist-packages (from bs4) (4.13.5)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.12/dist-packages (from beautifulsoup4->bs4) (2.7)\n", + "Requirement already satisfied: typing-extensions>=4.0.0 in /usr/local/lib/python3.12/dist-packages (from beautifulsoup4->bs4) (4.14.1)\n", + "Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)\n", + "Installing collected packages: bs4\n", + "Successfully installed bs4-0.0.2\n" + ] + } + ], + "source": [ + "!pip install datasets\n", + "!pip install bs4" + ] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "from datasets import load_dataset\n", + "ds = load_dataset(\"GonzaloA/fake_news\", split=\"train\")\n", + "df = ds.to_pandas()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cYbinwQwoNQp", + "outputId": "d6c1f063-d559-4041-d3d5-841b2012dfa8" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Repo card metadata block was not found. Setting CardData to empty.\n", + "WARNING:huggingface_hub.repocard:Repo card metadata block was not found. 
Setting CardData to empty.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score, classification_report\n", + "\n", + "df = df.dropna(subset=['text', 'label'])\n", + "X = df['text']\n", + "y = df['label']\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" + ], + "metadata": { + "id": "4s7DM9wrpI0R" + }, + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "tfidf_vectorizer = TfidfVectorizer(max_features=2500, stop_words='english')\n", + "X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)\n", + "X_test_tfidf = tfidf_vectorizer.transform(X_test)" + ], + "metadata": { + "id": "326OYLglp20I" + }, + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "model = LogisticRegression(max_iter=1000, solver='liblinear')\n", + "model.fit(X_train_tfidf, y_train)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + }, + "id": "2KAgTdG7qeI5", + "outputId": "c0807c1d-2900-4630-de17-972049dcb73c" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LogisticRegression(max_iter=1000, solver='liblinear')" + ], + "text/html": [ + "
LogisticRegression(max_iter=1000, solver='liblinear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "y_pred = model.predict(X_test_tfidf)\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "report = classification_report(y_test, y_pred, target_names=['True', 'Fake'])\n", + "\n", + "print(\"\\n--- Model Evaluation ---\")\n", + "print(f\"Accuracy: {accuracy:.4f}\")\n", + "print(\"Classification Report:\")\n", + "print(report)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Y7wX0ZM6so36", + "outputId": "c55882c6-612a-40d1-a0cf-b5dddedfcab7" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "--- Model Evaluation ---\n", + "Accuracy: 0.9669\n", + "Classification Report:\n", + " precision recall f1-score support\n", + "\n", + " True 0.96 0.96 0.96 2248\n", + " Fake 0.97 0.97 0.97 2623\n", + "\n", + " accuracy 0.97 4871\n", + " macro avg 0.97 0.97 0.97 4871\n", + "weighted avg 0.97 0.97 0.97 4871\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "def predict_article(url):\n", + " try:\n", + " response = requests.get(url, timeout=10)\n", + " response.raise_for_status()\n", + " soup = BeautifulSoup(response.text, 'html.parser')\n", + " text = soup.get_text()\n", + " text_vectorized = tfidf_vectorizer.transform([text])\n", + " prediction = model.predict(text_vectorized)\n", + " return \"True\" if prediction[0] == 1 else \"Fake\"\n", + " except requests.exceptions.RequestException as e:\n", + " return f\"Error: Could not retrieve article from URL. {e}\"\n", + " except Exception as e:\n", + " return f\"Error: An unexpected error occurred. 
{e}\"" + ], + "metadata": { + "id": "xAT82LcUquTg" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "url_to_check = input(\"Please enter a URL to check: \")\n", + "prediction = predict_article(url_to_check)\n", + "print(f\"The article at {url_to_check} is predicted to be {prediction}.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2fMCoL8xrbIr", + "outputId": "ff3c09ae-b257-497d-f9ef-91ba1739aad2" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Please enter a URL to check: https://abcnews.go.com/Politics/trump-calls-epstein-files-irrelevant-push-release-gains/story?id=125225706\n", + "The article at https://abcnews.go.com/Politics/trump-calls-epstein-files-irrelevant-push-release-gains/story?id=125225706 is predicted to be Fake.\n" + ] + } + ] + } + ] +} \ No newline at end of file From 4ba682988ec65df33b04ce423ae9eac45d0172e8 Mon Sep 17 00:00:00 2001 From: rohan-konanki <105895943+rohan-konanki@users.noreply.github.com> Date: Wed, 3 Sep 2025 23:25:27 -0400 Subject: [PATCH 3/5] Update README.md --- submissions/Submission: Rohan Konanki/README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/submissions/Submission: Rohan Konanki/README.md b/submissions/Submission: Rohan Konanki/README.md index 48cdce8..e634063 100644 --- a/submissions/Submission: Rohan Konanki/README.md +++ b/submissions/Submission: Rohan Konanki/README.md @@ -1 +1,10 @@ -placeholder +This application works as a simple classifier between fake news and real news, trained on a news dataset from Hugging Face (using the DistilBERT base model). +Given a URL input from the user, it parses the contents at the URL and predicts whether the news is real or fake based on the context in the dataset. 
+The model is not entirely accurate for newer articles, since it lacks the grounding data to determine the truth of the information. +The libraries required are as follows: +- datasets +- transformers +- bs4 (URL text parser) +- scikit-learn +- pandas +- requests From 53b06997cc7a00d71c8a4e1fd2a474bef65034d1 Mon Sep 17 00:00:00 2001 From: rohan-konanki <105895943+rohan-konanki@users.noreply.github.com> Date: Wed, 3 Sep 2025 23:26:04 -0400 Subject: [PATCH 4/5] Delete submissions/Submission: Rohan Konanki/Research_Application.ipynb --- .../Research_Application.ipynb | 679 ------------------ 1 file changed, 679 deletions(-) delete mode 100644 submissions/Submission: Rohan Konanki/Research_Application.ipynb diff --git a/submissions/Submission: Rohan Konanki/Research_Application.ipynb b/submissions/Submission: Rohan Konanki/Research_Application.ipynb deleted file mode 100644 index 4067216..0000000 --- a/submissions/Submission: Rohan Konanki/Research_Application.ipynb +++ /dev/null @@ -1,679 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Qe_spw0enTcU", - "outputId": "d7aa0dbc-1692-4732-e38d-3cdaea81d639" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Requirement already satisfied: datasets in /usr/local/lib/python3.12/dist-packages (4.0.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from datasets) (3.19.1)\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from datasets) (2.0.2)\n", - "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.12/dist-packages (from datasets) (18.1.0)\n", - "Requirement 
already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.12/dist-packages (from datasets) (0.3.8)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (from datasets) (2.2.2)\n", - "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.12/dist-packages (from datasets) (2.32.4)\n", - "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.12/dist-packages (from datasets) (4.67.1)\n", - "Requirement already satisfied: xxhash in /usr/local/lib/python3.12/dist-packages (from datasets) (3.5.0)\n", - "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.12/dist-packages (from datasets) (0.70.16)\n", - "Requirement already satisfied: fsspec<=2025.3.0,>=2023.1.0 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (2025.3.0)\n", - "Requirement already satisfied: huggingface-hub>=0.24.0 in /usr/local/lib/python3.12/dist-packages (from datasets) (0.34.4)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.12/dist-packages (from datasets) (25.0)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from datasets) (6.0.2)\n", - "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (3.12.15)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.24.0->datasets) (4.14.1)\n", - "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub>=0.24.0->datasets) (1.1.8)\n", - "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests>=2.32.2->datasets) (3.4.3)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests>=2.32.2->datasets) (3.10)\n", - 
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests>=2.32.2->datasets) (2.5.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests>=2.32.2->datasets) (2025.8.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2025.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2025.2)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (2.6.1)\n", - "Requirement already satisfied: aiosignal>=1.4.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.4.0)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (25.3.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.7.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (6.6.4)\n", - "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (0.3.2)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.20.1)\n", - 
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n", - "Collecting bs4\n", - " Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)\n", - "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.12/dist-packages (from bs4) (4.13.5)\n", - "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.12/dist-packages (from beautifulsoup4->bs4) (2.7)\n", - "Requirement already satisfied: typing-extensions>=4.0.0 in /usr/local/lib/python3.12/dist-packages (from beautifulsoup4->bs4) (4.14.1)\n", - "Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)\n", - "Installing collected packages: bs4\n", - "Successfully installed bs4-0.0.2\n" - ] - } - ], - "source": [ - "!pip install datasets\n", - "!pip install bs4" - ] - }, - { - "cell_type": "code", - "source": [ - "import pandas as pd\n", - "from datasets import load_dataset\n", - "ds = load_dataset(\"GonzaloA/fake_news\", split=\"train\")\n", - "df = ds.to_pandas()\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "cYbinwQwoNQp", - "outputId": "d6c1f063-d559-4041-d3d5-841b2012dfa8" - }, - "execution_count": 5, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Repo card metadata block was not found. Setting CardData to empty.\n", - "WARNING:huggingface_hub.repocard:Repo card metadata block was not found. 
Setting CardData to empty.\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "from sklearn.model_selection import train_test_split\n", - "from sklearn.feature_extraction.text import TfidfVectorizer\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.metrics import accuracy_score, classification_report\n", - "\n", - "df = df.dropna(subset=['text', 'label'])\n", - "X = df['text']\n", - "y = df['label']\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" - ], - "metadata": { - "id": "4s7DM9wrpI0R" - }, - "execution_count": 7, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "tfidf_vectorizer = TfidfVectorizer(max_features=2500, stop_words='english')\n", - "X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)\n", - "X_test_tfidf = tfidf_vectorizer.transform(X_test)" - ], - "metadata": { - "id": "326OYLglp20I" - }, - "execution_count": 8, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "model = LogisticRegression(max_iter=1000, solver='liblinear')\n", - "model.fit(X_train_tfidf, y_train)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 80 - }, - "id": "2KAgTdG7qeI5", - "outputId": "c0807c1d-2900-4630-de17-972049dcb73c" - }, - "execution_count": 9, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "LogisticRegression(max_iter=1000, solver='liblinear')" - ], - "text/html": [ - "
LogisticRegression(max_iter=1000, solver='liblinear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ] - }, - "metadata": {}, - "execution_count": 9 - } - ] - }, - { - "cell_type": "code", - "source": [ - "y_pred = model.predict(X_test_tfidf)\n", - "accuracy = accuracy_score(y_test, y_pred)\n", - "report = classification_report(y_test, y_pred, target_names=['True', 'Fake'])\n", - "\n", - "print(\"\\n--- Model Evaluation ---\")\n", - "print(f\"Accuracy: {accuracy:.4f}\")\n", - "print(\"Classification Report:\")\n", - "print(report)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Y7wX0ZM6so36", - "outputId": "c55882c6-612a-40d1-a0cf-b5dddedfcab7" - }, - "execution_count": 15, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "--- Model Evaluation ---\n", - "Accuracy: 0.9669\n", - "Classification Report:\n", - " precision recall f1-score support\n", - "\n", - " True 0.96 0.96 0.96 2248\n", - " Fake 0.97 0.97 0.97 2623\n", - "\n", - " accuracy 0.97 4871\n", - " macro avg 0.97 0.97 0.97 4871\n", - "weighted avg 0.97 0.97 0.97 4871\n", - "\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "import requests\n", - "from bs4 import BeautifulSoup\n", - "def predict_article(url):\n", - " try:\n", - " response = requests.get(url, timeout=10)\n", - " response.raise_for_status()\n", - " soup = BeautifulSoup(response.text, 'html.parser')\n", - " text = soup.get_text()\n", - " text_vectorized = tfidf_vectorizer.transform([text])\n", - " prediction = model.predict(text_vectorized)\n", - " return \"True\" if prediction[0] == 1 else \"Fake\"\n", - " except requests.exceptions.RequestException as e:\n", - " return f\"Error: Could not retrieve article from URL. {e}\"\n", - " except Exception as e:\n", - " return f\"Error: An unexpected error occurred. 
{e}\"" - ], - "metadata": { - "id": "xAT82LcUquTg" - }, - "execution_count": 12, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "url_to_check = input(\"Please enter a URL to check: \")\n", - "prediction = predict_article(url_to_check)\n", - "print(f\"The article at {url_to_check} is predicted to be {prediction}.\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "2fMCoL8xrbIr", - "outputId": "ff3c09ae-b257-497d-f9ef-91ba1739aad2" - }, - "execution_count": 16, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Please enter a URL to check: https://abcnews.go.com/Politics/trump-calls-epstein-files-irrelevant-push-release-gains/story?id=125225706\n", - "The article at https://abcnews.go.com/Politics/trump-calls-epstein-files-irrelevant-push-release-gains/story?id=125225706 is predicted to be Fake.\n" - ] - } - ] - } - ] -} \ No newline at end of file From 65e9394b72dbbbfeec2eaf359e663a050e78c53f Mon Sep 17 00:00:00 2001 From: rohan-konanki <105895943+rohan-konanki@users.noreply.github.com> Date: Wed, 3 Sep 2025 23:26:16 -0400 Subject: [PATCH 5/5] Add files via upload --- .../Research_Application.ipynb | 223 ++++++++++++++++++ 1 file changed, 223 insertions(+) create mode 100644 submissions/Submission: Rohan Konanki/Research_Application.ipynb diff --git a/submissions/Submission: Rohan Konanki/Research_Application.ipynb b/submissions/Submission: Rohan Konanki/Research_Application.ipynb new file mode 100644 index 0000000..003a4a5 --- /dev/null +++ b/submissions/Submission: Rohan Konanki/Research_Application.ipynb @@ -0,0 +1,223 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + 
"id": "Qe_spw0enTcU", + "outputId": "b037c15f-b656-4b06-a8ce-b641d861a72d" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: transformers in /usr/local/lib/python3.12/dist-packages (4.55.4)\n", + "Requirement already satisfied: datasets in /usr/local/lib/python3.12/dist-packages (4.0.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from transformers) (3.19.1)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.34.4)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2.0.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (25.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers) (6.0.2)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2024.11.6)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers) (2.32.4)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.21.4)\n", + "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.6.2)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers) (4.67.1)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.12/dist-packages (from datasets) (18.1.0)\n", + "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.12/dist-packages (from datasets) (0.3.8)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (from datasets) (2.2.2)\n", + "Requirement already 
satisfied: xxhash in /usr/local/lib/python3.12/dist-packages (from datasets) (3.5.0)\n", + "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.12/dist-packages (from datasets) (0.70.16)\n", + "Requirement already satisfied: fsspec<=2025.3.0,>=2023.1.0 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (2025.3.0)\n", + "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (3.12.15)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (4.14.1)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (1.1.8)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.4.3)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2025.8.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2025.2)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /usr/local/lib/python3.12/dist-packages (from 
aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (2.6.1)\n", + "Requirement already satisfied: aiosignal>=1.4.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.4.0)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (25.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.7.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (6.6.4)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (0.3.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.20.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n", + "Requirement already satisfied: bs4 in /usr/local/lib/python3.12/dist-packages (0.0.2)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.12/dist-packages (from bs4) (4.13.5)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.12/dist-packages (from beautifulsoup4->bs4) (2.7)\n", + "Requirement already satisfied: typing-extensions>=4.0.0 in /usr/local/lib/python3.12/dist-packages (from beautifulsoup4->bs4) (4.14.1)\n" + ] + } + ], + "source": [ + "!pip install transformers datasets\n", + "!pip install bs4" + ] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "from 
datasets import load_dataset\n",
+        "ds = load_dataset(\"GonzaloA/fake_news\", split=\"train\")\n",
+        "df = ds.to_pandas()\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "cYbinwQwoNQp",
+        "outputId": "429ac36d-55cc-4a91-bd61-bdc912537af4"
+      },
+      "execution_count": 18,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Repo card metadata block was not found. Setting CardData to empty.\n",
+            "WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.model_selection import train_test_split\n",
+        "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+        "from sklearn.metrics import accuracy_score, classification_report\n",
+        "\n",
+        "df = df.dropna(subset=['text', 'label'])\n",
+        "X = df['text']\n",
+        "y = df['label']\n",
+        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
+      ],
+      "metadata": {
+        "id": "4s7DM9wrpI0R"
+      },
+      "execution_count": 19,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# NOTE(review): these TF-IDF matrices are not read by any later cell in this notebook.\n",
+        "tfidf_vectorizer = TfidfVectorizer(max_features=2500, stop_words='english')\n",
+        "X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)\n",
+        "X_test_tfidf = tfidf_vectorizer.transform(X_test)"
+      ],
+      "metadata": {
+        "id": "326OYLglp20I"
+      },
+      "execution_count": 20,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from transformers import pipeline\n",
+        "# NOTE(review): this checkpoint is an SST-2 sentiment model, not a fake-news detector;\n",
+        "# its POSITIVE/NEGATIVE labels are reused below as stand-ins for real/fake.\n",
+        "classifier = pipeline(\"text-classification\", model=\"distilbert-base-uncased-finetuned-sst-2-english\", truncation=True)\n",
+        "sample_size = 50\n",
+        "X_sample = X_test.head(sample_size).tolist()\n",
+        "y_sample = y_test.head(sample_size).tolist()\n",
+        "y_pred_raw = classifier(X_sample)\n",
+        "# POSITIVE -> 1, anything else -> 0.\n",
+        "y_pred = [1 if p['label'] == 'POSITIVE' else 0 for p in y_pred_raw]\n",
+        "accuracy = accuracy_score(y_sample, y_pred)\n",
+        "# target_names is ordered by label id. POSITIVE is encoded as 1 above and predict_article\n",
+        "# reports POSITIVE as \"Real\", so label 1 has to be the 'True' class here.\n",
+        "# NOTE(review): assumes the dataset encodes 0 = fake, 1 = true - confirm against the dataset card.\n",
+        "# labels=[0, 1] keeps the report well-formed even if the sample contains a single class.\n",
+        "report = classification_report(y_sample, y_pred, labels=[0, 1], target_names=['Fake', 'True'])\n",
+        "print(f\"Accuracy on {len(y_sample)} test articles: {accuracy:.3f}\")\n",
+        "print(report)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "2KAgTdG7qeI5",
+        "outputId": "e5e6754e-b4fd-491f-ec6b-1f3b154ee002"
+      },
+      "execution_count": 22,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Device set to use cpu\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import requests\n",
+        "from bs4 import BeautifulSoup\n",
+        "\n",
+        "def predict_article(url):\n",
+        "    \"\"\"Download the page at `url` and classify its text with the global `classifier`.\n",
+        "\n",
+        "    Returns \"Real\" when the classifier labels the text POSITIVE and \"Fake\" otherwise.\n",
+        "    Failures are never raised; they are returned as a string starting with \"Error:\".\n",
+        "    \"\"\"\n",
+        "    try:\n",
+        "        response = requests.get(url, timeout=10)\n",
+        "        response.raise_for_status()\n",
+        "        soup = BeautifulSoup(response.text, 'html.parser')\n",
+        "        # Remove script/style bodies so their source is not classified as article text.\n",
+        "        for tag in soup.find_all(['script', 'style', 'noscript']):\n",
+        "            tag.decompose()\n",
+        "        # Collapse whitespace runs so blank lines do not eat into the model's input budget.\n",
+        "        text = ' '.join(soup.get_text(separator=' ').split())\n",
+        "        if not text:\n",
+        "            return \"Error: No readable text was found at the URL.\"\n",
+        "        # Let the tokenizer cut the input to the model's token limit instead of\n",
+        "        # slicing the first 512 characters, which discarded most of the article.\n",
+        "        prediction_raw = classifier(text, truncation=True)\n",
+        "        prediction = prediction_raw[0]['label']\n",
+        "        return \"Real\" if prediction == 'POSITIVE' else \"Fake\"\n",
+        "    except requests.exceptions.RequestException as e:\n",
+        "        return f\"Error: Could not retrieve article from URL. {e}\"\n",
+        "    except Exception as e:\n",
+        "        return f\"Error: An unexpected error occurred. {e}\""
+      ],
+      "metadata": {
+        "id": "xAT82LcUquTg"
+      },
+      "execution_count": 24,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "url_to_check = input(\"Please enter a URL to check: \").strip()\n",
+        "prediction = predict_article(url_to_check)\n",
+        "# predict_article reports failures as \"Error: ...\" strings; show those as-is\n",
+        "# instead of presenting them as a prediction.\n",
+        "if prediction.startswith(\"Error:\"):\n",
+        "    print(prediction)\n",
+        "else:\n",
+        "    print(f\"The article at {url_to_check} is predicted to be {prediction}.\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "2fMCoL8xrbIr",
+        "outputId": "f4316ed6-7878-4805-9f45-3d875d3cd218"
+      },
+      "execution_count": 25,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Please enter a URL to check: https://abcnews.go.com/Politics/trump-calls-epstein-files-irrelevant-push-release-gains/story?id=125225706\n",
+            "The article at https://abcnews.go.com/Politics/trump-calls-epstein-files-irrelevant-push-release-gains/story?id=125225706 is predicted to be Fake.\n"
+          ]
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file