diff --git a/prediction_acquition-youtube/1-extract_transcripts.ipynb b/prediction_acquition-youtube/1-extract_transcripts.ipynb
index 5ea2fd5..9d08826 100644
--- a/prediction_acquition-youtube/1-extract_transcripts.ipynb
+++ b/prediction_acquition-youtube/1-extract_transcripts.ipynb
@@ -10,7 +10,7 @@
"- **Tasks:**\n",
" \n",
" 1. Find any YouTube clip where predictions exists. For ex, [Immediate 2025 NCAA tournament Final Four and championship picks](https://youtu.be/-rjnvL9LL3U?si=QMFJYAQ8Q85lTNCD). Below, we include all the videos we extract data from.\n",
- " 2. Use the [`youtube-transcript-api`](https://pypi.org/project/youtube-transcript-api/) to retrieve the transcript.\n",
+ " 2. Use the [`youtube-transcript-api`](https://pypi.org/project/youtube-transcript-api/) or [`whisper`](https://huggingface.co/openai/whisper-large) to retrieve the transcript.\n",
" 3. Extract the transcript snippets.\n",
" 4. Save the raw transcript snippets."
]
@@ -30,6 +30,12 @@
"from youtube_transcript_api import YouTubeTranscriptApi\n",
"from youtube_transcript_api._errors import NoTranscriptFound\n",
"\n",
+ "import yt_dlp\n",
+ "from transformers import pipeline\n",
+ "\n",
+ "from typing import Any, Mapping\n",
+ "import math\n",
+ "\n",
"# Get the current working directory of the notebook\n",
"notebook_dir = os.getcwd()\n",
"# Add the parent directory to the system path\n",
@@ -61,8 +67,12 @@
"\n",
"---\n",
"\n",
- "1. `YouTubeTranscriptApi()` to fetch the YT transcripts.\n",
- "2. `extract_data()` to transform data into Pandas DataFrame."
+ "1. `YouTubeTranscriptApi()`: fetch the YT transcripts.\n",
+ "2. `extract_data()`: transform data into Pandas DataFrame.\n",
+ "3. `fetch_en_auto_transcript_snippets(ytt_api, video_id)`: lists the transcripts available for the \"video_id\", selects **English (auto-generated)** when possible (`language_code == \"en\"` and `is_generated == True`), and returns a list of \"FetchedTranscriptSnippet\" objects (each snippet has \"text\", \"start\", \"duration\").\n",
+ "4. `extract_data(snippets)`: converts the list of \"FetchedTranscriptSnippet\" objects into a Pandas DataFrame, output columns: \"Text\", \"Start Time\", \"Duration\" (and we add \"Video ID\" for consistency).\n",
+ "5. `download_audio(video_id, output_path=...)`: downloads the best available audio stream for that YouTube video (using \"yt-dlp\") and saves it to \"output_path\" (e.g. \".../{video_id}.webm\").\n",
+ "6. `whisper_result_to_dataframe(result, video_id)`: converts Whisper output (with timestamps) into a Pandas DataFrame, output columns: \"Text\", \"Start Time\", \"Duration\", \"Video ID\"."
]
},
{
@@ -103,1108 +113,10 @@
" return df"
]
},
- {
- "cell_type": "markdown",
- "id": "65bd97ff",
- "metadata": {},
- "source": [
- "## Transcripts \n",
- "\n",
- "| # | Title | Link | Video ID |\n",
- "|---|---|---|---|\n",
- "| 1 | Immediate 2025 NCAA tournament Final Four and championship picks | https://www.youtube.com/watch?v=-rjnvL9LL3U | -rjnvL9LL3U \n",
- "| 2 | FULL PREVIEW & PICKS: Patriots vs. Seahawks Super Bowl LX 🏆 Who wins the Lombardi Trophy? \\| NFL Live | https://www.youtube.com/watch?v=ZZN7BAYeOtc | ZZN7BAYeOtc |\n",
- "| 3 | FIRST TAKE'S SUPER BOWL PICKS! The crew is going with... 😱 | https://www.youtube.com/watch?v=mBK8o5orBbE | mBK8o5orBbE |\n",
- "| 4 | NFL Predictions and Picks For Super Bowl LX [Patriots vs Seahawks] - Best Bets ✅ | https://www.youtube.com/watch?v=LXPQrZV4Cfw | LXPQrZV4Cfw |\n",
- "| 5 | Rich Eisen’s Pick to Win the Seahawks vs Patriots Super Bowl LX Is….? - The Rich Eisen Show | https://www.youtube.com/watch?v=fUmJAtFEGn8 | fUmJAtFEGn8 |\n",
- "| 6 | The Pat McAfee Show's Picks For Super Bowl LX | https://www.youtube.com/watch?v=MTVAkVkkaz4 | MTVAkVkkaz4 |\n",
- "| 7 | Super Bowl LX On-Site Preview: Picks, Predictions, Everything you need to know for Patriots-Seahawks | https://www.youtube.com/watch?v=Z0xP3GNpjkw | Z0xP3GNpjkw |"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "id": "85c3ba43",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[FetchedTranscriptSnippet(text='.', start=0.283, duration=1.248),\n",
- " FetchedTranscriptSnippet(text='THAT WILL BE A MONSTER GAME.', start=0.383, duration=5.879),\n",
- " FetchedTranscriptSnippet(text=\">> LET'S HOPE EVERYBODY -- HELP\", start=3.23, duration=8.195),\n",
- " FetchedTranscriptSnippet(text='EVERYBODY OUT MORE.', start=6.295, duration=9.744),\n",
- " FetchedTranscriptSnippet(text='THE 16 SEED HAS ONE TWO OF 24', start=11.458, duration=6.346),\n",
- " FetchedTranscriptSnippet(text='MEETINGS.', start=16.072, duration=4.864),\n",
- " FetchedTranscriptSnippet(text='ANALYTICS SAY ALL THE TOP THREE', start=17.837, duration=6.963)]"
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "video_id = \"-rjnvL9LL3U\"\n",
- "transcript_snippets = ytt_api.fetch(video_id)\n",
- "transcript_snippets[:7]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "id": "c6c513f8",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Text | \n",
- " Start Time | \n",
- " Duration | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " . | \n",
- " 0.283 | \n",
- " 1.248 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " THAT WILL BE A MONSTER GAME. | \n",
- " 0.383 | \n",
- " 5.879 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " >> LET'S HOPE EVERYBODY -- HELP | \n",
- " 3.230 | \n",
- " 8.195 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " EVERYBODY OUT MORE. | \n",
- " 6.295 | \n",
- " 9.744 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " THE 16 SEED HAS ONE TWO OF 24 | \n",
- " 11.458 | \n",
- " 6.346 | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " MEETINGS. | \n",
- " 16.072 | \n",
- " 4.864 | \n",
- "
\n",
- " \n",
- " | 6 | \n",
- " ANALYTICS SAY ALL THE TOP THREE | \n",
- " 17.837 | \n",
- " 6.963 | \n",
- "
\n",
- " \n",
- " | 7 | \n",
- " SEEDS WILL ADVANCE. | \n",
- " 20.969 | \n",
- " 6.129 | \n",
- "
\n",
- " \n",
- " | 8 | \n",
- " THE NINE SEED HAS WON TWO THIRDS | \n",
- " 24.833 | \n",
- " 4.197 | \n",
- "
\n",
- " \n",
- " | 9 | \n",
- " OF THE GAMES AGAINST THE EIGHT | \n",
- " 27.131 | \n",
- " 2.566 | \n",
- "
\n",
- " \n",
- " | 10 | \n",
- " SEED. | \n",
- " 29.063 | \n",
- " 3.481 | \n",
- "
\n",
- " \n",
- " | 11 | \n",
- " YOU ALL HAD UCONN WINNING IT | \n",
- " 31.595 | \n",
- " 3.348 | \n",
- "
\n",
- " \n",
- " | 12 | \n",
- " LAST YEAR. | \n",
- " 32.577 | \n",
- " 5.264 | \n",
- "
\n",
- " \n",
- " | 13 | \n",
- " WHO MAKES IT TO THE FINAL FOUR? | \n",
- " 34.976 | \n",
- " 3.464 | \n",
- "
\n",
- " \n",
- " | 14 | \n",
- " AND WHO CUTS DOWN THE NETS? | \n",
- " 37.874 | \n",
- " 5.396 | \n",
- "
\n",
- " \n",
- " | 15 | \n",
- " CLARK: I GET TO CHOOSE FIRST? | \n",
- " 38.474 | \n",
- " 6.495 | \n",
- "
\n",
- " \n",
- " | 16 | \n",
- " I WILL TAKE MICHIGAN STATE. | \n",
- " 43.304 | \n",
- " 4.963 | \n",
- "
\n",
- " \n",
- " | 17 | \n",
- " FLORIDA IS THE MOST COMPLETE | \n",
- " 47.168 | \n",
- " 4.581 | \n",
- "
\n",
- " \n",
- " | 18 | \n",
- " TEAM IN THE FIELD. | \n",
- " 48.300 | \n",
- " 4.515 | \n",
- "
\n",
- " \n",
- " | 19 | \n",
- " CLEMSON IS MY DARK HORSE. | \n",
- " 51.782 | \n",
- " 3.098 | \n",
- "
\n",
- " \n",
- " | 20 | \n",
- " THEY ARE RUGGED. | \n",
- " 52.848 | \n",
- " 3.131 | \n",
- "
\n",
- " \n",
- " | 21 | \n",
- " THEY ALMOST GOT TO THE FINAL | \n",
- " 54.913 | \n",
- " 2.499 | \n",
- "
\n",
- " \n",
- " | 22 | \n",
- " FOUR LAST YEAR. | \n",
- " 56.012 | \n",
- " 3.798 | \n",
- "
\n",
- " \n",
- " | 23 | \n",
- " AND THEN WE HAVE DUKE. | \n",
- " 57.445 | \n",
- " 4.763 | \n",
- "
\n",
- " \n",
- " | 24 | \n",
- " I AM TAKING CLEMSON IN SAN | \n",
- " 59.843 | \n",
- " 6.978 | \n",
- "
\n",
- " \n",
- " | 25 | \n",
- " ANTONIO. | \n",
- " 62.241 | \n",
- " 6.979 | \n",
- "
\n",
- " \n",
- " | 26 | \n",
- " I AM GOING WITH THE GATORS AS MY | \n",
- " 66.855 | \n",
- " 2.598 | \n",
- "
\n",
- " \n",
- " | 27 | \n",
- " CHAMP. | \n",
- " 69.253 | \n",
- " 3.997 | \n",
- "
\n",
- " \n",
- " | 28 | \n",
- " >> YOU ARE GETTING REALLY BOLD. | \n",
- " 70.885 | \n",
- " 4.631 | \n",
- "
\n",
- " \n",
- " | 29 | \n",
- " I AM A LITTLE LESS BOLD. | \n",
- " 73.284 | \n",
- " 4.397 | \n",
- "
\n",
- " \n",
- " | 30 | \n",
- " I HAD IOWA STATE HERE BUT | \n",
- " 77.148 | \n",
- " 2.698 | \n",
- "
\n",
- " \n",
- " | 31 | \n",
- " BECAUSE OF THE INJURY TO GILBERT | \n",
- " 77.714 | \n",
- " 4.430 | \n",
- "
\n",
- " \n",
- " | 32 | \n",
- " I WILL TAKE THE SPARTANS. | \n",
- " 79.879 | \n",
- " 4.581 | \n",
- "
\n",
- " \n",
- " | 33 | \n",
- " I WILL TAKE DUKE AND FLORIDA IN | \n",
- " 82.561 | \n",
- " 2.798 | \n",
- "
\n",
- " \n",
- " | 34 | \n",
- " THE FINAL AND THE BLUE DEVILS, | \n",
- " 84.493 | \n",
- " 3.731 | \n",
- "
\n",
- " \n",
- " | 35 | \n",
- " ASSUMING THAT COOPER FLAGG IS | \n",
- " 85.393 | \n",
- " 3.431 | \n",
- "
\n",
- " \n",
- " | 36 | \n",
- " HEALTHY, THEY ARE CUTTING DOWN | \n",
- " 88.258 | \n",
- " 0.865 | \n",
- "
\n",
- " \n",
- " | 37 | \n",
- " THE NETS. | \n",
- " 88.857 | \n",
- " 3.697 | \n",
- "
\n",
- " \n",
- " | 38 | \n",
- " >> HERE WE GO. | \n",
- " 90.789 | \n",
- " 2.532 | \n",
- "
\n",
- " \n",
- " | 39 | \n",
- " I LIKE AUBURN COMING OUT OF THE | \n",
- " 92.721 | \n",
- " 2.632 | \n",
- "
\n",
- " \n",
- " | 40 | \n",
- " SOUTH. | \n",
- " 93.354 | \n",
- " 2.965 | \n",
- "
\n",
- " \n",
- " | 41 | \n",
- " I LIKE ST. JOHN LEADING OUT OF | \n",
- " 95.719 | \n",
- " 2.765 | \n",
- "
\n",
- " \n",
- " | 42 | \n",
- " THE WEST. | \n",
- " 96.352 | \n",
- " 3.714 | \n",
- "
\n",
- " \n",
- " | 43 | \n",
- " DUKE IN THE EAST, HOUSTON IN THE | \n",
- " 98.517 | \n",
- " 3.814 | \n",
- "
\n",
- " \n",
- " | 44 | \n",
- " MIDWEST. | \n",
- " 100.099 | \n",
- " 3.431 | \n",
- "
\n",
- " \n",
- " | 45 | \n",
- " IN MY CHAMPIONSHIP GAME, I HAVE | \n",
- " 102.731 | \n",
- " 2.964 | \n",
- "
\n",
- " \n",
- " | 46 | \n",
- " HOUSTON AND AUBURN AND I HAVE | \n",
- " 103.563 | \n",
- " 4.664 | \n",
- "
\n",
- " \n",
- " | 47 | \n",
- " THE AUBURN TIGERS FINISHING THE | \n",
- " 105.729 | \n",
- " 3.031 | \n",
- "
\n",
- " \n",
- " | 48 | \n",
- " SEASON AS NATIONAL CHAMPS. | \n",
- " 108.260 | \n",
- " 3.132 | \n",
- "
\n",
- " \n",
- " | 49 | \n",
- " ADAM: I THOUGHT HE WOULD GO BIG | \n",
- " 108.793 | \n",
- " 2.932 | \n",
- "
\n",
- " \n",
- " | 50 | \n",
- " EAST. | \n",
- " 111.425 | \n",
- " 3.264 | \n",
- "
\n",
- " \n",
- " | 51 | \n",
- " >> THREE DIFFERENT CHAMPS FOR | \n",
- " 112.957 | \n",
- " 1.899 | \n",
- "
\n",
- " \n",
- " | 52 | \n",
- " US. | \n",
- " 114.723 | \n",
- " 3.015 | \n",
- "
\n",
- " \n",
- " | 53 | \n",
- " ADAM: THE BIG EAST HAVE FIVE IN | \n",
- " 114.889 | \n",
- " 5.514 | \n",
- "
\n",
- " \n",
- " | 54 | \n",
- " THE TOURNAMENT FOR US. | \n",
- " 117.771 | \n",
- " 3.065 | \n",
- "
\n",
- " \n",
- " | 55 | \n",
- " YOU MIXED IT UP. | \n",
- " 120.436 | \n",
- " 3.597 | \n",
- "
\n",
- " \n",
- " | 56 | \n",
- " >> HE LEARNED NOT TO GO WITH MY | \n",
- " 120.869 | \n",
- " 3.331 | \n",
- "
\n",
- " \n",
- " | 57 | \n",
- " PICK. | \n",
- " 124.067 | \n",
- " 1.415 | \n",
- "
\n",
- " \n",
- " | 58 | \n",
- " CLARK: YOU GUY | \n",
- " 124.300 | \n",
- " 1.182 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Text Start Time Duration\n",
- "0 . 0.283 1.248\n",
- "1 THAT WILL BE A MONSTER GAME. 0.383 5.879\n",
- "2 >> LET'S HOPE EVERYBODY -- HELP 3.230 8.195\n",
- "3 EVERYBODY OUT MORE. 6.295 9.744\n",
- "4 THE 16 SEED HAS ONE TWO OF 24 11.458 6.346\n",
- "5 MEETINGS. 16.072 4.864\n",
- "6 ANALYTICS SAY ALL THE TOP THREE 17.837 6.963\n",
- "7 SEEDS WILL ADVANCE. 20.969 6.129\n",
- "8 THE NINE SEED HAS WON TWO THIRDS 24.833 4.197\n",
- "9 OF THE GAMES AGAINST THE EIGHT 27.131 2.566\n",
- "10 SEED. 29.063 3.481\n",
- "11 YOU ALL HAD UCONN WINNING IT 31.595 3.348\n",
- "12 LAST YEAR. 32.577 5.264\n",
- "13 WHO MAKES IT TO THE FINAL FOUR? 34.976 3.464\n",
- "14 AND WHO CUTS DOWN THE NETS? 37.874 5.396\n",
- "15 CLARK: I GET TO CHOOSE FIRST? 38.474 6.495\n",
- "16 I WILL TAKE MICHIGAN STATE. 43.304 4.963\n",
- "17 FLORIDA IS THE MOST COMPLETE 47.168 4.581\n",
- "18 TEAM IN THE FIELD. 48.300 4.515\n",
- "19 CLEMSON IS MY DARK HORSE. 51.782 3.098\n",
- "20 THEY ARE RUGGED. 52.848 3.131\n",
- "21 THEY ALMOST GOT TO THE FINAL 54.913 2.499\n",
- "22 FOUR LAST YEAR. 56.012 3.798\n",
- "23 AND THEN WE HAVE DUKE. 57.445 4.763\n",
- "24 I AM TAKING CLEMSON IN SAN 59.843 6.978\n",
- "25 ANTONIO. 62.241 6.979\n",
- "26 I AM GOING WITH THE GATORS AS MY 66.855 2.598\n",
- "27 CHAMP. 69.253 3.997\n",
- "28 >> YOU ARE GETTING REALLY BOLD. 70.885 4.631\n",
- "29 I AM A LITTLE LESS BOLD. 73.284 4.397\n",
- "30 I HAD IOWA STATE HERE BUT 77.148 2.698\n",
- "31 BECAUSE OF THE INJURY TO GILBERT 77.714 4.430\n",
- "32 I WILL TAKE THE SPARTANS. 79.879 4.581\n",
- "33 I WILL TAKE DUKE AND FLORIDA IN 82.561 2.798\n",
- "34 THE FINAL AND THE BLUE DEVILS, 84.493 3.731\n",
- "35 ASSUMING THAT COOPER FLAGG IS 85.393 3.431\n",
- "36 HEALTHY, THEY ARE CUTTING DOWN 88.258 0.865\n",
- "37 THE NETS. 88.857 3.697\n",
- "38 >> HERE WE GO. 90.789 2.532\n",
- "39 I LIKE AUBURN COMING OUT OF THE 92.721 2.632\n",
- "40 SOUTH. 93.354 2.965\n",
- "41 I LIKE ST. JOHN LEADING OUT OF 95.719 2.765\n",
- "42 THE WEST. 96.352 3.714\n",
- "43 DUKE IN THE EAST, HOUSTON IN THE 98.517 3.814\n",
- "44 MIDWEST. 100.099 3.431\n",
- "45 IN MY CHAMPIONSHIP GAME, I HAVE 102.731 2.964\n",
- "46 HOUSTON AND AUBURN AND I HAVE 103.563 4.664\n",
- "47 THE AUBURN TIGERS FINISHING THE 105.729 3.031\n",
- "48 SEASON AS NATIONAL CHAMPS. 108.260 3.132\n",
- "49 ADAM: I THOUGHT HE WOULD GO BIG 108.793 2.932\n",
- "50 EAST. 111.425 3.264\n",
- "51 >> THREE DIFFERENT CHAMPS FOR 112.957 1.899\n",
- "52 US. 114.723 3.015\n",
- "53 ADAM: THE BIG EAST HAVE FIVE IN 114.889 5.514\n",
- "54 THE TOURNAMENT FOR US. 117.771 3.065\n",
- "55 YOU MIXED IT UP. 120.436 3.597\n",
- "56 >> HE LEARNED NOT TO GO WITH MY 120.869 3.331\n",
- "57 PICK. 124.067 1.415\n",
- "58 CLARK: YOU GUY 124.300 1.182"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "snippets_df = extract_data(transcript_snippets)\n",
- "snippets_df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "808123fc",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Text | \n",
- " Start Time | \n",
- " Duration | \n",
- " Video ID | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " . | \n",
- " 0.283 | \n",
- " 1.248 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " THAT WILL BE A MONSTER GAME. | \n",
- " 0.383 | \n",
- " 5.879 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " >> LET'S HOPE EVERYBODY -- HELP | \n",
- " 3.230 | \n",
- " 8.195 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " EVERYBODY OUT MORE. | \n",
- " 6.295 | \n",
- " 9.744 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " THE 16 SEED HAS ONE TWO OF 24 | \n",
- " 11.458 | \n",
- " 6.346 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " MEETINGS. | \n",
- " 16.072 | \n",
- " 4.864 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 6 | \n",
- " ANALYTICS SAY ALL THE TOP THREE | \n",
- " 17.837 | \n",
- " 6.963 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 7 | \n",
- " SEEDS WILL ADVANCE. | \n",
- " 20.969 | \n",
- " 6.129 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 8 | \n",
- " THE NINE SEED HAS WON TWO THIRDS | \n",
- " 24.833 | \n",
- " 4.197 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 9 | \n",
- " OF THE GAMES AGAINST THE EIGHT | \n",
- " 27.131 | \n",
- " 2.566 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 10 | \n",
- " SEED. | \n",
- " 29.063 | \n",
- " 3.481 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 11 | \n",
- " YOU ALL HAD UCONN WINNING IT | \n",
- " 31.595 | \n",
- " 3.348 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 12 | \n",
- " LAST YEAR. | \n",
- " 32.577 | \n",
- " 5.264 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 13 | \n",
- " WHO MAKES IT TO THE FINAL FOUR? | \n",
- " 34.976 | \n",
- " 3.464 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 14 | \n",
- " AND WHO CUTS DOWN THE NETS? | \n",
- " 37.874 | \n",
- " 5.396 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 15 | \n",
- " CLARK: I GET TO CHOOSE FIRST? | \n",
- " 38.474 | \n",
- " 6.495 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 16 | \n",
- " I WILL TAKE MICHIGAN STATE. | \n",
- " 43.304 | \n",
- " 4.963 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 17 | \n",
- " FLORIDA IS THE MOST COMPLETE | \n",
- " 47.168 | \n",
- " 4.581 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 18 | \n",
- " TEAM IN THE FIELD. | \n",
- " 48.300 | \n",
- " 4.515 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 19 | \n",
- " CLEMSON IS MY DARK HORSE. | \n",
- " 51.782 | \n",
- " 3.098 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 20 | \n",
- " THEY ARE RUGGED. | \n",
- " 52.848 | \n",
- " 3.131 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 21 | \n",
- " THEY ALMOST GOT TO THE FINAL | \n",
- " 54.913 | \n",
- " 2.499 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 22 | \n",
- " FOUR LAST YEAR. | \n",
- " 56.012 | \n",
- " 3.798 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 23 | \n",
- " AND THEN WE HAVE DUKE. | \n",
- " 57.445 | \n",
- " 4.763 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 24 | \n",
- " I AM TAKING CLEMSON IN SAN | \n",
- " 59.843 | \n",
- " 6.978 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 25 | \n",
- " ANTONIO. | \n",
- " 62.241 | \n",
- " 6.979 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 26 | \n",
- " I AM GOING WITH THE GATORS AS MY | \n",
- " 66.855 | \n",
- " 2.598 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 27 | \n",
- " CHAMP. | \n",
- " 69.253 | \n",
- " 3.997 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 28 | \n",
- " >> YOU ARE GETTING REALLY BOLD. | \n",
- " 70.885 | \n",
- " 4.631 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 29 | \n",
- " I AM A LITTLE LESS BOLD. | \n",
- " 73.284 | \n",
- " 4.397 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 30 | \n",
- " I HAD IOWA STATE HERE BUT | \n",
- " 77.148 | \n",
- " 2.698 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 31 | \n",
- " BECAUSE OF THE INJURY TO GILBERT | \n",
- " 77.714 | \n",
- " 4.430 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 32 | \n",
- " I WILL TAKE THE SPARTANS. | \n",
- " 79.879 | \n",
- " 4.581 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 33 | \n",
- " I WILL TAKE DUKE AND FLORIDA IN | \n",
- " 82.561 | \n",
- " 2.798 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 34 | \n",
- " THE FINAL AND THE BLUE DEVILS, | \n",
- " 84.493 | \n",
- " 3.731 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 35 | \n",
- " ASSUMING THAT COOPER FLAGG IS | \n",
- " 85.393 | \n",
- " 3.431 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 36 | \n",
- " HEALTHY, THEY ARE CUTTING DOWN | \n",
- " 88.258 | \n",
- " 0.865 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 37 | \n",
- " THE NETS. | \n",
- " 88.857 | \n",
- " 3.697 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 38 | \n",
- " >> HERE WE GO. | \n",
- " 90.789 | \n",
- " 2.532 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 39 | \n",
- " I LIKE AUBURN COMING OUT OF THE | \n",
- " 92.721 | \n",
- " 2.632 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 40 | \n",
- " SOUTH. | \n",
- " 93.354 | \n",
- " 2.965 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 41 | \n",
- " I LIKE ST. JOHN LEADING OUT OF | \n",
- " 95.719 | \n",
- " 2.765 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 42 | \n",
- " THE WEST. | \n",
- " 96.352 | \n",
- " 3.714 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 43 | \n",
- " DUKE IN THE EAST, HOUSTON IN THE | \n",
- " 98.517 | \n",
- " 3.814 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 44 | \n",
- " MIDWEST. | \n",
- " 100.099 | \n",
- " 3.431 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 45 | \n",
- " IN MY CHAMPIONSHIP GAME, I HAVE | \n",
- " 102.731 | \n",
- " 2.964 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 46 | \n",
- " HOUSTON AND AUBURN AND I HAVE | \n",
- " 103.563 | \n",
- " 4.664 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 47 | \n",
- " THE AUBURN TIGERS FINISHING THE | \n",
- " 105.729 | \n",
- " 3.031 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 48 | \n",
- " SEASON AS NATIONAL CHAMPS. | \n",
- " 108.260 | \n",
- " 3.132 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 49 | \n",
- " ADAM: I THOUGHT HE WOULD GO BIG | \n",
- " 108.793 | \n",
- " 2.932 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 50 | \n",
- " EAST. | \n",
- " 111.425 | \n",
- " 3.264 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 51 | \n",
- " >> THREE DIFFERENT CHAMPS FOR | \n",
- " 112.957 | \n",
- " 1.899 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 52 | \n",
- " US. | \n",
- " 114.723 | \n",
- " 3.015 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 53 | \n",
- " ADAM: THE BIG EAST HAVE FIVE IN | \n",
- " 114.889 | \n",
- " 5.514 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 54 | \n",
- " THE TOURNAMENT FOR US. | \n",
- " 117.771 | \n",
- " 3.065 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 55 | \n",
- " YOU MIXED IT UP. | \n",
- " 120.436 | \n",
- " 3.597 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 56 | \n",
- " >> HE LEARNED NOT TO GO WITH MY | \n",
- " 120.869 | \n",
- " 3.331 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 57 | \n",
- " PICK. | \n",
- " 124.067 | \n",
- " 1.415 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 58 | \n",
- " CLARK: YOU GUY | \n",
- " 124.300 | \n",
- " 1.182 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Text Start Time Duration Video ID\n",
- "0 . 0.283 1.248 -rjnvL9LL3U\n",
- "1 THAT WILL BE A MONSTER GAME. 0.383 5.879 -rjnvL9LL3U\n",
- "2 >> LET'S HOPE EVERYBODY -- HELP 3.230 8.195 -rjnvL9LL3U\n",
- "3 EVERYBODY OUT MORE. 6.295 9.744 -rjnvL9LL3U\n",
- "4 THE 16 SEED HAS ONE TWO OF 24 11.458 6.346 -rjnvL9LL3U\n",
- "5 MEETINGS. 16.072 4.864 -rjnvL9LL3U\n",
- "6 ANALYTICS SAY ALL THE TOP THREE 17.837 6.963 -rjnvL9LL3U\n",
- "7 SEEDS WILL ADVANCE. 20.969 6.129 -rjnvL9LL3U\n",
- "8 THE NINE SEED HAS WON TWO THIRDS 24.833 4.197 -rjnvL9LL3U\n",
- "9 OF THE GAMES AGAINST THE EIGHT 27.131 2.566 -rjnvL9LL3U\n",
- "10 SEED. 29.063 3.481 -rjnvL9LL3U\n",
- "11 YOU ALL HAD UCONN WINNING IT 31.595 3.348 -rjnvL9LL3U\n",
- "12 LAST YEAR. 32.577 5.264 -rjnvL9LL3U\n",
- "13 WHO MAKES IT TO THE FINAL FOUR? 34.976 3.464 -rjnvL9LL3U\n",
- "14 AND WHO CUTS DOWN THE NETS? 37.874 5.396 -rjnvL9LL3U\n",
- "15 CLARK: I GET TO CHOOSE FIRST? 38.474 6.495 -rjnvL9LL3U\n",
- "16 I WILL TAKE MICHIGAN STATE. 43.304 4.963 -rjnvL9LL3U\n",
- "17 FLORIDA IS THE MOST COMPLETE 47.168 4.581 -rjnvL9LL3U\n",
- "18 TEAM IN THE FIELD. 48.300 4.515 -rjnvL9LL3U\n",
- "19 CLEMSON IS MY DARK HORSE. 51.782 3.098 -rjnvL9LL3U\n",
- "20 THEY ARE RUGGED. 52.848 3.131 -rjnvL9LL3U\n",
- "21 THEY ALMOST GOT TO THE FINAL 54.913 2.499 -rjnvL9LL3U\n",
- "22 FOUR LAST YEAR. 56.012 3.798 -rjnvL9LL3U\n",
- "23 AND THEN WE HAVE DUKE. 57.445 4.763 -rjnvL9LL3U\n",
- "24 I AM TAKING CLEMSON IN SAN 59.843 6.978 -rjnvL9LL3U\n",
- "25 ANTONIO. 62.241 6.979 -rjnvL9LL3U\n",
- "26 I AM GOING WITH THE GATORS AS MY 66.855 2.598 -rjnvL9LL3U\n",
- "27 CHAMP. 69.253 3.997 -rjnvL9LL3U\n",
- "28 >> YOU ARE GETTING REALLY BOLD. 70.885 4.631 -rjnvL9LL3U\n",
- "29 I AM A LITTLE LESS BOLD. 73.284 4.397 -rjnvL9LL3U\n",
- "30 I HAD IOWA STATE HERE BUT 77.148 2.698 -rjnvL9LL3U\n",
- "31 BECAUSE OF THE INJURY TO GILBERT 77.714 4.430 -rjnvL9LL3U\n",
- "32 I WILL TAKE THE SPARTANS. 79.879 4.581 -rjnvL9LL3U\n",
- "33 I WILL TAKE DUKE AND FLORIDA IN 82.561 2.798 -rjnvL9LL3U\n",
- "34 THE FINAL AND THE BLUE DEVILS, 84.493 3.731 -rjnvL9LL3U\n",
- "35 ASSUMING THAT COOPER FLAGG IS 85.393 3.431 -rjnvL9LL3U\n",
- "36 HEALTHY, THEY ARE CUTTING DOWN 88.258 0.865 -rjnvL9LL3U\n",
- "37 THE NETS. 88.857 3.697 -rjnvL9LL3U\n",
- "38 >> HERE WE GO. 90.789 2.532 -rjnvL9LL3U\n",
- "39 I LIKE AUBURN COMING OUT OF THE 92.721 2.632 -rjnvL9LL3U\n",
- "40 SOUTH. 93.354 2.965 -rjnvL9LL3U\n",
- "41 I LIKE ST. JOHN LEADING OUT OF 95.719 2.765 -rjnvL9LL3U\n",
- "42 THE WEST. 96.352 3.714 -rjnvL9LL3U\n",
- "43 DUKE IN THE EAST, HOUSTON IN THE 98.517 3.814 -rjnvL9LL3U\n",
- "44 MIDWEST. 100.099 3.431 -rjnvL9LL3U\n",
- "45 IN MY CHAMPIONSHIP GAME, I HAVE 102.731 2.964 -rjnvL9LL3U\n",
- "46 HOUSTON AND AUBURN AND I HAVE 103.563 4.664 -rjnvL9LL3U\n",
- "47 THE AUBURN TIGERS FINISHING THE 105.729 3.031 -rjnvL9LL3U\n",
- "48 SEASON AS NATIONAL CHAMPS. 108.260 3.132 -rjnvL9LL3U\n",
- "49 ADAM: I THOUGHT HE WOULD GO BIG 108.793 2.932 -rjnvL9LL3U\n",
- "50 EAST. 111.425 3.264 -rjnvL9LL3U\n",
- "51 >> THREE DIFFERENT CHAMPS FOR 112.957 1.899 -rjnvL9LL3U\n",
- "52 US. 114.723 3.015 -rjnvL9LL3U\n",
- "53 ADAM: THE BIG EAST HAVE FIVE IN 114.889 5.514 -rjnvL9LL3U\n",
- "54 THE TOURNAMENT FOR US. 117.771 3.065 -rjnvL9LL3U\n",
- "55 YOU MIXED IT UP. 120.436 3.597 -rjnvL9LL3U\n",
- "56 >> HE LEARNED NOT TO GO WITH MY 120.869 3.331 -rjnvL9LL3U\n",
- "57 PICK. 124.067 1.415 -rjnvL9LL3U\n",
- "58 CLARK: YOU GUY 124.300 1.182 -rjnvL9LL3U"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "snippets_df['Video ID'] = video_id\n",
- "snippets_df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "f96ac4f5",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Saving CSV file to: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/prediction_acquition-youtube/../data/yt/raw_transcripts/-rjnvL9LL3U.csv\n"
- ]
- }
- ],
- "source": [
- "base_data_path = DataProcessing.load_base_data_path(notebook_dir=notebook_dir)\n",
- "save_data_path = os.path.join(base_data_path, \"yt\", \"raw_transcripts\")\n",
- "DataProcessing.save_to_file(snippets_df, path=save_data_path, prefix=f'{video_id}', save_file_type='csv', include_version=False)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1de284e0",
- "metadata": {},
- "source": [
- "## Filtering English Auto Generated Transcripts"
- ]
- },
{
"cell_type": "code",
"execution_count": 5,
- "id": "841b2680",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{'id': '-rjnvL9LL3U', 'language': 'English - DTVCC1', 'language_code': 'en', 'is_generated': False}\n",
- "{'id': '-rjnvL9LL3U', 'language': 'English (auto-generated)', 'language_code': 'en', 'is_generated': True}\n",
- "{'id': 'ZZN7BAYeOtc', 'language': 'English (auto-generated)', 'language_code': 'en', 'is_generated': True}\n",
- "{'id': 'mBK8o5orBbE', 'language': 'English (auto-generated)', 'language_code': 'en', 'is_generated': True}\n",
- "{'id': 'Z0xP3GNpjkw', 'language': 'English - DTVCC1', 'language_code': 'en', 'is_generated': False}\n",
- "{'id': 'Z0xP3GNpjkw', 'language': 'Scots - CC1', 'language_code': 'sco', 'is_generated': False}\n",
- "{'id': 'Z0xP3GNpjkw', 'language': 'English (auto-generated)', 'language_code': 'en', 'is_generated': True}\n"
- ]
- }
- ],
- "source": [
- "video_ids = [\"-rjnvL9LL3U\", \"ZZN7BAYeOtc\", \"mBK8o5orBbE\", \"Z0xP3GNpjkw\"]\n",
- "\n",
- "results = {}\n",
- "\n",
- "for vid in video_ids:\n",
- " transcript_list = ytt_api.list(vid)\n",
- " for transcript in transcript_list :\n",
- " print({\n",
- " \"id\": vid,\n",
- " \"language\": transcript.language,\n",
- " \"language_code\": transcript.language_code,\n",
- " \"is_generated\": transcript.is_generated})"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "id": "0020cd6e",
+ "id": "9e7c66e3",
"metadata": {},
"outputs": [],
"source": [
@@ -1225,356 +137,364 @@
},
{
"cell_type": "code",
- "execution_count": 31,
- "id": "de9c1d7b",
+ "execution_count": 6,
+ "id": "9be1a817",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[FetchedTranscriptSnippet(text='Welcome to CBS Sports HQ presented by', start=2.24, duration=4.639),\n",
- " FetchedTranscriptSnippet(text='Hines. It has to be Hines and this has', start=4.48, duration=3.6),\n",
- " FetchedTranscriptSnippet(text=\"to be a good show because it's a Friday\", start=6.879, duration=2.961),\n",
- " FetchedTranscriptSnippet(text=\"of Super Bowl week. I'm Jenny Dell. The\", start=8.08, duration=3.679),\n",
- " FetchedTranscriptSnippet(text='Super Bowl, the day after tomorrow. We', start=9.84, duration=3.759),\n",
- " FetchedTranscriptSnippet(text=\"are almost there. We've made it. Let's\", start=11.759, duration=3.04),\n",
- " FetchedTranscriptSnippet(text='get through this Friday afternoon', start=13.599, duration=3.431)]"
- ]
- },
- "execution_count": 31,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "video_id = \"Z0xP3GNpjkw\" #-rjnvL9LL3U ZZN7BAYeOtc mBK8o5orBbE LXPQrZV4Cfw fUmJAtFEGn8 MTVAkVkkaz4 Z0xP3GNpjkw\n",
- "transcript_snippets = fetch_en_auto_transcript_snippets(ytt_api, video_id)\n",
- "transcript_snippets[:7]"
+ "def download_audio(video_id, output_path=\"audio.%(ext)s\"):\n",
+ " \"\"\"\n",
+ " Pass a YouTube VideoID and download the audio file.\n",
+ " \"\"\"\n",
+ " url = f\"https://www.youtube.com/watch?v={video_id}\"\n",
+ " \n",
+ " ydl_opts = {\n",
+ " 'format': 'bestaudio/best',\n",
+ " 'outtmpl': output_path,\n",
+ " 'quiet': True\n",
+ " }\n",
+ " \n",
+ " with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
+ " ydl.download([url])"
]
},
{
"cell_type": "code",
- "execution_count": 32,
- "id": "bd62f5a2",
+ "execution_count": 7,
+ "id": "98d121d6",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Text | \n",
- " Start Time | \n",
- " Duration | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " Welcome to CBS Sports HQ presented by | \n",
- " 2.240 | \n",
- " 4.639 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " Hines. It has to be Hines and this has | \n",
- " 4.480 | \n",
- " 3.600 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " to be a good show because it's a Friday | \n",
- " 6.879 | \n",
- " 2.961 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " of Super Bowl week. I'm Jenny Dell. The | \n",
- " 8.080 | \n",
- " 3.679 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " Super Bowl, the day after tomorrow. We | \n",
- " 9.840 | \n",
- " 3.759 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Text Start Time Duration\n",
- "0 Welcome to CBS Sports HQ presented by 2.240 4.639\n",
- "1 Hines. It has to be Hines and this has 4.480 3.600\n",
- "2 to be a good show because it's a Friday 6.879 2.961\n",
- "3 of Super Bowl week. I'm Jenny Dell. The 8.080 3.679\n",
- "4 Super Bowl, the day after tomorrow. We 9.840 3.759"
- ]
- },
- "execution_count": 32,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "snippets_df = extract_data(transcript_snippets)\n",
- "snippets_df.head()"
+ "def whisper_result_to_dataframe(result: Mapping[str, Any], video_id: str) -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " Converts the dict returned by the Whisper pipeline into a DataFrame with columns:\n",
+ " Text, Start Time, Duration, Video ID.\n",
+ " \"\"\"\n",
+ " rows = []\n",
+ " chunks = result.get(\"chunks\") or []\n",
+ " for chunk in chunks:\n",
+ " text = (chunk.get(\"text\") or \"\").strip()\n",
+ " ts = chunk.get(\"timestamp\")\n",
+ " if not text or ts is None:\n",
+ " continue\n",
+ " start_s = end_s = None\n",
+ " if isinstance(ts, (list, tuple)):\n",
+ " if len(ts) >= 2:\n",
+ " a, b = ts[0], ts[1]\n",
+ " \n",
+ " if a is not None and b is not None:\n",
+ " try:\n",
+ " start_s = float(a)\n",
+ " end_s = float(b)\n",
+ " except (TypeError, ValueError):\n",
+ " continue\n",
+ " \n",
+ " elif a is not None and b is None:\n",
+ " continue \n",
+ " elif len(ts) == 1 and ts[0] is not None:\n",
+ " try:\n",
+ " start_s = float(ts[0])\n",
+ " end_s = start_s \n",
+ " except (TypeError, ValueError):\n",
+ " continue\n",
+ " elif isinstance(ts, dict):\n",
+ " a = ts.get(\"start\")\n",
+ " b = ts.get(\"end\")\n",
+ " if a is None or b is None:\n",
+ " continue\n",
+ " try:\n",
+ " start_s = float(a)\n",
+ " end_s = float(b)\n",
+ " except (TypeError, ValueError):\n",
+ " continue\n",
+ " else:\n",
+ " continue\n",
+ " if start_s is None or end_s is None:\n",
+ " continue\n",
+ " if not math.isfinite(start_s) or not math.isfinite(end_s):\n",
+ " continue\n",
+ " duration = end_s - start_s\n",
+ " if duration < 0:\n",
+ " continue\n",
+ " rows.append({\n",
+ " \"Text\": text,\n",
+ " \"Start Time\": start_s,\n",
+ " \"Duration\": duration,\n",
+ " \"Video ID\": video_id,\n",
+ " })\n",
+ " return pd.DataFrame(rows)"
]
},
{
"cell_type": "code",
- "execution_count": 33,
- "id": "7fd75561",
+ "execution_count": 8,
+ "id": "8f8c542c",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Text | \n",
- " Start Time | \n",
- " Duration | \n",
- " Video ID | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " Welcome to CBS Sports HQ presented by | \n",
- " 2.240 | \n",
- " 4.639 | \n",
- " Z0xP3GNpjkw | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " Hines. It has to be Hines and this has | \n",
- " 4.480 | \n",
- " 3.600 | \n",
- " Z0xP3GNpjkw | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " to be a good show because it's a Friday | \n",
- " 6.879 | \n",
- " 2.961 | \n",
- " Z0xP3GNpjkw | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " of Super Bowl week. I'm Jenny Dell. The | \n",
- " 8.080 | \n",
- " 3.679 | \n",
- " Z0xP3GNpjkw | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " Super Bowl, the day after tomorrow. We | \n",
- " 9.840 | \n",
- " 3.759 | \n",
- " Z0xP3GNpjkw | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Text Start Time Duration Video ID\n",
- "0 Welcome to CBS Sports HQ presented by 2.240 4.639 Z0xP3GNpjkw\n",
- "1 Hines. It has to be Hines and this has 4.480 3.600 Z0xP3GNpjkw\n",
- "2 to be a good show because it's a Friday 6.879 2.961 Z0xP3GNpjkw\n",
- "3 of Super Bowl week. I'm Jenny Dell. The 8.080 3.679 Z0xP3GNpjkw\n",
- "4 Super Bowl, the day after tomorrow. We 9.840 3.759 Z0xP3GNpjkw"
- ]
- },
- "execution_count": 33,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "snippets_df['Video ID'] = video_id\n",
- "snippets_df.head()"
+ "def extract_transcript_df(\n",
+ " video_id: str,\n",
+ " *,\n",
+ " extract_with_whisper: bool = False,\n",
+ " ytt_api: YouTubeTranscriptApi | None = None,\n",
+ " whisper_pipe=None,\n",
+ " audio_dir: str | None = None,\n",
+ " audio_ext: str = \"webm\",\n",
+ " whisper_model: str = \"openai/whisper-large-v3\",\n",
+ ") -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " Unified transcript extractor.\n",
+ " - If extract_with_whisper=False:\n",
+ " uses YouTubeTranscriptApi + your English(auto-generated) filter\n",
+ " - If extract_with_whisper=True:\n",
+ " downloads audio and transcribes with Whisper pipeline\n",
+ " \"\"\"\n",
+ " if not extract_with_whisper:\n",
+ " if ytt_api is None:\n",
+ " ytt_api = YouTubeTranscriptApi()\n",
+ " \n",
+ " snippets = fetch_en_auto_transcript_snippets(ytt_api, video_id)\n",
+ " \n",
+ " df = extract_data(snippets)\n",
+ " \n",
+ " if \"Video ID\" not in df.columns:\n",
+ " df[\"Video ID\"] = video_id\n",
+ " return df[[\"Text\", \"Start Time\", \"Duration\", \"Video ID\"]]\n",
+ " \n",
+ " if audio_dir is None:\n",
+ " audio_dir = os.getcwd()\n",
+ " os.makedirs(audio_dir, exist_ok=True)\n",
+ " audio_path = os.path.join(audio_dir, f\"{video_id}.{audio_ext}\")\n",
+ " \n",
+ " download_audio(video_id, output_path=audio_path)\n",
+ " \n",
+ " if whisper_pipe is None:\n",
+ " whisper_pipe = pipeline(\n",
+ " \"automatic-speech-recognition\",\n",
+ " model=whisper_model,\n",
+ " return_timestamps=True,\n",
+ " )\n",
+ " result = whisper_pipe(audio_path)\n",
+ " \n",
+ " return whisper_result_to_dataframe(result, video_id)"
]
},
{
- "cell_type": "code",
- "execution_count": 34,
+ "cell_type": "markdown",
+ "id": "65bd97ff",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Saving CSV file to: c:\\Users\\adria\\OneDrive\\Área de Trabalho\\UF Data Studio\\predictions\\prediction_acquition-youtube\\../data\\yt\\raw_transcripts\\Z0xP3GNpjkw.csv\n"
- ]
- }
- ],
"source": [
- "base_data_path = DataProcessing.load_base_data_path(notebook_dir=notebook_dir)\n",
- "save_data_path = os.path.join(base_data_path, \"yt\", \"raw_transcripts\")\n",
- "DataProcessing.save_to_file(snippets_df, path=save_data_path, prefix=f'{video_id}', save_file_type='csv', include_version=False)"
+ "## Transcripts \n",
+ "\n",
+ "| # | Title | Link | Video ID | DOMAIN |\n",
+ "|---|---|---|---|---|\n",
+ "| 1 | Immediate 2025 NCAA tournament Final Four and championship picks | https://www.youtube.com/watch?v=-rjnvL9LL3U | -rjnvL9LL3U | SPORTS |\n",
+ "| 2 | FULL PREVIEW & PICKS: Patriots vs. Seahawks Super Bowl LX 🏆 Who wins the Lombardi Trophy? \\| NFL Live | https://www.youtube.com/watch?v=ZZN7BAYeOtc | ZZN7BAYeOtc | SPORTS |\n",
+ "| 3 | FIRST TAKE'S SUPER BOWL PICKS! The crew is going with... 😱 | https://www.youtube.com/watch?v=mBK8o5orBbE | mBK8o5orBbE | SPORTS |\n",
+ "| 4 | NFL Predictions and Picks For Super Bowl LX [Patriots vs Seahawks] - Best Bets ✅ | https://www.youtube.com/watch?v=LXPQrZV4Cfw | LXPQrZV4Cfw | SPORTS |\n",
+ "| 5 | Rich Eisen’s Pick to Win the Seahawks vs Patriots Super Bowl LX Is….? - The Rich Eisen Show | https://www.youtube.com/watch?v=fUmJAtFEGn8 | fUmJAtFEGn8 | SPORTS |\n",
+ "| 6 | The Pat McAfee Show's Picks For Super Bowl LX | https://www.youtube.com/watch?v=MTVAkVkkaz4 | MTVAkVkkaz4 | SPORTS |\n",
+ "| 7 | Super Bowl LX On-Site Preview: Picks, Predictions, Everything you need to know for Patriots-Seahawks | https://www.youtube.com/watch?v=Z0xP3GNpjkw | Z0xP3GNpjkw | SPORTS |\n",
+ "| 8 | Sharp 600 - BEST SBLX Patriots vs. Seahawks Any Time Touchdown Scorers & Props | https://www.youtube.com/watch?v=6STv2GFNB6I | 6STv2GFNB6I | SPORTS |\n",
+ "| 9 | Super Bowl LX - Patriots vs. Seahawks - Picks and Predictions w/ Todd Fuhrman - Sharp 600 | https://www.youtube.com/watch?v=FPl-F2k_KtM | FPl-F2k_KtM | SPORTS |\n",
+ "| 10 | Predicting the UEFA Champions League TO THE FINAL | https://www.youtube.com/watch?v=AoE8KFAXHSc | AoE8KFAXHSc | SPORTS |\n",
+ "| 11 | Insane Tennis Predictions For 2026 | https://www.youtube.com/watch?v=jCe-bY1nP7o | jCe-bY1nP7o | SPORTS |\n",
+ "| 12 | MY EARLY WORLD CUP 2026 PREDICTION | https://www.youtube.com/watch?v=SenmTFJUqso | SenmTFJUqso | SPORTS |\n",
+ "| 13 | FINAL I Predict The FULL 2026 Fifa World Cup… And It Gets CRAZY 😱 | https://www.youtube.com/watch?v=d2k__EvyxHM | d2k__EvyxHM | SPORTS |\n",
+ "| 14 | My Official 2026 NBA Season Predictions (Finals, Trades, Awards) | https://www.youtube.com/watch?v=VQPYwF94gDY | VQPYwF94gDY | SPORTS |\n",
+ "| 15 | Avery Johnson’s 5 BOLD PREDICTIONS for 2025 NBA Season: Wembanyama Wins MVP? Clippers Miss Playoffs? | https://www.youtube.com/watch?v=eogPbRmySCk | eogPbRmySCk | SPORTS |\n",
+ "| 16 | Predicting the ENTIRE 2026 NBA Season | https://www.youtube.com/watch?v=sT51Y-gP9lE | sT51Y-gP9lE | SPORTS |\n",
+ "| 17 | Morgan Stanley's Wilson Bullish on Stocks for 2026 | https://www.youtube.com/watch?v=sj8tHn6lGAs | sj8tHn6lGAs | FINANCE |\n",
+ "| 18 | Our 2026 Financial Predictions | https://www.youtube.com/watch?v=mkNOcK-S8XQ | mkNOcK-S8XQ | FINANCE |\n",
+ "| 19 | Spring Forecast 2026: Wintry Weather Isn’t Finished Yet! | https://www.youtube.com/watch?v=ysSuV0_vnYI | ysSuV0_vnYI | WEATHER |\n",
+ "| 20 | 2026 Weather Outlook: La Niña’s Exit, El Niño’s Potential and the Signals Farmers Should Watch | https://www.youtube.com/watch?v=nGTBk-VI4Ew | nGTBk-VI4Ew | WEATHER |\n",
+ "| 21 | Farmers' Almanac Winter Weather Forecast 2025 - 2026 | https://www.youtube.com/watch?v=nsdAJlyjVeA | nsdAJlyjVeA | WEATHER |\n",
+ "| 22 | Economist makes dire prediction about US employment rate | https://www.youtube.com/watch?v=DieTKcXFyi8 | DieTKcXFyi8 | POLITICAL |\n",
+ "| 23 | JP Morgan strategist predicts 2026 inflation outlook | https://www.youtube.com/watch?v=l4Gdl6SCTQg | l4Gdl6SCTQg | POLITICAL |\n",
+ "| 24 | JPMorgan releases new prediction for the US economy in 2026 | https://www.youtube.com/watch?v=hdib59Tj76E | hdib59Tj76E | POLITICAL |\n",
+ "| 25 | This Fed will remain ‘paralyzed’: Expert makes prediction on future rate hikes | https://www.youtube.com/watch?v=m8tsZKtdtME | m8tsZKtdtME | POLITICAL |\n",
+ "| 26 | 2026 HOUSE Prediction Map Based on NEW POLLS! | https://www.youtube.com/watch?v=DEhAFSPp6lQ | DEhAFSPp6lQ | POLITICAL |\n",
+ "| 27 | Saagar REVEALS 2024 PREDICTION: Trump WINS | https://www.youtube.com/watch?v=KIwCGebr5xg | KIwCGebr5xg | POLITICAL |\n",
+ "| 28 | American Cancer Society Predicting More Cancer Cases in 2026 | https://www.youtube.com/watch?v=oSIQ8l6SqAc | oSIQ8l6SqAc | HEALTH |\n",
+ "| 29 | Doctors predict uterine cancer rates to rise | https://www.youtube.com/watch?v=WIxqbAJw5hU | WIxqbAJw5hU | HEALTH |\n",
+ "| 30 | Just over a third of U.S. adults are obese. By 2030, 42 percent will be, says a forecast released Mo | https://www.youtube.com/watch?v=Y8epuW7-TXw | Y8epuW7-TXw | HEALTH |"
]
},
{
"cell_type": "markdown",
- "id": "8601966f",
+ "id": "249fe832",
"metadata": {},
"source": [
- "## Whisper"
+ "## Extracting Transcriptions"
]
},
{
- "cell_type": "code",
- "execution_count": null,
- "id": "0127fc80",
+ "cell_type": "markdown",
+ "id": "5231c785",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Collecting yt-dlp\n",
- " Downloading yt_dlp-2026.3.17-py3-none-any.whl.metadata (182 kB)\n",
- "Downloading yt_dlp-2026.3.17-py3-none-any.whl (3.3 MB)\n",
- " ---------------------------------------- 0.0/3.3 MB ? eta -:--:--\n",
- " ---------------------------------------- 3.3/3.3 MB 17.7 MB/s eta 0:00:00\n",
- "Installing collected packages: yt-dlp\n",
- "Successfully installed yt-dlp-2026.3.17\n"
- ]
- }
- ],
"source": [
- "#!pip install yt-dlp"
+ "There are **two ways** to obtain a transcript for a YouTube video:\n",
+ "\n",
+ "- **YouTube transcripts (fast, no download)**: uses `YouTubeTranscriptApi()` to fetch captions that already exist on YouTube. In this notebook, we prioritize **English auto-generated** captions when available.\n",
+ "\n",
+ "- **Whisper transcripts (slower, download required)**: downloads the video audio and transcribes it locally using a Whisper model via the Hugging Face `pipeline(...)`.\n",
+ "\n",
+ "To make this easy to use, we wrap both approaches in a single function: `extract_transcript_df(...)`.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "**Option A — Do NOT use Whisper**\n",
+ "\n",
+ "Use this when you want the fastest approach, the video has English auto-generated captions available, or you do not have enough memory to run Whisper.\n",
+ "\n",
+ "```python\n",
+ "df = extract_transcript_df(\"Z0xP3GNpjkw\", extract_with_whisper=False)\n",
+ "df.head()\n",
+ "```\n",
+ "\n",
+ "**Option B — Use Whisper (download + transcribe)**\n",
+ "\n",
+ "Use this when the video has no captions / captions are disabled, or\n",
+ "you want transcripts that don’t depend on YouTube captions (more reliable).\n",
+ "\n",
+ "```python\n",
+ "from transformers import pipeline\n",
+ "pipe = pipeline(\n",
+ " \"automatic-speech-recognition\",\n",
+ " model=\"openai/whisper-large-v3\",\n",
+ " return_timestamps=True\n",
+ ")\n",
+ "df = extract_transcript_df(\n",
+ " \"Z0xP3GNpjkw\",\n",
+ " extract_with_whisper=True,\n",
+ " whisper_pipe=pipe,\n",
+ " audio_dir=save_data_path\n",
+ ")\n",
+ "df.head()\n",
+ "```\n",
+ "\n",
+ "---\n",
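+ "**Computational note:** `openai/whisper-large-v3` is high quality but computationally heavy. If memory is limited, use a smaller checkpoint such as `openai/whisper-base`, `openai/whisper-small`, or `openai/whisper-medium`.\n",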
+ "\n",
+ "---\n",
+ "**Saving the final DataFrame**\n",
+ "\n",
+ "After you generate the transcript DataFrame (either method), save it so you don’t need to re-run extraction:\n",
+ "\n",
+ "```python\n",
+ "base_data_path = DataProcessing.load_base_data_path(notebook_dir=notebook_dir)\n",
+ "save_data_path = os.path.join(base_data_path, \"yt\", \"transcripts\") # choose a folder name you like\n",
+ "DataProcessing.save_to_file(\n",
+ " df,\n",
+ " path=save_data_path,\n",
+ " prefix=video_id,\n",
+ " save_file_type=\"csv\",\n",
+ " include_version=False\n",
+ ")\n",
+ "```\n",
+ "\n",
+ "This will create a CSV for that \"video_id\" that you can reuse later in your pipeline.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "**Whisper setup (only needed if using `extract_with_whisper=True`)**\n",
+ "\n",
+ "Whisper transcription in this notebook uses the Hugging Face `pipeline(\"automatic-speech-recognition\", ...)`.\n",
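+ "If you plan to use Whisper, make sure you have the required dependencies installed. The pipeline decodes the downloaded audio file with `ffmpeg`, so `ffmpeg` must also be installed and available on your PATH.\n",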
+ "\n",
+ "Install Python packages:\n",
+ "\n",
+ "```python\n",
+ "#!pip install -U transformers accelerate yt-dlp\n",
+ "```"
]
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "ce00da08",
+ "execution_count": null,
+ "id": "ddaa9dfa",
"metadata": {},
"outputs": [],
"source": [
- "import yt_dlp\n",
- "from transformers import pipeline\n",
- "from typing import Any, Mapping"
+ "video_id = \"SenmTFJUqso\" #d2k__EvyxHM VQPYwF94gDY eogPbRmySCk sT51Y-gP9lE"
]
},
{
"cell_type": "code",
- "execution_count": 4,
- "id": "bc7691ca",
+ "execution_count": 10,
+ "id": "883cb0d2",
"metadata": {},
"outputs": [],
"source": [
- "# Youtube Audio Download\n",
- "\n",
- "def download_audio(video_id, output_path=\"audio.%(ext)s\"):\n",
- " url = f\"https://www.youtube.com/watch?v={video_id}\"\n",
- " \n",
- " ydl_opts = {\n",
- " 'format': 'bestaudio/best',\n",
- " 'outtmpl': output_path,\n",
- " 'quiet': True\n",
- " }\n",
- " \n",
- " with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
- " ydl.download([url])"
+ "# df = extract_transcript_df(video_id, extract_with_whisper=False)"
]
},
{
"cell_type": "code",
- "execution_count": 5,
- "id": "0936c558",
+ "execution_count": 11,
+ "id": "8dc416b1",
"metadata": {},
"outputs": [],
"source": [
- "base_data_path = DataProcessing.load_base_data_path(notebook_dir=notebook_dir)\n",
- "save_data_path = os.path.join(base_data_path, \"yt\", \"mp3_audio\")\n",
- "os.makedirs(save_data_path, exist_ok=True) "
+ "# df.head()"
]
},
{
"cell_type": "code",
- "execution_count": 6,
- "id": "3d9ce5c7",
+ "execution_count": 12,
+ "id": "70ec6c80",
"metadata": {},
"outputs": [],
"source": [
- "video_id = \"ZZN7BAYeOtc\" #-rjnvL9LL3U ZZN7BAYeOtc mBK8o5orBbE LXPQrZV4Cfw fUmJAtFEGn8 MTVAkVkkaz4 Z0xP3GNpjkw\n",
- "\n",
- "outtmpl = os.path.join(save_data_path, f\"{video_id}.webm\")"
+ "# base_data_path = DataProcessing.load_base_data_path(notebook_dir=notebook_dir)\n",
+ "# save_data_path = os.path.join(base_data_path, \"yt\", \"raw_transcripts\")\n",
+ "# DataProcessing.save_to_file(df, path=save_data_path, prefix=f'{video_id}', save_file_type='csv', include_version=False)"
]
},
{
"cell_type": "code",
- "execution_count": 24,
- "id": "d7fea276",
+ "execution_count": 10,
+ "id": "71706281",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "WARNING: [youtube] No supported JavaScript runtime could be found. Only deno is enabled by default; to use another runtime add --js-runtimes RUNTIME[:PATH] to your command/config. YouTube extraction without a JS runtime has been deprecated, and some formats may be missing. See https://github.com/yt-dlp/yt-dlp/wiki/EJS for details on installing one\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " \r"
+ "Device set to use cpu\n"
]
}
],
"source": [
- "download_audio(video_id, output_path=outtmpl)"
+ "# \"openai/whisper-large-v3\" is memory-heavy; if memory is limited, switch to a smaller checkpoint (openai/whisper-base, openai/whisper-small, openai/whisper-medium)\n",
+ "\n",
+ "pipe = pipeline(\"automatic-speech-recognition\", model=\"openai/whisper-large-v3\", return_timestamps=True)"
]
},
{
"cell_type": "code",
- "execution_count": 7,
- "id": "a73f63a2",
+ "execution_count": null,
+ "id": "98992167",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "Device set to use cpu\n",
+ "WARNING: [youtube] No supported JavaScript runtime could be found. Only deno is enabled by default; to use another runtime add --js-runtimes RUNTIME[:PATH] to your command/config. YouTube extraction without a JS runtime has been deprecated, and some formats may be missing. See https://github.com/yt-dlp/yt-dlp/wiki/EJS for details on installing one\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " \r"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
"c:\\Users\\adria\\OneDrive\\Área de Trabalho\\UF Data Studio\\predictions\\.venv\\Lib\\site-packages\\transformers\\models\\whisper\\generation_whisper.py:573: FutureWarning: The input name `inputs` is deprecated. Please make sure to use `input_features` instead.\n",
" warnings.warn(\n",
"Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.\n"
@@ -1582,727 +502,13 @@
}
],
"source": [
- "# Run Whisper pipeline\n",
- "\n",
- "pipe = pipeline(\n",
- " \"automatic-speech-recognition\",\n",
- " model=\"openai/whisper-base\", # change to \"openai/whisper-large-v3\" if more memory is available\n",
- " return_timestamps=True\n",
- ")\n",
- "\n",
- "result = pipe(outtmpl)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "8e03f911",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "\" We're now 50 hours till Super Bowl 60 that clock will continue to count down. It's going to be here before we know it. And let's continue the conversation around JSN, mean, obviously with the ball in his hands, he's so dynamic. But you've noticed some things when he doesn't even have the ball that you wanted to point out. And I think it's a part of why it was such a deserving offensive player of the year because of the gravity he exerts on the field. One of the stats that I love on throws over 10 yards. When the Seahawks throw to receiver not named JSN, that receiver is twice as likely as your average NFL receiver to be wide open or also significantly more likely to be open period. So if it's Cooper Cup or boat you whenever you want to see I say I'm going to like how is AJ Barnard so damn wide open a lot of the time it is because of the attention paid to JSN. He's also really impactful in the screen game. We've been talking about walkers receiver a couple plays from the ends of the season. This is in the Rams game. So often they run these post over concepts with JSN's as a two-man concept. the Rams defensive. Probably the entire second area follows J.S.S. You get the little throwback screen. You got a numbers advantage, which of course is very helpful. But in addition to the numbers and the blockers, look at all the grass in front of Kenneth Walker. You can even see the defensive when the C.S. Amd Arnold turns back. They assume he's going to throw to J.S.S.S.S. because so often Seattle does. Again, this is two couple weeks later against San Francisco. J.S.S.S.S.S. on the in-breaker. Once again, pulls the safety. Throw the screen. Again, you have the the player. And when the center first down play action, they run him so much on those overs and in breakers. It creates so much space for the other players on the game. I love that tape. It reminds me of a lot of what Andre Johnson used to impact in Houston. Gary Kubiak, Kyle Shanahan. 
I remember Kyle often talking about install. That was the part of the game where he would move Andre in all these different places. Andre would be in the slot. the motion out to end into how across the formation, bring them back, line them up in the backfield. And I think what you'll see in this game is Clint Kubiak doing that early on. Partly to have success, partly to get information. All right, what are you doing? What's New England's plan? When I moved J.S. in on first down from the slot outside in, what's their call? What's their check? Vice-person, what not. And when you hear Cooper Cup talk about the route running, it reminds me of Isaac Bruce. know with a shoulders never go up or down. They're just kind of steady. There's just no idea from a defensive back of like what's the likeliest route. Yeah, I think what I love about what you both are saying is how great players affect the entire football field. If this was basketball and swag, I'll understand this. If this was basketball, J.S. and Kenneth Walker are constantly in the picking role where they are the focal point, right? Like they can go into the hood and say, I need the ball. But everybody else feeds off of them. And So whether it's play action or their doubling JSN now Cooper Cup gets opportunity. The fact that he lines up in the backfield he plays number one, he plays the slot. And also to me, stacks and condensed splits make it extremely hard to not pay a ton of tension to them because you're always putting him in traffic. When a receiver is in traffic, now you are attracting bodies. And if you look at some of the plays the Cooper Cups make, the tight ends make when they're they are late developing in the play because so much attention has gone to Jackson. Yeah, I think it'd be really interesting just Clint Kubiak. What's the answer? But he's so good in games. You think about the Rams game where JSN really didn't do anything in the first half. 
And then when you see defense as bracketing in Kubiak is so good to your point about moving him around trying to find ways to get a matchups. And JSN never gets frustrated. Yeah, I think the other piece of it is too though. He understands he's going to get his at some point. Yeah, that's the reason you don't. have to get frustrated. It's not even that we have to have you number one in the progression. We have to make sure we call this play for you. It's eventually through the flow of the offense. You'll have your chance. And when he does, he seems to always capitalize. And like Dan says, every route, the stem, the top of the route, they look the same. That was a moment happens. You separate so much. He's so talented. Adarnals 18 completions or 20 or more area yards to Smith and Jigba are the most this season by any QB wide receiver combo. And we're going to talk about the Patriots answer to this. We continue to break down every angle of the Super Bowl 60. There's a look at sparkling Levi's stadium just about 35 miles down the road. It's going to take you a while to get there though. Just telling you traffic's been a little much here. But we expect that on a Super Bowl week. the Patriots can answer JSN on the other side. Opposing quarterbacks are completing just 39% of their passes with him in coverage this postseason on pace to be the lowest in a single season postseason since NFL next year and began tracking in 2018. He's allowed no touchdowns with an interception. He's just been locked down the best way to describe it right and Dan, we spoke at length about JSN the last block. What can New England try to do defensively to answer that on the other side. Yeah, so one pass off to palms. And this is something we've talked about, JSN and the Crossers start on one side of the field in this play action or crossing concept. New England is notorious for this. They didn't do it a ton this year, but there's a clip in the Denver game. And this is going to be palms, OK? 
So that linebacker's responsible for what we call the Curl, the Flat Area. This is quarter. So everybody's responsible for a quarter of the field in defense. Those two defenders at the bottom of the screen are playing palms. They're reading that slot receiver. Once that slot receiver outbreaks, it becomes cover two. The corner drives it. The safety plays over the top. So that's one way, will they'll try to have extra bodies on them. The other part is this pass off when a crossing route happens. The man in coverage passes it off to the safety. He cuts it and goes over the top. The palms thing for me is always from the quarterback's thought. I'm always like, it's hard at times for us. But I always know there's a shot down the field if you're not on the same page. I'm trying to think about the amount of times I've seen San Bernard and Seattle take whole shots. The reason that I would love to see palms or love to see two-sake these highs because I always think it's a great defense for condensed splits deep crossers. When you look at teams that have had success against two of Tungalvaloa and Miami dolphins, a lot of times you've seen them in Cover 2 or palms or two buster where you're passing off things with people who can see it from high to that's why it's so difficult in cover 3. I also wonder what you do with Christian Gonzalez if you're ever say okay we got to get it more men and men do you put him on Rashid Shaheed and allow them to be one on one or with Cooper Cup and then you double team where you played the Cloud coverage to J.S.N. And so now you have an I.L.A. Double 11. Yeah, double 11 where you could put hands where you could put hands on him, have someone over the top and I would never play in and out because he's too good of a rope. How does that change so within moving? the way they change is when he moves is now you can just change the sides. 
The problem is you're never going to be upset to me in my opinion if Carlton Davis ends up on if Carlton Davis has to end up on Rashid Shaheed or Cooper Cup because it's the change of strength motion. What you never want is for JSN to end up in the man the man that would be the guy that's the second that the second while receiver. So for those who don't know the one double recommendation is really interesting because that's what the Patriots use famously Don't wait they did it with the road So and I think it does make sense it does I do think though Gonzales as great as he is she he can get him with a speed you remember Marvin Mims obviously in the AFC JV to have beat him on the go route And I also think if they end up playing any form of man coverage There's here you see here Gonzo. I mean he just got beat like the game. But that's been there. That shot is on the backside. You can take a shot at the safe. I think this is a little bit more into safety than Gonzo. Why you go get Rishi Jihid when you bring in that speed. Because you've seen this all year, you talked about them cutting the crossers. Teams will do that to JSN. It does create opportunities for the other players. I think with the man, if the Patriots were to play a decent amount of man coverage, which they've not been a man coverage team for most of the year, despite having man corners in Davis and Gonzalez, my concern would actually be less JSN, even though I do think he would get his still against Gonzo Revert. And more about the other, yeah, the tight ends, the backs we talk about Walker, if you can get him on the linebackers and the passing game. This is what's scary about this, as much as it flows through JSN, they do have other skill players who can beat you and they can also use them to hunt matchups. That's why the Cooper Cup, bending this game is huge, just the understanding of space and where to go. I wanna get to this new Ian Patre as defense. 
We talked about them and we haven't really locked in on how good they've been they've been phenomenal as well Now we talk about what the other teams were missing so it kind of dilutes that conversation But there's also within this defensive of Mike Raeble of wanting to keep things in front of him There will be those open holes in this zone There will be these opportunities to find space for Drake May and move this football down the field I'm I'm excuse me. So you look at you look at this defense from New England, right? And you it's about finding where the open spot is. You got this slide on the out route. To me, that's open. To me, that is where your eyes need to get to with an understanding of this is going to be getting the ball out of quick. I mean, of what you're liaing yourself to what you say earlier about this defense. If you can find those opportunities early and downs, sometimes and you notice sometimes the play is called there's an expectation for your first read and if that first read is taken away it's about how fast can you get to those next ones and I think I think New England gives you a lot of those opportunities when they play zone the problem is Milton Williams has been so good at pressing the pocket that you feel some panic before you can find that area that is how they tie in secondary and pass rush. I'm not going to be a big fan of the game. I'm not going to be a big fan of the game. I'm not going to be a big fan of the game. I'm not going to be a big fan of the game. I'm not going to be a big fan of the game. I'm not going to be a big fan of the game. I'm not going to be a big fan of the game. 
I think that's the reason why I think that's why I think that's why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the reason why I think that's the the right it's roll behind the guard roll behind the attack like I think that's the challenging thing that person is always give him so many answers first down I think where you see what you're talking about come to play is if it gets to third down that's where you start to worry about. Totally are you guys ready for us to make some Super Bowl picks. Yeah that's coming up next. Sunday at 6 30 Eastern these two teams will figure out which one ends up hoisting that lambardi you see them talking about it it's becoming even more real and Marcus when you look at this game, what's one thing that you think will define Super Bowl 60? A young man by the name of the Wayne Carter, aka Lil Wayne, has said what is a goon to a goblin? Who? And Leonard Williams is a goblin. And the other side of that is Milton Williams is a goblin who we haven't talked about enough who's taken over games for the New England Patriots on the interior. He is doing exactly what they paid him to do for as much as we've talked about the Seattle Seahawks defensive line. 
These two interior guys between Christian Barmore and Milton Williams have impacted games as much as anybody in these playoffs. And I think in order for New England to have a shot at winning this game and winning it straight up those two are who we are going to have to talk about after the game. I don't know if you can talk about pivotal matchups in this game and not mention the name Will Campbell. Yeah. Since returning from injury, Will Campbell has truly been an Achilles heel for this offense and Drake made the pressure that we've seen created from the left side of the offensive line of the New England Patriots. And if you're Mike McDonald, this sea hawk defense, you're going to try to get as many one-on-one matchups as you can with the young rookie and believe me, he is going to be the fish. going to be arguments on the sideline of who gets the lineup on the right side of the defensive line. We've been showing you guys explosive Seattle pass plays all week. Most of them happen on first down, under sender play action. Jackson's was in jigga running deep on the crosser. They average nearly nine yards of pass on first down this season. Second only to new Patriots. Milton, probably Milton Williams versus the interior of the CX offensive line is a mismatch. but he might not get the opportunity to affect the game if Seattle is efficient and explosive on first down when they can create that run past conflict. It's when they're the best. Yeah, going last on this one is outstanding. There you go, Dan. Dan, do the short call. I turn over. I said kickers. Who can weather the storm? Both of these teams lead the NFL or tops in the NFL when it comes to first quarter scoring differential. So who comes out and throws the first punch? And then who can kind of punch back? Seattle plus 79. first quarter, outscoring their opponents to New England, plus 45. 
So what team kind of takes that lead early on, but if that's the case, what team in that first quarter can kind of throw that counter punch back? Dan, this is really interesting, but there was a baby that was still in the show back at where's the baby? Nobody. Nobody. I'll take the baby. This is symbol. You know baby. Oh, Dan. Oh, Dan's got the baby. Dan's not going to give the baby back. baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby 
baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby I'm glad that wasn't me number 12. All right, let's talk about this game. That's what I'm told in my ear. ESPN Analytics simulated the Super Bowl 10,000 times and the Seahawks were victorious in 60% of those simulations. The average score difference in those 10,000 matchups was the Seahawks by about three and a half points. Currently, Draft King Sportsbook has Seattle as a four and a half point favorite. Swagoo, you first, who you got in this game. I remember being in New England. It was snowing and the Houston Texans were coming in the town. And I was thinking to myself, there is no way you're going to be able to beat the Houston Texans with the way that that defense has played. We've talked about Seattle defense and how good they've been all season long. I'm taking Seattle. I'm taking the Seattle fan. I think that from the team, I think all the fans will show up. All right. We have fans this entire week talking about the matchups, both sides of the ball between these teams. Most of those matchups favor the Seattle Seahawks. Top to bottom, the roster is better. If Sam Darnold protects the football, limits turnovers, I do not see how Seattle loses this game. I'm picking Seattle. Yeah, I think every team has their moment and it feels like the Seattle Seahawks are destined for this moment. the way that this defense has played all year. 
The story behind Sam Drono, then Jackson Smith and Jigabas, absolute excellence at the position. This is a team under Mike McDonald that knew exactly who they needed to be and they'll be that on Sunday evening. Yeah, I'm taking Seattle. They've been one of the more dominant football teams you've seen in the NFL last 10 years. I think New England's defense can keep them in and out so much respect for Mike Brabel as a coach. Seattle has been with the Rams. The two best teams in football all year. Seattle plays their A game. They control the football game. They win. Budgie, please. We are the FFF danger of the NFL live curse affecting Super Bowl 60, but it's not going to happen. I go with the Patriots. Yeah. You guys love me. I knew you. All right. But here's why. I actually think Drake Man is leg. It's going to be the key factor in this game. I think he's going to step up in huge moments. Yeah. I'm doing it for you. No, seriously, Drake May is going to stand up and it's going to be great and they're going to win and you guys are all going to be wrong. And go with that. On Sunday night, I'll go with my seat you as part of the handoff show, which tune in around 1 a.m. Eastern time. We'll talk about how I'm right and you guys are wrong. And then we'll see you again on Monday in Disney Land. We've got all kinds of great coverage still coming up. Thanks to everybody for being here. Thanks for our incredible staff back in Bristol, Connecticut. We love you guys. So we appreciate everybody for tuning in in Joy's Super Bowl 60s. Forks that are with Matt and Hannah's next. Thank you all, I'm in. The The The The The The The The The The The The The The The The The The The The The The The The The The The The The\""
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "result[\"text\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "8cd88e10",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[{'timestamp': (0.0, 4.88),\n",
- " 'text': \" We're now 50 hours till Super Bowl 60 that clock will continue to count down.\"},\n",
- " {'timestamp': (4.88, 6.32),\n",
- " 'text': \" It's going to be here before we know it.\"},\n",
- " {'timestamp': (6.32, 10.64),\n",
- " 'text': \" And let's continue the conversation around JSN, mean, obviously with the ball in his hands,\"},\n",
- " {'timestamp': (10.64, 11.44), 'text': \" he's so dynamic.\"},\n",
- " {'timestamp': (11.44, 14.4),\n",
- " 'text': \" But you've noticed some things when he doesn't even have the ball that you wanted to point out.\"},\n",
- " {'timestamp': (14.4, 17.12),\n",
- " 'text': \" And I think it's a part of why it was such a deserving offensive player of the year\"},\n",
- " {'timestamp': (17.12, 20.04),\n",
- " 'text': ' because of the gravity he exerts on the field.'},\n",
- " {'timestamp': (20.24, 23.92),\n",
- " 'text': ' One of the stats that I love on throws over 10 yards.'},\n",
- " {'timestamp': (24.16, 28.36),\n",
- " 'text': ' When the Seahawks throw to receiver not named JSN,'},\n",
- " {'timestamp': (28.72, 0.0), 'text': ''},\n",
- " {'timestamp': (5.7, 10.16),\n",
- " 'text': \" that receiver is twice as likely as your average NFL receiver to be wide open or also significantly more likely to be open period. So if it's Cooper Cup or\"},\n",
- " {'timestamp': (10.16, 12.76),\n",
- " 'text': \" boat you whenever you want to see I say I'm going to like how is AJ Barnard so\"},\n",
- " {'timestamp': (12.76, 17.1),\n",
- " 'text': ' damn wide open a lot of the time it is because of the attention paid to JSN.'},\n",
- " {'timestamp': (17.1, 21.1),\n",
- " 'text': \" He's also really impactful in the screen game. We've been talking about walkers\"},\n",
- " {'timestamp': (21.1, 24.44),\n",
- " 'text': ' receiver a couple plays from the ends of the season. This is in the Rams game.'},\n",
- " {'timestamp': (24.44, 29.3),\n",
- " 'text': \" So often they run these post over concepts with JSN's as a two-man concept.\"},\n",
- " {'timestamp': (29.3, 0.0), 'text': ''},\n",
- " {'timestamp': (1.0, 2.0), 'text': ' the Rams defensive.'},\n",
- " {'timestamp': (2.0, 3.0), 'text': ' Probably the entire second'},\n",
- " {'timestamp': (3.0, 4.0), 'text': ' area follows J.S.S.'},\n",
- " {'timestamp': (4.0, 5.0), 'text': ' You get the little throwback'},\n",
- " {'timestamp': (5.0, 6.0), 'text': ' screen.'},\n",
- " {'timestamp': (6.0, 7.0), 'text': ' You got a numbers'},\n",
- " {'timestamp': (7.0, 8.0), 'text': ' advantage, which of course'},\n",
- " {'timestamp': (8.0, 9.0), 'text': ' is very helpful.'},\n",
- " {'timestamp': (9.0, 10.0), 'text': ' But in addition to the'},\n",
- " {'timestamp': (10.0, 11.0), 'text': ' numbers and the blockers,'},\n",
- " {'timestamp': (11.0, 12.0), 'text': ' look at all the grass in'},\n",
- " {'timestamp': (12.0, 13.0), 'text': ' front of Kenneth Walker.'},\n",
- " {'timestamp': (13.0, 14.0), 'text': ' You can even see the'},\n",
- " {'timestamp': (14.0, 15.0), 'text': ' defensive when the C.S.'},\n",
- " {'timestamp': (15.0, 16.0), 'text': ' Amd Arnold turns back.'},\n",
- " {'timestamp': (16.0, 17.0), 'text': \" They assume he's going to\"},\n",
- " {'timestamp': (17.0, 18.0), 'text': ' throw to J.S.S.S.S.'},\n",
- " {'timestamp': (18.0, 19.0), 'text': ' because so often Seattle'},\n",
- " {'timestamp': (19.0, 20.0), 'text': ' does.'},\n",
- " {'timestamp': (20.0, 21.0), 'text': ' Again, this is two'},\n",
- " {'timestamp': (21.0, 22.0), 'text': ' couple weeks later against'},\n",
- " {'timestamp': (22.0, 23.0), 'text': ' San Francisco.'},\n",
- " {'timestamp': (23.0, 24.0), 'text': ' J.S.S.S.S.S.'},\n",
- " {'timestamp': (24.0, 25.0), 'text': ' on the in-breaker.'},\n",
- " {'timestamp': (25.0, 26.0), 'text': ' Once again, pulls the'},\n",
- " {'timestamp': (26.0, 27.0), 'text': ' safety.'},\n",
- " {'timestamp': (27.0, 28.0), 'text': ' Throw the screen.'},\n",
- " {'timestamp': (28.0, 29.0), 'text': ' Again, you have the'},\n",
- " {'timestamp': (29.0, 0.0), 'text': ''},\n",
- " {'timestamp': (1.9, 2.9), 'text': ' the player.'},\n",
- " {'timestamp': (2.9, 4.64), 'text': ' And when the'},\n",
- " {'timestamp': (4.64, 6.0), 'text': ' center first down play'},\n",
- " {'timestamp': (6.0, 7.6), 'text': ' action, they run him so much'},\n",
- " {'timestamp': (7.6, 8.8), 'text': ' on those overs and in'},\n",
- " {'timestamp': (8.8, 9.8), 'text': ' breakers.'},\n",
- " {'timestamp': (9.8, 10.8), 'text': ' It creates so much space for'},\n",
- " {'timestamp': (10.8, 11.8), 'text': ' the other players on the'},\n",
- " {'timestamp': (11.8, 12.8), 'text': ' game.'},\n",
- " {'timestamp': (12.8, 13.8), 'text': ' I love that tape.'},\n",
- " {'timestamp': (13.8, 14.8), 'text': ' It reminds me of a lot of'},\n",
- " {'timestamp': (14.8, 15.8), 'text': ' what Andre Johnson used to'},\n",
- " {'timestamp': (15.8, 17.8), 'text': ' impact in Houston.'},\n",
- " {'timestamp': (17.8, 19.8), 'text': ' Gary Kubiak, Kyle Shanahan.'},\n",
- " {'timestamp': (19.8, 20.8), 'text': ' I remember Kyle often'},\n",
- " {'timestamp': (20.8, 22.8), 'text': ' talking about install.'},\n",
- " {'timestamp': (22.8, 23.8), 'text': ' That was the part of the'},\n",
- " {'timestamp': (23.8, 24.8), 'text': ' game where he would move'},\n",
- " {'timestamp': (24.8, 25.8), 'text': ' Andre in all these'},\n",
- " {'timestamp': (25.8, 26.8), 'text': ' different places.'},\n",
- " {'timestamp': (26.8, 27.8), 'text': ' Andre would be in the'},\n",
- " {'timestamp': (27.8, 28.8), 'text': ' slot.'},\n",
- " {'timestamp': (28.8, 0.0), 'text': ''},\n",
- " {'timestamp': (3.16, 5.68),\n",
- " 'text': ' the motion out to end into how across the formation, bring them back, line them up in the backfield.'},\n",
- " {'timestamp': (5.68, 7.48),\n",
- " 'text': \" And I think what you'll see in this game\"},\n",
- " {'timestamp': (7.48, 10.48), 'text': ' is Clint Kubiak doing that early on.'},\n",
- " {'timestamp': (10.48, 13.72),\n",
- " 'text': ' Partly to have success, partly to get information.'},\n",
- " {'timestamp': (13.72, 14.84), 'text': ' All right, what are you doing?'},\n",
- " {'timestamp': (14.84, 16.08), 'text': \" What's New England's plan?\"},\n",
- " {'timestamp': (16.08, 19.44),\n",
- " 'text': ' When I moved J.S. in on first down from the slot outside in,'},\n",
- " {'timestamp': (19.44, 20.28), 'text': \" what's their call?\"},\n",
- " {'timestamp': (20.28, 21.32), 'text': \" What's their check?\"},\n",
- " {'timestamp': (21.32, 22.72), 'text': ' Vice-person, what not.'},\n",
- " {'timestamp': (22.72, 25.92),\n",
- " 'text': ' And when you hear Cooper Cup talk about the route running,'},\n",
- " {'timestamp': (25.92, 28.04), 'text': ' it reminds me of Isaac Bruce.'},\n",
- " {'timestamp': (28.04, 0.0), 'text': ''},\n",
- " {'timestamp': (5.44, 10.16),\n",
- " 'text': \" know with a shoulders never go up or down. They're just kind of steady. There's just no idea from a defensive back of like what's the likeliest route. Yeah, I think what I love\"},\n",
- " {'timestamp': (10.16, 15.56),\n",
- " 'text': ' about what you both are saying is how great players affect the entire football field.'},\n",
- " {'timestamp': (15.56, 20.52),\n",
- " 'text': \" If this was basketball and swag, I'll understand this. If this was basketball, J.S. and Kenneth\"},\n",
- " {'timestamp': (20.52, 24.16),\n",
- " 'text': ' Walker are constantly in the picking role where they are the focal point, right? Like they'},\n",
- " {'timestamp': (24.16, 28.92),\n",
- " 'text': ' can go into the hood and say, I need the ball. But everybody else feeds off of them. And'},\n",
- " {'timestamp': (28.92, 0.0), 'text': ''},\n",
- " {'timestamp': (5.2, 8.96),\n",
- " 'text': \" So whether it's play action or their doubling JSN now Cooper Cup gets opportunity. The fact that he lines up in the backfield he plays number one, he plays the slot.\"},\n",
- " {'timestamp': (8.96, 14.72),\n",
- " 'text': ' And also to me, stacks and condensed splits make it extremely hard to not pay a ton of'},\n",
- " {'timestamp': (14.72, 17.52),\n",
- " 'text': \" tension to them because you're always putting him in traffic.\"},\n",
- " {'timestamp': (17.52, 21.36),\n",
- " 'text': ' When a receiver is in traffic, now you are attracting bodies.'},\n",
- " {'timestamp': (21.36, 25.88),\n",
- " 'text': \" And if you look at some of the plays the Cooper Cups make, the tight ends make when they're\"},\n",
- " {'timestamp': (25.88, 0.0), 'text': ''},\n",
- " {'timestamp': (5.1, 8.26),\n",
- " 'text': \" they are late developing in the play because so much attention has gone to Jackson. Yeah, I think it'd be really interesting just Clint Kubiak.\"},\n",
- " {'timestamp': (8.26, 9.26), 'text': \" What's the answer?\"},\n",
- " {'timestamp': (9.26, 11.26), 'text': \" But he's so good in games.\"},\n",
- " {'timestamp': (11.26, 14.86),\n",
- " 'text': \" You think about the Rams game where JSN really didn't do anything in the first half.\"},\n",
- " {'timestamp': (14.86, 18.76),\n",
- " 'text': ' And then when you see defense as bracketing in Kubiak is so good to your point about moving'},\n",
- " {'timestamp': (18.76, 20.78),\n",
- " 'text': ' him around trying to find ways to get a matchups.'},\n",
- " {'timestamp': (20.78, 22.46), 'text': ' And JSN never gets frustrated.'},\n",
- " {'timestamp': (22.46, 25.7),\n",
- " 'text': ' Yeah, I think the other piece of it is too though.'},\n",
- " {'timestamp': (25.7, 28.3),\n",
- " 'text': \" He understands he's going to get his at some point.\"},\n",
- " {'timestamp': (28.3, 29.98), 'text': \" Yeah, that's the reason you don't.\"},\n",
- " {'timestamp': (29.98, 0.0), 'text': ''},\n",
- " {'timestamp': (4.24, 7.92),\n",
- " 'text': \" have to get frustrated. It's not even that we have to have you number one in the progression. We have to make sure we call this play for you. It's eventually\"},\n",
- " {'timestamp': (7.92, 12.28),\n",
- " 'text': \" through the flow of the offense. You'll have your chance. And when he does, he\"},\n",
- " {'timestamp': (12.28, 17.34),\n",
- " 'text': ' seems to always capitalize. And like Dan says, every route, the stem, the top'},\n",
- " {'timestamp': (17.34, 22.2),\n",
- " 'text': ' of the route, they look the same. That was a moment happens. You separate so much.'},\n",
- " {'timestamp': (22.2, 25.56),\n",
- " 'text': \" He's so talented. Adarnals 18 completions or 20 or more\"},\n",
- " {'timestamp': (25.56, 0.0), 'text': ''},\n",
- " {'timestamp': (2.0, 4.0),\n",
- " 'text': ' area yards to Smith and Jigba are the most this season'},\n",
- " {'timestamp': (4.0, 6.0), 'text': ' by any QB wide receiver combo.'},\n",
- " {'timestamp': (6.0, 8.0), 'text': \" And we're going to talk about the\"},\n",
- " {'timestamp': (8.0, 10.0), 'text': ' Patriots answer to this.'},\n",
- " {'timestamp': (10.0, 12.0), 'text': ' We continue to break down every'},\n",
- " {'timestamp': (12.0, 14.0), 'text': ' angle of the Super Bowl 60.'},\n",
- " {'timestamp': (14.0, 16.0), 'text': \" There's a look at sparkling\"},\n",
- " {'timestamp': (16.0, 18.0), 'text': \" Levi's stadium just about 35\"},\n",
- " {'timestamp': (18.0, 20.0), 'text': \" miles down the road. It's going to\"},\n",
- " {'timestamp': (20.0, 22.0), 'text': ' take you a while to get there though.'},\n",
- " {'timestamp': (22.0, 24.0), 'text': \" Just telling you traffic's been a\"},\n",
- " {'timestamp': (24.0, 26.0), 'text': ' little much here. But we expect'},\n",
- " {'timestamp': (26.0, 28.0), 'text': ' that on a Super Bowl week.'},\n",
- " {'timestamp': (28.0, 0.0), 'text': ''},\n",
- " {'timestamp': (6.56, 11.12),\n",
- " 'text': ' the Patriots can answer JSN on the other side. Opposing quarterbacks are completing just 39% of their passes with him in coverage this postseason on pace to be the lowest in a single'},\n",
- " {'timestamp': (11.12, 17.68),\n",
- " 'text': \" season postseason since NFL next year and began tracking in 2018. He's allowed no touchdowns\"},\n",
- " {'timestamp': (17.68, 21.2),\n",
- " 'text': \" with an interception. He's just been locked down the best way to describe it right and\"},\n",
- " {'timestamp': (21.2, 27.52),\n",
- " 'text': ' Dan, we spoke at length about JSN the last block. What can New England try to do defensively'},\n",
- " {'timestamp': (27.52, 0.0), 'text': ''},\n",
- " {'timestamp': (1.84, 4.84),\n",
- " 'text': ' to answer that on the other side. Yeah, so one pass off to palms.'},\n",
- " {'timestamp': (4.84, 8.16),\n",
- " 'text': \" And this is something we've talked about, JSN and the Crossers\"},\n",
- " {'timestamp': (8.16, 10.88),\n",
- " 'text': ' start on one side of the field in this play action'},\n",
- " {'timestamp': (10.88, 12.32), 'text': ' or crossing concept.'},\n",
- " {'timestamp': (12.32, 14.4), 'text': ' New England is notorious for this.'},\n",
- " {'timestamp': (14.4, 15.96), 'text': \" They didn't do it a ton this year,\"},\n",
- " {'timestamp': (15.96, 17.92),\n",
- " 'text': \" but there's a clip in the Denver game.\"},\n",
- " {'timestamp': (17.92, 19.28), 'text': ' And this is going to be palms, OK?'},\n",
- " {'timestamp': (19.28, 22.6),\n",
- " 'text': \" So that linebacker's responsible for what we call the Curl,\"},\n",
- " {'timestamp': (22.6, 23.2), 'text': ' the Flat Area.'},\n",
- " {'timestamp': (23.2, 24.0), 'text': ' This is quarter.'},\n",
- " {'timestamp': (24.0, 26.04),\n",
- " 'text': \" So everybody's responsible for a quarter of the field\"},\n",
- " {'timestamp': (26.04, 27.0), 'text': ' in defense.'},\n",
- " {'timestamp': (27.0, 28.6),\n",
- " 'text': ' Those two defenders at the bottom of the screen'},\n",
- " {'timestamp': (28.6, 29.32), 'text': ' are playing palms.'},\n",
- " {'timestamp': (29.32, 0.0), 'text': ''},\n",
- " {'timestamp': (1.84, 5.72),\n",
- " 'text': \" They're reading that slot receiver. Once that slot receiver outbreaks, it becomes cover two.\"},\n",
- " {'timestamp': (5.72, 7.0), 'text': ' The corner drives it.'},\n",
- " {'timestamp': (7.0, 8.44), 'text': ' The safety plays over the top.'},\n",
- " {'timestamp': (8.44, 11.08),\n",
- " 'text': \" So that's one way, will they'll try to have extra bodies\"},\n",
- " {'timestamp': (11.08, 11.68), 'text': ' on them.'},\n",
- " {'timestamp': (11.68, 14.56),\n",
- " 'text': ' The other part is this pass off when a crossing route'},\n",
- " {'timestamp': (14.56, 15.08), 'text': ' happens.'},\n",
- " {'timestamp': (15.08, 17.72),\n",
- " 'text': ' The man in coverage passes it off to the safety.'},\n",
- " {'timestamp': (17.72, 19.96), 'text': ' He cuts it and goes over the top.'},\n",
- " {'timestamp': (19.96, 22.6),\n",
- " 'text': \" The palms thing for me is always from the quarterback's\"},\n",
- " {'timestamp': (22.6, 22.8), 'text': ' thought.'},\n",
- " {'timestamp': (22.8, 25.48),\n",
- " 'text': \" I'm always like, it's hard at times for us.\"},\n",
- " {'timestamp': (25.48, 28.52),\n",
- " 'text': \" But I always know there's a shot down the field if you're\"},\n",
- " {'timestamp': (28.52, 29.4), 'text': ' not on the same page.'},\n",
- " {'timestamp': (29.4, 0.0), 'text': ''},\n",
- " {'timestamp': (4.0, 7.0),\n",
- " 'text': \" I'm trying to think about the amount of times I've seen San Bernard and Seattle take whole shots.\"},\n",
- " {'timestamp': (7.0, 11.0),\n",
- " 'text': ' The reason that I would love to see palms or love to see'},\n",
- " {'timestamp': (11.0, 14.0),\n",
- " 'text': \" two-sake these highs because I always think it's a great\"},\n",
- " {'timestamp': (14.0, 17.0),\n",
- " 'text': ' defense for condensed splits deep crossers.'},\n",
- " {'timestamp': (17.0, 19.0),\n",
- " 'text': ' When you look at teams that have had success against two'},\n",
- " {'timestamp': (19.0, 22.0),\n",
- " 'text': \" of Tungalvaloa and Miami dolphins, a lot of times you've\"},\n",
- " {'timestamp': (22.0, 26.0),\n",
- " 'text': \" seen them in Cover 2 or palms or two buster where you're\"},\n",
- " {'timestamp': (26.0, 29.0),\n",
- " 'text': ' passing off things with people who can see it from high to'},\n",
- " {'timestamp': (29.0, 0.0), 'text': ''},\n",
- " {'timestamp': (3.0, 6.0),\n",
- " 'text': \" that's why it's so difficult in cover 3. I also wonder what you do with Christian Gonzalez\"},\n",
- " {'timestamp': (6.0, 9.0),\n",
- " 'text': \" if you're ever say okay we got to get it more men and men\"},\n",
- " {'timestamp': (9.0, 12.0),\n",
- " 'text': ' do you put him on Rashid Shaheed and allow them to be'},\n",
- " {'timestamp': (12.0, 15.0),\n",
- " 'text': ' one on one or with Cooper Cup and then you double team'},\n",
- " {'timestamp': (15.0, 18.0),\n",
- " 'text': ' where you played the Cloud coverage to J.S.N.'},\n",
- " {'timestamp': (18.0, 20.0), 'text': ' And so now you have an I.L.A.'},\n",
- " {'timestamp': (20.0, 21.0), 'text': ' Double 11.'},\n",
- " {'timestamp': (21.0, 23.0),\n",
- " 'text': ' Yeah, double 11 where you could put hands where you could'},\n",
- " {'timestamp': (23.0, 25.0),\n",
- " 'text': ' put hands on him, have someone over the top and I would'},\n",
- " {'timestamp': (25.0, 28.0),\n",
- " 'text': \" never play in and out because he's too good of a rope.\"},\n",
- " {'timestamp': (28.0, 29.0),\n",
- " 'text': ' How does that change so within moving?'},\n",
- " {'timestamp': (29.0, 0.0), 'text': ''},\n",
- " {'timestamp': (7.76, 15.52),\n",
- " 'text': \" the way they change is when he moves is now you can just change the sides. The problem is you're never going to be upset to me in my opinion if Carlton Davis ends up on if Carlton Davis has to end up on Rashid Shaheed or Cooper Cup because it's the change of strength motion.\"},\n",
- " {'timestamp': (15.52, 23.28),\n",
- " 'text': \" What you never want is for JSN to end up in the man the man that would be the guy that's the second that the second while receiver.\"},\n",
- " {'timestamp': (23.28, 0.0), 'text': ''},\n",
- " {'timestamp': (6.14, 7.64),\n",
- " 'text': \" So for those who don't know the one double recommendation is really interesting because that's what the Patriots use famously Don't wait they did it with the road\"},\n",
- " {'timestamp': (7.64, 12.92),\n",
- " 'text': ' So and I think it does make sense it does I do think though'},\n",
- " {'timestamp': (13.68, 18.88),\n",
- " 'text': ' Gonzales as great as he is she he can get him with a speed you remember Marvin Mims obviously in the AFC'},\n",
- " {'timestamp': (18.88, 20.4), 'text': ' JV to have beat him on the go route'},\n",
- " {'timestamp': (20.4, 24.06),\n",
- " 'text': ' And I also think if they end up playing any form of man coverage'},\n",
- " {'timestamp': (24.32, 28.18),\n",
- " 'text': \" There's here you see here Gonzo. I mean he just got beat like\"},\n",
- " {'timestamp': (28.8, 0.0), 'text': ''},\n",
- " {'timestamp': (1.0, 2.0), 'text': \" the game. But that's been there.\"},\n",
- " {'timestamp': (2.0, 3.0), 'text': ' That shot is on the backside.'},\n",
- " {'timestamp': (3.0, 5.0), 'text': ' You can take a shot at the safe.'},\n",
- " {'timestamp': (5.0, 8.0),\n",
- " 'text': ' I think this is a little bit more into safety than Gonzo.'},\n",
- " {'timestamp': (8.0, 12.0),\n",
- " 'text': ' Why you go get Rishi Jihid when you bring in that speed.'},\n",
- " {'timestamp': (12.0, 15.0),\n",
- " 'text': \" Because you've seen this all year, you talked about them cutting the crossers.\"},\n",
- " {'timestamp': (15.0, 16.0), 'text': ' Teams will do that to JSN.'},\n",
- " {'timestamp': (16.0, 18.0),\n",
- " 'text': ' It does create opportunities for the other players.'},\n",
- " {'timestamp': (18.0, 21.0),\n",
- " 'text': ' I think with the man, if the Patriots were to play a decent amount of man coverage,'},\n",
- " {'timestamp': (21.0, 26.0),\n",
- " 'text': \" which they've not been a man coverage team for most of the year, despite having man corners in Davis and Gonzalez,\"},\n",
- " {'timestamp': (26.0, 0.0), 'text': ''},\n",
- " {'timestamp': (2.1, 3.68),\n",
- " 'text': ' my concern would actually be less JSN, even though I do think he would get his still'},\n",
- " {'timestamp': (3.68, 5.0), 'text': ' against Gonzo Revert.'},\n",
- " {'timestamp': (5.0, 7.4),\n",
- " 'text': ' And more about the other, yeah, the tight ends,'},\n",
- " {'timestamp': (7.4, 8.84), 'text': ' the backs we talk about Walker,'},\n",
- " {'timestamp': (8.84, 10.52),\n",
- " 'text': ' if you can get him on the linebackers'},\n",
- " {'timestamp': (10.52, 11.76), 'text': ' and the passing game.'},\n",
- " {'timestamp': (11.76, 13.0), 'text': \" This is what's scary about this,\"},\n",
- " {'timestamp': (13.0, 15.56), 'text': ' as much as it flows through JSN,'},\n",
- " {'timestamp': (15.56, 17.88),\n",
- " 'text': ' they do have other skill players who can beat you'},\n",
- " {'timestamp': (17.88, 19.88),\n",
- " 'text': ' and they can also use them to hunt matchups.'},\n",
- " {'timestamp': (19.88, 21.72), 'text': \" That's why the Cooper Cup,\"},\n",
- " {'timestamp': (21.72, 23.96), 'text': ' bending this game is huge,'},\n",
- " {'timestamp': (23.96, 26.8),\n",
- " 'text': ' just the understanding of space and where to go.'},\n",
- " {'timestamp': (26.8, 29.04),\n",
- " 'text': ' I wanna get to this new Ian Patre as defense.'},\n",
- " {'timestamp': (29.04, 0.0), 'text': ''},\n",
- " {'timestamp': (5.94, 10.26),\n",
- " 'text': \" We talked about them and we haven't really locked in on how good they've been they've been phenomenal as well Now we talk about what the other teams were missing so it kind of dilutes that conversation\"},\n",
- " {'timestamp': (10.26, 15.2),\n",
- " 'text': \" But there's also within this defensive of Mike Raeble of wanting to keep things in front of him\"},\n",
- " {'timestamp': (15.3, 18.94),\n",
- " 'text': ' There will be those open holes in this zone'},\n",
- " {'timestamp': (18.94, 25.74),\n",
- " 'text': ' There will be these opportunities to find space for Drake May and move this football down the field'},\n",
- " {'timestamp': (25.74, 0.0), 'text': ''},\n",
- " {'timestamp': (4.16, 9.48),\n",
- " 'text': \" I'm I'm excuse me. So you look at you look at this defense from New England, right? And you it's about finding where the open spot is. You got this slide on\"},\n",
- " {'timestamp': (9.48, 14.2),\n",
- " 'text': \" the out route. To me, that's open. To me, that is where your eyes need to get to\"},\n",
- " {'timestamp': (14.2, 17.48),\n",
- " 'text': ' with an understanding of this is going to be getting the ball out of'},\n",
- " {'timestamp': (17.48, 20.72),\n",
- " 'text': \" quick. I mean, of what you're liaing yourself to what you say earlier about this\"},\n",
- " {'timestamp': (20.72, 25.72),\n",
- " 'text': ' defense. If you can find those opportunities early and downs, sometimes and'},\n",
- " {'timestamp': (25.72, 0.0), 'text': ''},\n",
- " {'timestamp': (4.94, 9.3),\n",
- " 'text': \" you notice sometimes the play is called there's an expectation for your first read and if that first read is taken away it's about how fast can you get to\"},\n",
- " {'timestamp': (9.3, 13.92),\n",
- " 'text': ' those next ones and I think I think New England gives you a lot of those'},\n",
- " {'timestamp': (13.92, 18.42),\n",
- " 'text': ' opportunities when they play zone the problem is Milton Williams has been so'},\n",
- " {'timestamp': (18.42, 23.22),\n",
- " 'text': ' good at pressing the pocket that you feel some panic before you can find that'},\n",
- " {'timestamp': (23.22, 26.96),\n",
- " 'text': ' area that is how they tie in secondary and pass rush.'},\n",
- " {'timestamp': (26.96, 0.0), 'text': ''},\n",
- " {'timestamp': (2.0, 4.0),\n",
- " 'text': \" I'm not going to be a big fan of the game.\"},\n",
- " {'timestamp': (4.0, 6.0), 'text': \" I'm not going to be a big fan\"},\n",
- " {'timestamp': (6.0, 8.0), 'text': ' of the game.'},\n",
- " {'timestamp': (8.0, 10.0), 'text': \" I'm not going to be a big fan of\"},\n",
- " {'timestamp': (10.0, 12.0), 'text': ' the game.'},\n",
- " {'timestamp': (12.0, 14.0), 'text': \" I'm not going to be a big fan of\"},\n",
- " {'timestamp': (14.0, 16.0), 'text': ' the game.'},\n",
- " {'timestamp': (16.0, 18.0), 'text': \" I'm not going to be a big fan of\"},\n",
- " {'timestamp': (18.0, 20.0), 'text': ' the game.'},\n",
- " {'timestamp': (20.0, 22.0), 'text': \" I'm not going to be a big fan of\"},\n",
- " {'timestamp': (22.0, 24.0), 'text': ' the game.'},\n",
- " {'timestamp': (24.0, 26.0), 'text': \" I'm not going to be a big fan of\"},\n",
- " {'timestamp': (26.0, 28.0), 'text': ' the game.'},\n",
- " {'timestamp': (28.0, 0.0), 'text': ''},\n",
- " {'timestamp': (1.9, 2.9),\n",
- " 'text': \" I think that's the reason why I think that's why I think\"},\n",
- " {'timestamp': (2.9, 3.9), 'text': \" that's why I think that's the\"},\n",
- " {'timestamp': (3.9, 4.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (4.9, 5.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (5.9, 6.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (6.9, 7.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (7.9, 8.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (8.9, 9.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (9.9, 10.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (10.9, 11.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (11.9, 12.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (12.9, 13.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (13.9, 14.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (14.9, 15.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (15.9, 16.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (16.9, 17.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (17.9, 18.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (18.9, 19.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (19.9, 20.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (20.9, 21.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (21.9, 22.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (22.9, 23.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (23.9, 24.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (24.9, 25.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (25.9, 26.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (26.9, 27.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (27.9, 28.9), 'text': \" reason why I think that's the\"},\n",
- " {'timestamp': (28.9, 0.0), 'text': ''},\n",
- " {'timestamp': (2.94, 3.04),\n",
- " 'text': \" the right it's roll behind the guard roll behind the\"},\n",
- " {'timestamp': (5.58, 5.68),\n",
- " 'text': \" attack like I think that's the challenging thing that\"},\n",
- " {'timestamp': (8.78, 8.88),\n",
- " 'text': ' person is always give him so many answers first down I'},\n",
- " {'timestamp': (10.78, 10.88),\n",
- " 'text': \" think where you see what you're talking about come to play\"},\n",
- " {'timestamp': (14.02, 14.12),\n",
- " 'text': \" is if it gets to third down that's where you start to\"},\n",
- " {'timestamp': (14.72, 14.82), 'text': ' worry about.'},\n",
- " {'timestamp': (17.06, 17.16),\n",
- " 'text': ' Totally are you guys ready for us to make some Super Bowl'},\n",
- " {'timestamp': (17.62, 17.72), 'text': ' picks.'},\n",
- " {'timestamp': (19.76, 20.86), 'text': \" Yeah that's coming up next.\"},\n",
- " {'timestamp': (24.36, 24.46),\n",
- " 'text': ' Sunday at 6 30 Eastern these two teams will figure out'},\n",
- " {'timestamp': (26.96, 27.06),\n",
- " 'text': ' which one ends up hoisting that lambardi you see them'},\n",
- " {'timestamp': (29.94, 0.0),\n",
- " 'text': \" talking about it it's becoming even more real and Marcus\"},\n",
- " {'timestamp': (5.36, 7.36),\n",
- " 'text': \" when you look at this game, what's one thing that you think will define Super Bowl 60?\"},\n",
- " {'timestamp': (15.2, 15.7),\n",
- " 'text': ' A young man by the name of the Wayne Carter, aka Lil Wayne, has said what is a goon to a goblin? Who?'},\n",
- " {'timestamp': (15.7, 17.68), 'text': ' And Leonard Williams is a goblin.'},\n",
- " {'timestamp': (17.68, 24.48),\n",
- " 'text': \" And the other side of that is Milton Williams is a goblin who we haven't talked about enough\"},\n",
- " {'timestamp': (24.48, 0.0), 'text': ''},\n",
- " {'timestamp': (3.64, 7.88),\n",
- " 'text': \" who's taken over games for the New England Patriots on the interior. He is doing exactly what they paid him to do for as much as\"},\n",
- " {'timestamp': (7.88, 11.84),\n",
- " 'text': \" we've talked about the Seattle Seahawks defensive line. These two\"},\n",
- " {'timestamp': (11.84, 15.92),\n",
- " 'text': ' interior guys between Christian Barmore and Milton Williams have'},\n",
- " {'timestamp': (15.92, 20.44),\n",
- " 'text': ' impacted games as much as anybody in these playoffs. And I think in'},\n",
- " {'timestamp': (20.44, 24.0),\n",
- " 'text': ' order for New England to have a shot at winning this game and winning'},\n",
- " {'timestamp': (24.0, 27.44),\n",
- " 'text': ' it straight up those two are who we are going to have to talk about'},\n",
- " {'timestamp': (27.44, 28.12), 'text': ' after the game.'},\n",
- " {'timestamp': (28.12, 0.0), 'text': ''},\n",
- " {'timestamp': (3.74, 4.74),\n",
- " 'text': \" I don't know if you can talk about pivotal matchups in this game and not mention the name Will Campbell.\"},\n",
- " {'timestamp': (4.74, 5.74), 'text': ' Yeah.'},\n",
- " {'timestamp': (5.74, 10.56),\n",
- " 'text': ' Since returning from injury, Will Campbell has truly been an Achilles heel for this offense'},\n",
- " {'timestamp': (10.56, 15.5),\n",
- " 'text': \" and Drake made the pressure that we've seen created from the left side of the offensive\"},\n",
- " {'timestamp': (15.5, 16.98), 'text': ' line of the New England Patriots.'},\n",
- " {'timestamp': (16.98, 21.44),\n",
- " 'text': \" And if you're Mike McDonald, this sea hawk defense, you're going to try to get as many\"},\n",
- " {'timestamp': (21.44, 26.44),\n",
- " 'text': ' one-on-one matchups as you can with the young rookie and believe me, he is going to be'},\n",
- " {'timestamp': (26.44, 27.44), 'text': ' the fish.'},\n",
- " {'timestamp': (27.44, 0.0), 'text': ''},\n",
- " {'timestamp': (5.44, 9.24),\n",
- " 'text': \" going to be arguments on the sideline of who gets the lineup on the right side of the defensive line. We've been showing you guys explosive Seattle pass plays all week.\"},\n",
- " {'timestamp': (9.24, 13.44),\n",
- " 'text': ' Most of them happen on first down, under sender play action.'},\n",
- " {'timestamp': (13.44, 15.76),\n",
- " 'text': \" Jackson's was in jigga running deep on the crosser.\"},\n",
- " {'timestamp': (15.76, 19.68),\n",
- " 'text': ' They average nearly nine yards of pass on first down this season.'},\n",
- " {'timestamp': (19.68, 21.44), 'text': ' Second only to new Patriots.'},\n",
- " {'timestamp': (21.44, 26.84),\n",
- " 'text': ' Milton, probably Milton Williams versus the interior of the CX offensive line is a mismatch.'},\n",
- " {'timestamp': (26.84, 0.0), 'text': ''},\n",
- " {'timestamp': (3.2, 6.04),\n",
- " 'text': ' but he might not get the opportunity to affect the game if Seattle is efficient and explosive on first down'},\n",
- " {'timestamp': (6.04, 7.64),\n",
- " 'text': ' when they can create that run past conflict.'},\n",
- " {'timestamp': (7.64, 8.64), 'text': \" It's when they're the best.\"},\n",
- " {'timestamp': (8.64, 10.4),\n",
- " 'text': ' Yeah, going last on this one is outstanding.'},\n",
- " {'timestamp': (10.4, 12.0), 'text': ' There you go, Dan.'},\n",
- " {'timestamp': (12.0, 13.0), 'text': ' Dan, do the short call.'},\n",
- " {'timestamp': (13.0, 14.0), 'text': ' I turn over.'},\n",
- " {'timestamp': (14.0, 15.28), 'text': ' I said kickers.'},\n",
- " {'timestamp': (15.28, 16.48), 'text': ' Who can weather the storm?'},\n",
- " {'timestamp': (16.48, 19.68),\n",
- " 'text': ' Both of these teams lead the NFL or tops in the NFL'},\n",
- " {'timestamp': (19.68, 22.76),\n",
- " 'text': ' when it comes to first quarter scoring differential.'},\n",
- " {'timestamp': (22.76, 25.48),\n",
- " 'text': ' So who comes out and throws the first punch?'},\n",
- " {'timestamp': (25.48, 27.08),\n",
- " 'text': ' And then who can kind of punch back?'},\n",
- " {'timestamp': (27.08, 29.76), 'text': ' Seattle plus 79.'},\n",
- " {'timestamp': (29.76, 0.0), 'text': ''},\n",
- " {'timestamp': (3.64, 8.64),\n",
- " 'text': \" first quarter, outscoring their opponents to New England, plus 45. So what team kind of takes that lead early on, but if that's the case, what team in that\"},\n",
- " {'timestamp': (8.64, 10.92),\n",
- " 'text': ' first quarter can kind of throw that counter punch back?'},\n",
- " {'timestamp': (10.92, 14.14),\n",
- " 'text': ' Dan, this is really interesting, but there was a baby that was still in the show back'},\n",
- " {'timestamp': (14.14, 15.14), 'text': \" at where's the baby?\"},\n",
- " {'timestamp': (15.14, 16.14), 'text': ' Nobody.'},\n",
- " {'timestamp': (16.14, 17.14), 'text': ' Nobody.'},\n",
- " {'timestamp': (17.14, 19.14), 'text': \" I'll take the baby.\"},\n",
- " {'timestamp': (19.14, 20.14), 'text': ' This is symbol.'},\n",
- " {'timestamp': (20.14, 21.14), 'text': ' You know baby.'},\n",
- " {'timestamp': (21.14, 22.14), 'text': ' Oh, Dan.'},\n",
- " {'timestamp': (22.14, 25.64), 'text': \" Oh, Dan's got the baby.\"},\n",
- " {'timestamp': (25.64, 29.56),\n",
- " 'text': \" Dan's not going to give the baby back.\"},\n",
- " {'timestamp': (29.56, 0.0), 'text': ''},\n",
- " {'timestamp': (0.0, 4.0),\n",
- " 'text': \" baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby 
baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby baby I'm glad that wasn't me number 12.\"},\n",
- " {'timestamp': (4.0, 6.0), 'text': \" All right, let's talk about this game.\"},\n",
- " {'timestamp': (6.0, 8.0), 'text': \" That's what I'm told in my ear.\"},\n",
- " {'timestamp': (8.0, 12.0),\n",
- " 'text': ' ESPN Analytics simulated the Super Bowl 10,000 times'},\n",
- " {'timestamp': (12.0, 16.0),\n",
- " 'text': ' and the Seahawks were victorious in 60% of those simulations.'},\n",
- " {'timestamp': (16.0, 20.0),\n",
- " 'text': ' The average score difference in those 10,000 matchups was the Seahawks'},\n",
- " {'timestamp': (20.0, 22.0), 'text': ' by about three and a half points.'},\n",
- " {'timestamp': (22.0, 26.0),\n",
- " 'text': ' Currently, Draft King Sportsbook has Seattle as a four and a half point favorite.'},\n",
- " {'timestamp': (26.0, 28.0),\n",
- " 'text': ' Swagoo, you first, who you got in this game.'},\n",
- " {'timestamp': (28.0, 0.0), 'text': ''},\n",
- " {'timestamp': (4.0, 7.88),\n",
- " 'text': ' I remember being in New England. It was snowing and the Houston Texans were coming in the town.'},\n",
- " {'timestamp': (7.88, 13.0),\n",
- " 'text': \" And I was thinking to myself, there is no way you're going to be able to beat the Houston\"},\n",
- " {'timestamp': (13.0, 16.0),\n",
- " 'text': ' Texans with the way that that defense has played.'},\n",
- " {'timestamp': (16.0, 20.92),\n",
- " 'text': \" We've talked about Seattle defense and how good they've been all season long.\"},\n",
- " {'timestamp': (20.92, 21.92), 'text': \" I'm taking Seattle.\"},\n",
- " {'timestamp': (21.92, 22.92), 'text': \" I'm taking the Seattle fan.\"},\n",
- " {'timestamp': (22.92, 27.52),\n",
- " 'text': ' I think that from the team, I think all the fans will show up.'},\n",
- " {'timestamp': (27.52, 0.0), 'text': ''},\n",
- " {'timestamp': (1.0, 5.2),\n",
- " 'text': ' All right. We have fans this entire week talking about the matchups, both sides of the ball between'},\n",
- " {'timestamp': (5.2, 6.7), 'text': ' these teams.'},\n",
- " {'timestamp': (6.7, 9.56),\n",
- " 'text': ' Most of those matchups favor the Seattle Seahawks.'},\n",
- " {'timestamp': (9.56, 11.88), 'text': ' Top to bottom, the roster is better.'},\n",
- " {'timestamp': (11.88, 16.88),\n",
- " 'text': ' If Sam Darnold protects the football, limits turnovers, I do not see how Seattle loses this'},\n",
- " {'timestamp': (16.88, 17.88), 'text': ' game.'},\n",
- " {'timestamp': (17.88, 18.88), 'text': \" I'm picking Seattle.\"},\n",
- " {'timestamp': (18.88, 23.8),\n",
- " 'text': ' Yeah, I think every team has their moment and it feels like the Seattle Seahawks are'},\n",
- " {'timestamp': (23.8, 25.76), 'text': ' destined for this moment.'},\n",
- " {'timestamp': (25.76, 0.0), 'text': ''},\n",
- " {'timestamp': (2.36, 7.92),\n",
- " 'text': ' the way that this defense has played all year. The story behind Sam Drono, then Jackson Smith and Jigabas, absolute excellence at the'},\n",
- " {'timestamp': (7.92, 8.92), 'text': ' position.'},\n",
- " {'timestamp': (8.92, 13.48),\n",
- " 'text': \" This is a team under Mike McDonald that knew exactly who they needed to be and they'll\"},\n",
- " {'timestamp': (13.48, 14.48), 'text': ' be that on Sunday evening.'},\n",
- " {'timestamp': (14.48, 15.48), 'text': \" Yeah, I'm taking Seattle.\"},\n",
- " {'timestamp': (15.48, 20.28),\n",
- " 'text': \" They've been one of the more dominant football teams you've seen in the NFL last 10 years.\"},\n",
- " {'timestamp': (20.28, 24.8),\n",
- " 'text': \" I think New England's defense can keep them in and out so much respect for Mike Brabel\"},\n",
- " {'timestamp': (24.8, 26.24), 'text': ' as a coach.'},\n",
- " {'timestamp': (26.24, 0.0), 'text': ''},\n",
- " {'timestamp': (2.54, 4.74),\n",
- " 'text': ' Seattle has been with the Rams. The two best teams in football all year.'},\n",
- " {'timestamp': (4.74, 6.14), 'text': ' Seattle plays their A game.'},\n",
- " {'timestamp': (6.14, 7.28), 'text': ' They control the football game.'},\n",
- " {'timestamp': (7.28, 7.78), 'text': ' They win.'},\n",
- " {'timestamp': (7.78, 8.48), 'text': ' Budgie, please.'},\n",
- " {'timestamp': (8.48, 13.84),\n",
- " 'text': ' We are the FFF danger of the NFL live curse affecting Super'},\n",
- " {'timestamp': (13.84, 15.88),\n",
- " 'text': \" Bowl 60, but it's not going to happen.\"},\n",
- " {'timestamp': (15.88, 16.96), 'text': ' I go with the Patriots.'},\n",
- " {'timestamp': (16.96, 17.96), 'text': ' Yeah.'},\n",
- " {'timestamp': (17.96, 18.96), 'text': ' You guys love me.'},\n",
- " {'timestamp': (18.96, 19.96), 'text': ' I knew you.'},\n",
- " {'timestamp': (19.96, 20.96), 'text': ' All right.'},\n",
- " {'timestamp': (20.96, 21.96), 'text': \" But here's why.\"},\n",
- " {'timestamp': (21.96, 23.68), 'text': ' I actually think Drake Man is leg.'},\n",
- " {'timestamp': (23.68, 25.44),\n",
- " 'text': \" It's going to be the key factor in this game.\"},\n",
- " {'timestamp': (25.44, 28.04),\n",
- " 'text': \" I think he's going to step up in huge moments.\"},\n",
- " {'timestamp': (28.04, 29.04), 'text': ' Yeah.'},\n",
- " {'timestamp': (29.04, 0.0), 'text': ''},\n",
- " {'timestamp': (2.0, 6.32),\n",
- " 'text': \" I'm doing it for you. No, seriously, Drake May is going to stand up and it's going to be great and they're\"},\n",
- " {'timestamp': (6.32, 8.32),\n",
- " 'text': ' going to win and you guys are all going to be wrong.'},\n",
- " {'timestamp': (8.32, 9.32), 'text': ' And go with that.'},\n",
- " {'timestamp': (9.32, 12.68),\n",
- " 'text': \" On Sunday night, I'll go with my seat you as part of the handoff show, which tune in\"},\n",
- " {'timestamp': (12.68, 14.96), 'text': ' around 1 a.m. Eastern time.'},\n",
- " {'timestamp': (14.96, 16.96),\n",
- " 'text': \" We'll talk about how I'm right and you guys are wrong.\"},\n",
- " {'timestamp': (16.96, 21.0),\n",
- " 'text': \" And then we'll see you again on Monday in Disney Land.\"},\n",
- " {'timestamp': (21.0, 23.64),\n",
- " 'text': \" We've got all kinds of great coverage still coming up.\"},\n",
- " {'timestamp': (23.64, 25.24), 'text': ' Thanks to everybody for being here.'},\n",
- " {'timestamp': (25.24, 28.16),\n",
- " 'text': ' Thanks for our incredible staff back in Bristol, Connecticut.'},\n",
- " {'timestamp': (28.16, 29.16), 'text': ' We love you guys.'},\n",
- " {'timestamp': (29.16, 0.0), 'text': ''},\n",
- " {'timestamp': (3.44, 5.44),\n",
- " 'text': \" So we appreciate everybody for tuning in in Joy's Super Bowl 60s. Forks that are with Matt and Hannah's next.\"},\n",
- " {'timestamp': (5.44, 6.44), 'text': \" Thank you all, I'm in.\"},\n",
- " {'timestamp': (6.44, 0.0), 'text': ''},\n",
- " {'timestamp': (1.0, 2.0), 'text': ' The The'},\n",
- " {'timestamp': (2.0, 3.0), 'text': ' The'},\n",
- " {'timestamp': (3.0, 4.0), 'text': ' The'},\n",
- " {'timestamp': (4.0, 5.0), 'text': ' The'},\n",
- " {'timestamp': (5.0, 6.0), 'text': ' The'},\n",
- " {'timestamp': (6.0, 7.0), 'text': ' The'},\n",
- " {'timestamp': (7.0, 8.0), 'text': ' The'},\n",
- " {'timestamp': (8.0, 9.0), 'text': ' The'},\n",
- " {'timestamp': (9.0, 10.0), 'text': ' The'},\n",
- " {'timestamp': (10.0, 11.0), 'text': ' The'},\n",
- " {'timestamp': (11.0, 12.0), 'text': ' The'},\n",
- " {'timestamp': (12.0, 13.0), 'text': ' The'},\n",
- " {'timestamp': (13.0, 14.0), 'text': ' The'},\n",
- " {'timestamp': (14.0, 15.0), 'text': ' The'},\n",
- " {'timestamp': (15.0, 16.0), 'text': ' The'},\n",
- " {'timestamp': (16.0, 17.0), 'text': ' The'},\n",
- " {'timestamp': (17.0, 18.0), 'text': ' The'},\n",
- " {'timestamp': (18.0, 19.0), 'text': ' The'},\n",
- " {'timestamp': (19.0, 20.0), 'text': ' The'},\n",
- " {'timestamp': (20.0, 21.0), 'text': ' The'},\n",
- " {'timestamp': (21.0, 22.0), 'text': ' The'},\n",
- " {'timestamp': (22.0, 23.0), 'text': ' The'},\n",
- " {'timestamp': (23.0, 24.0), 'text': ' The'},\n",
- " {'timestamp': (24.0, 25.0), 'text': ' The'},\n",
- " {'timestamp': (25.0, 26.0), 'text': ' The'},\n",
- " {'timestamp': (26.0, 27.0), 'text': ' The'},\n",
- " {'timestamp': (27.0, 28.0), 'text': ' The'},\n",
- " {'timestamp': (28.0, 29.0), 'text': ' The'}]"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "result['chunks']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "id": "5448a72e",
- "metadata": {},
- "outputs": [],
- "source": [
- "def whisper_result_to_dataframe(result: Mapping[str, Any], video_id: str) -> pd.DataFrame:\n",
- " \"\"\"\n",
- " Converts the dict from pipeline Whisper into a DataFrame:\n",
- " Text, Start Time, Duration, Video ID.\n",
- " \"\"\"\n",
- " rows = []\n",
- " chunks = result.get(\"chunks\") or []\n",
- " for chunk in chunks:\n",
- " text = (chunk.get(\"text\") or \"\").strip()\n",
- " ts = chunk.get(\"timestamp\")\n",
- " if not text or ts is None:\n",
- " continue\n",
- " \n",
- " if isinstance(ts, (list, tuple)) and len(ts) >= 2:\n",
- " start_s = float(ts[0])\n",
- " end_s = float(ts[1])\n",
- " \n",
- " elif isinstance(ts, dict):\n",
- " start_s = float(ts[\"start\"])\n",
- " end_s = float(ts[\"end\"])\n",
- " else:\n",
- " continue\n",
- " rows.append({\n",
- " \"Text\": text,\n",
- " \"Start Time\": start_s,\n",
- " \"Duration\": end_s - start_s,\n",
- " \"Video ID\": video_id,\n",
- " })\n",
- " return pd.DataFrame(rows)"
+ "df = extract_transcript_df(video_id, extract_with_whisper=True, whisper_pipe=pipe, audio_dir=r\"..\\data\\yt\\mp3_audio\\sports\")"
]
},
{
"cell_type": "code",
- "execution_count": 11,
- "id": "e62d0aeb",
+ "execution_count": null,
+ "id": "f6ce3b55",
"metadata": {},
"outputs": [
{
@@ -2335,150 +541,92 @@
" \n",
" \n",
" | 0 | \n",
- " We're now 50 hours till Super Bowl 60 that clock will continue to count down. | \n",
+ " Hello and welcome to the Sharp 600 brought to you by Covers.com and presented by Bet365. | \n",
" 0.00 | \n",
- " 4.88 | \n",
- " ZZN7BAYeOtc | \n",
+ " 14.38 | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
" | 1 | \n",
- " It's going to be here before we know it. | \n",
- " 4.88 | \n",
- " 1.44 | \n",
- " ZZN7BAYeOtc | \n",
+ " My name is Jason Logan. I will be your host here for the next 10 minutes as we take our first bites of Super Bowl 60 odds. | \n",
+ " 14.56 | \n",
+ " 6.28 | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
" | 2 | \n",
- " And let's continue the conversation around JSN, mean, obviously with the ball in his hands, | \n",
- " 6.32 | \n",
- " 4.32 | \n",
- " ZZN7BAYeOtc | \n",
+ " And joining us for that dinner, for that snack time, is former odds maker, current professional bettor Todd Furman. | \n",
+ " 21.00 | \n",
+ " 5.28 | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
" | 3 | \n",
- " he's so dynamic. | \n",
- " 10.64 | \n",
- " 0.80 | \n",
- " ZZN7BAYeOtc | \n",
+ " Todd, happy bye week to you. The Seahawks, the Patriots get a little bit of a breather, | \n",
+ " 4.80 | \n",
+ " 0.08 | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
" | 4 | \n",
- " But you've noticed some things when he doesn't even have the ball that you wanted to point out. | \n",
- " 11.44 | \n",
- " 2.96 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 402 | \n",
- " The | \n",
- " 24.00 | \n",
- " 1.00 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 403 | \n",
- " The | \n",
- " 25.00 | \n",
- " 1.00 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 404 | \n",
- " The | \n",
- " 26.00 | \n",
- " 1.00 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 405 | \n",
- " The | \n",
- " 27.00 | \n",
- " 1.00 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 406 | \n",
- " The | \n",
- " 28.00 | \n",
- " 1.00 | \n",
- " ZZN7BAYeOtc | \n",
+ " a little bit of downtime this week, but I always found the bye was like my busiest time, | \n",
+ " 9.38 | \n",
+ " 0.12 | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
"\n",
- "407 rows × 4 columns
\n",
""
],
"text/plain": [
- " Text \\\n",
- "0 We're now 50 hours till Super Bowl 60 that clock will continue to count down. \n",
- "1 It's going to be here before we know it. \n",
- "2 And let's continue the conversation around JSN, mean, obviously with the ball in his hands, \n",
- "3 he's so dynamic. \n",
- "4 But you've noticed some things when he doesn't even have the ball that you wanted to point out. \n",
- ".. ... \n",
- "402 The \n",
- "403 The \n",
- "404 The \n",
- "405 The \n",
- "406 The \n",
+ " Text \\\n",
+ "0 Hello and welcome to the Sharp 600 brought to you by Covers.com and presented by Bet365. \n",
+ "1 My name is Jason Logan. I will be your host here for the next 10 minutes as we take our first bites of Super Bowl 60 odds. \n",
+ "2 And joining us for that dinner, for that snack time, is former odds maker, current professional bettor Todd Furman. \n",
+ "3 Todd, happy bye week to you. The Seahawks, the Patriots get a little bit of a breather, \n",
+ "4 a little bit of downtime this week, but I always found the bye was like my busiest time, \n",
"\n",
- " Start Time Duration Video ID \n",
- "0 0.00 4.88 ZZN7BAYeOtc \n",
- "1 4.88 1.44 ZZN7BAYeOtc \n",
- "2 6.32 4.32 ZZN7BAYeOtc \n",
- "3 10.64 0.80 ZZN7BAYeOtc \n",
- "4 11.44 2.96 ZZN7BAYeOtc \n",
- ".. ... ... ... \n",
- "402 24.00 1.00 ZZN7BAYeOtc \n",
- "403 25.00 1.00 ZZN7BAYeOtc \n",
- "404 26.00 1.00 ZZN7BAYeOtc \n",
- "405 27.00 1.00 ZZN7BAYeOtc \n",
- "406 28.00 1.00 ZZN7BAYeOtc \n",
- "\n",
- "[407 rows x 4 columns]"
+ " Start Time Duration Video ID \n",
+ "0 0.00 14.38 FPl-F2k_KtM \n",
+ "1 14.56 6.28 FPl-F2k_KtM \n",
+ "2 21.00 5.28 FPl-F2k_KtM \n",
+ "3 4.80 0.08 FPl-F2k_KtM \n",
+ "4 9.38 0.12 FPl-F2k_KtM "
]
},
- "execution_count": 11,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df_whisper_transcripts = whisper_result_to_dataframe(result, video_id)\n",
- "df_whisper_transcripts"
+ "df.head()"
]
},
{
"cell_type": "code",
- "execution_count": 14,
- "id": "295b3dc1",
+ "execution_count": null,
+ "id": "767d3f95",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Saving CSV file to: c:\\Users\\adria\\OneDrive\\Área de Trabalho\\UF Data Studio\\predictions\\prediction_acquition-youtube\\../data\\yt\\whisper_transcripts\\ZZN7BAYeOtc.csv\n"
+ "Saving CSV file to: c:\\Users\\adria\\OneDrive\\Área de Trabalho\\UF Data Studio\\predictions\\prediction_acquition-youtube\\../data\\yt\\whisper_transcripts\\sports\\FPl-F2k_KtM.csv\n"
]
}
],
"source": [
"base_data_path = DataProcessing.load_base_data_path(notebook_dir=notebook_dir)\n",
- "save_data_path = os.path.join(base_data_path, \"yt\", \"whisper_transcripts\")\n",
- "DataProcessing.save_to_file(df_whisper_transcripts, path=save_data_path, prefix=f'{video_id}', save_file_type='csv', include_version=False)"
+ "save_data_path = os.path.join(base_data_path, \"yt\", \"whisper_transcripts\", \"sports\")\n",
+ "DataProcessing.save_to_file(df, path=save_data_path, prefix=f'{video_id}', save_file_type='csv', include_version=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "47269c24",
+ "id": "646c5307",
"metadata": {},
"outputs": [],
"source": []
@@ -2486,7 +634,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": ".venv",
+ "display_name": "predictions (3.11.14)",
"language": "python",
"name": "python3"
},
diff --git a/prediction_acquition-youtube/2-label_text.ipynb b/prediction_acquition-youtube/2-label_text.ipynb
index b1843b1..d66ed18 100644
--- a/prediction_acquition-youtube/2-label_text.ipynb
+++ b/prediction_acquition-youtube/2-label_text.ipynb
@@ -27,7 +27,11 @@
"import os\n",
"import sys\n",
"\n",
+ "import re\n",
+ "from typing import List\n",
+ "\n",
"import pandas as pd\n",
+ "import numpy as np\n",
"\n",
"from tqdm import tqdm\n",
"from youtube_transcript_api import YouTubeTranscriptApi\n",
@@ -70,184 +74,55 @@
"outputs": [],
"source": [
"base_data_path = DataProcessing.load_base_data_path(notebook_dir)\n",
- "yt_data_path = os.path.join(base_data_path, 'yt', 'raw_transcripts')\n",
+ "yt_data_path = os.path.join(base_data_path, 'yt', 'whisper_transcripts', 'sports')\n",
"transcripts = os.listdir(yt_data_path)"
]
},
{
"cell_type": "code",
- "execution_count": 16,
- "id": "7cf469ac",
+ "execution_count": 4,
+ "id": "7c2f61d6",
"metadata": {},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 7/7 [00:00<00:00, 191.73it/s]\n"
- ]
- },
{
"data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Text | \n",
- " Start Time | \n",
- " Duration | \n",
- " Video ID | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " we all know March Madness is all about | \n",
- " 0.199 | \n",
- " 3.081 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " filling out and following your bracket | \n",
- " 1.760 | \n",
- " 3.640 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " so we offer some first round advice | \n",
- " 3.280 | \n",
- " 4.720 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " since you NBC beat Virginia in 2018 the | \n",
- " 5.400 | \n",
- " 5.040 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 16 seed has won two of 24 meetings | \n",
- " 8.000 | \n",
- " 4.960 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 2110 | \n",
- " and we appreciate | \n",
- " 1059.919 | \n",
- " 1.281 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2111 | \n",
- " [cheering and applause] everybody for | \n",
- " 1060.457 | \n",
- " 3.543 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2112 | \n",
- " tuning in. Enjoy Super Bowl 60 Sports | \n",
- " 1061.200 | \n",
- " 4.320 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2113 | \n",
- " Center with Matt and Hannah's next. | \n",
- " 1064.000 | \n",
- " 4.520 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2114 | \n",
- " >> Amen. | \n",
- " 1065.520 | \n",
- " 3.000 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- "
\n",
- "
2115 rows × 4 columns
\n",
- "
"
- ],
"text/plain": [
- " Text Start Time Duration \\\n",
- "0 we all know March Madness is all about 0.199 3.081 \n",
- "1 filling out and following your bracket 1.760 3.640 \n",
- "2 so we offer some first round advice 3.280 4.720 \n",
- "3 since you NBC beat Virginia in 2018 the 5.400 5.040 \n",
- "4 16 seed has won two of 24 meetings 8.000 4.960 \n",
- "... ... ... ... \n",
- "2110 and we appreciate 1059.919 1.281 \n",
- "2111 [cheering and applause] everybody for 1060.457 3.543 \n",
- "2112 tuning in. Enjoy Super Bowl 60 Sports 1061.200 4.320 \n",
- "2113 Center with Matt and Hannah's next. 1064.000 4.520 \n",
- "2114 >> Amen. 1065.520 3.000 \n",
- "\n",
- " Video ID \n",
- "0 -rjnvL9LL3U \n",
- "1 -rjnvL9LL3U \n",
- "2 -rjnvL9LL3U \n",
- "3 -rjnvL9LL3U \n",
- "4 -rjnvL9LL3U \n",
- "... ... \n",
- "2110 ZZN7BAYeOtc \n",
- "2111 ZZN7BAYeOtc \n",
- "2112 ZZN7BAYeOtc \n",
- "2113 ZZN7BAYeOtc \n",
- "2114 ZZN7BAYeOtc \n",
- "\n",
- "[2115 rows x 4 columns]"
+ "['-rjnvL9LL3U.csv',\n",
+ " '6STv2GFNB6I.csv',\n",
+ " 'AoE8KFAXHSc.csv',\n",
+ " 'FPl-F2k_KtM.csv',\n",
+ " 'fUmJAtFEGn8.csv',\n",
+ " 'jCe-bY1nP7o.csv',\n",
+ " 'LXPQrZV4Cfw.csv',\n",
+ " 'mBK8o5orBbE.csv',\n",
+ " 'MTVAkVkkaz4.csv',\n",
+ " 'Z0xP3GNpjkw.csv',\n",
+ " 'ZZN7BAYeOtc.csv']"
]
},
- "execution_count": 16,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "dfs = []\n",
- "\n",
- "for transcript in tqdm(transcripts):\n",
- " transcript_path = os.path.join(yt_data_path, transcript)\n",
- " df = DataProcessing.load_from_file(path=transcript_path, file_type='csv')\n",
- " dfs.append(df)\n",
+ "transcripts"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bc6adbc6",
+ "metadata": {},
+ "source": [
+ "Choose the video transcript that you want to pass through the pipeline. \n",
"\n",
- "raw_transcripts_df = DataProcessing.concat_dfs(dfs)\n",
- "raw_transcripts_df"
+ "Change the index used on the `transcripts` list in the next cell to select the CSV file for the desired \"Video ID\"."
]
},
{
"cell_type": "code",
- "execution_count": 42,
- "id": "00ce4070",
+ "execution_count": 5,
+ "id": "f24214ce",
"metadata": {},
"outputs": [
{
@@ -280,488 +155,366 @@
" \n",
" \n",
" | 0 | \n",
- " we all know March Madness is all about | \n",
- " 0.199 | \n",
- " 3.081 | \n",
- " -rjnvL9LL3U | \n",
+ " Hello and welcome to the Sharp 600 brought to you by Covers.com and presented by Bet365. | \n",
+ " 0.00 | \n",
+ " 14.38 | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
" | 1 | \n",
- " filling out and following your bracket | \n",
- " 1.760 | \n",
- " 3.640 | \n",
- " -rjnvL9LL3U | \n",
+ " My name is Jason Logan. I will be your host here for the next 10 minutes as we take our first bites of Super Bowl 60 odds. | \n",
+ " 14.56 | \n",
+ " 6.28 | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
" | 2 | \n",
- " so we offer some first round advice | \n",
- " 3.280 | \n",
- " 4.720 | \n",
- " -rjnvL9LL3U | \n",
+ " And joining us for that dinner, for that snack time, is former odds maker, current professional bettor Todd Furman. | \n",
+ " 21.00 | \n",
+ " 5.28 | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
" | 3 | \n",
- " since you NBC beat Virginia in 2018 the | \n",
- " 5.400 | \n",
- " 5.040 | \n",
- " -rjnvL9LL3U | \n",
+ " Todd, happy bye week to you. The Seahawks, the Patriots get a little bit of a breather, | \n",
+ " 4.80 | \n",
+ " 0.08 | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
" | 4 | \n",
- " 16 seed has won two of 24 meetings | \n",
- " 8.000 | \n",
- " 4.960 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " versus the one occasional upset aside | \n",
- " 10.440 | \n",
- " 4.560 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 6 | \n",
- " though the analytics say Advance all the | \n",
- " 12.960 | \n",
- " 4.159 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 7 | \n",
- " one two and three seeds in your bracket | \n",
- " 15.000 | \n",
- " 4.039 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 8 | \n",
- " looking for a long shot the 11 seed has | \n",
- " 17.119 | \n",
- " 3.881 | \n",
- " -rjnvL9LL3U | \n",
- "
\n",
- " \n",
- " | 9 | \n",
- " won half its meetings with the six seed | \n",
- " 19.039 | \n",
- " 4.801 | \n",
- " -rjnvL9LL3U | \n",
+ " a little bit of downtime this week, but I always found the bye was like my busiest time, | \n",
+ " 9.38 | \n",
+ " 0.12 | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " Text Start Time Duration Video ID\n",
- "0 we all know March Madness is all about 0.199 3.081 -rjnvL9LL3U\n",
- "1 filling out and following your bracket 1.760 3.640 -rjnvL9LL3U\n",
- "2 so we offer some first round advice 3.280 4.720 -rjnvL9LL3U\n",
- "3 since you NBC beat Virginia in 2018 the 5.400 5.040 -rjnvL9LL3U\n",
- "4 16 seed has won two of 24 meetings 8.000 4.960 -rjnvL9LL3U\n",
- "5 versus the one occasional upset aside 10.440 4.560 -rjnvL9LL3U\n",
- "6 though the analytics say Advance all the 12.960 4.159 -rjnvL9LL3U\n",
- "7 one two and three seeds in your bracket 15.000 4.039 -rjnvL9LL3U\n",
- "8 looking for a long shot the 11 seed has 17.119 3.881 -rjnvL9LL3U\n",
- "9 won half its meetings with the six seed 19.039 4.801 -rjnvL9LL3U"
+ " Text \\\n",
+ "0 Hello and welcome to the Sharp 600 brought to you by Covers.com and presented by Bet365. \n",
+ "1 My name is Jason Logan. I will be your host here for the next 10 minutes as we take our first bites of Super Bowl 60 odds. \n",
+ "2 And joining us for that dinner, for that snack time, is former odds maker, current professional bettor Todd Furman. \n",
+ "3 Todd, happy bye week to you. The Seahawks, the Patriots get a little bit of a breather, \n",
+ "4 a little bit of downtime this week, but I always found the bye was like my busiest time, \n",
+ "\n",
+ " Start Time Duration Video ID \n",
+ "0 0.00 14.38 FPl-F2k_KtM \n",
+ "1 14.56 6.28 FPl-F2k_KtM \n",
+ "2 21.00 5.28 FPl-F2k_KtM \n",
+ "3 4.80 0.08 FPl-F2k_KtM \n",
+ "4 9.38 0.12 FPl-F2k_KtM "
]
},
- "execution_count": 42,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "raw_transcripts_df.head(10)"
+ "transcript_path = os.path.join(yt_data_path, transcripts[3])\n",
+ "\n",
+ "raw_transcripts_df = DataProcessing.load_from_file(path=transcript_path, file_type='csv')\n",
+ "\n",
+ "raw_transcripts_df.head()"
]
},
{
"cell_type": "code",
- "execution_count": 33,
- "id": "bebc37fa",
+ "execution_count": 6,
+ "id": "7cf469ac",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Text | \n",
- " Start Time | \n",
- " Duration | \n",
- " Video ID | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 2085 | \n",
- " >> We are in danger of the NFL live curse | \n",
- " 1010.240 | \n",
- " 6.560 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2086 | \n",
- " affecting Super Bowl 60, but it's not | \n",
- " 1014.800 | \n",
- " 4.159 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2087 | \n",
- " going to happen. Go with the Patriots. | \n",
- " 1016.800 | \n",
- " 3.120 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2088 | \n",
- " >> YES, BOOGIE. | \n",
- " 1018.959 | \n",
- " 3.521 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2089 | \n",
- " >> YOU GUYS LOVE ME. I need to. All right, | \n",
- " 1019.920 | \n",
- " 4.720 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2090 | \n",
- " but here's why. I actually think Drake | \n",
- " 1022.480 | \n",
- " 3.680 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2091 | \n",
- " May and his legs is going to be the key | \n",
- " 1024.640 | \n",
- " 3.120 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2092 | \n",
- " factor in this game. I think he's going | \n",
- " 1026.160 | \n",
- " 4.320 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2093 | \n",
- " to step up in huge moments. | \n",
- " 1027.760 | \n",
- " 4.400 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2094 | \n",
- " >> Yeah. And I'm doing it for you, Mina. | \n",
- " 1030.480 | \n",
- " 4.000 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2095 | \n",
- " I'm doing it for you. But seriously, | \n",
- " 1032.160 | \n",
- " 3.919 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2096 | \n",
- " Drake May's going to stand up and it's | \n",
- " 1034.480 | \n",
- " 2.880 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2097 | \n",
- " going to be great and they're going to | \n",
- " 1036.079 | \n",
- " 2.720 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2098 | \n",
- " win and you guys are all going to be | \n",
- " 1037.360 | \n",
- " 2.479 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2099 | \n",
- " wrong. And | \n",
- " 1038.799 | \n",
- " 3.040 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2100 | \n",
- " >> on Sunday night when we see you as part | \n",
- " 1039.839 | \n",
- " 3.681 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2101 | \n",
- " of the Handoff Show, which tune in | \n",
- " 1041.839 | \n",
- " 4.080 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2102 | \n",
- " around 1:00 a.m. Eastern time, we'll | \n",
- " 1043.520 | \n",
- " 3.679 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2103 | \n",
- " talk about how I'm right and you guys | \n",
- " 1045.919 | \n",
- " 3.760 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2104 | \n",
- " are wrong. And then we'll see you again | \n",
- " 1047.199 | \n",
- " 5.441 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2105 | \n",
- " on Monday in Disneyland. We've got all | \n",
- " 1049.679 | \n",
- " 4.721 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2106 | \n",
- " kinds of great coverage still coming up. | \n",
- " 1052.640 | \n",
- " 3.360 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2107 | \n",
- " Thanks to everybody for being here. | \n",
- " 1054.400 | \n",
- " 3.519 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2108 | \n",
- " Thanks to our incredible staff back in | \n",
- " 1056.000 | \n",
- " 3.919 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2109 | \n",
- " Bristol, Connecticut. We love you guys | \n",
- " 1057.919 | \n",
- " 2.538 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2110 | \n",
- " and we appreciate | \n",
- " 1059.919 | \n",
- " 1.281 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2111 | \n",
- " [cheering and applause] everybody for | \n",
- " 1060.457 | \n",
- " 3.543 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2112 | \n",
- " tuning in. Enjoy Super Bowl 60 Sports | \n",
- " 1061.200 | \n",
- " 4.320 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2113 | \n",
- " Center with Matt and Hannah's next. | \n",
- " 1064.000 | \n",
- " 4.520 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- " | 2114 | \n",
- " >> Amen. | \n",
- " 1065.520 | \n",
- " 3.000 | \n",
- " ZZN7BAYeOtc | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Text Start Time Duration \\\n",
- "2085 >> We are in danger of the NFL live curse 1010.240 6.560 \n",
- "2086 affecting Super Bowl 60, but it's not 1014.800 4.159 \n",
- "2087 going to happen. Go with the Patriots. 1016.800 3.120 \n",
- "2088 >> YES, BOOGIE. 1018.959 3.521 \n",
- "2089 >> YOU GUYS LOVE ME. I need to. All right, 1019.920 4.720 \n",
- "2090 but here's why. I actually think Drake 1022.480 3.680 \n",
- "2091 May and his legs is going to be the key 1024.640 3.120 \n",
- "2092 factor in this game. I think he's going 1026.160 4.320 \n",
- "2093 to step up in huge moments. 1027.760 4.400 \n",
- "2094 >> Yeah. And I'm doing it for you, Mina. 1030.480 4.000 \n",
- "2095 I'm doing it for you. But seriously, 1032.160 3.919 \n",
- "2096 Drake May's going to stand up and it's 1034.480 2.880 \n",
- "2097 going to be great and they're going to 1036.079 2.720 \n",
- "2098 win and you guys are all going to be 1037.360 2.479 \n",
- "2099 wrong. And 1038.799 3.040 \n",
- "2100 >> on Sunday night when we see you as part 1039.839 3.681 \n",
- "2101 of the Handoff Show, which tune in 1041.839 4.080 \n",
- "2102 around 1:00 a.m. Eastern time, we'll 1043.520 3.679 \n",
- "2103 talk about how I'm right and you guys 1045.919 3.760 \n",
- "2104 are wrong. And then we'll see you again 1047.199 5.441 \n",
- "2105 on Monday in Disneyland. We've got all 1049.679 4.721 \n",
- "2106 kinds of great coverage still coming up. 1052.640 3.360 \n",
- "2107 Thanks to everybody for being here. 1054.400 3.519 \n",
- "2108 Thanks to our incredible staff back in 1056.000 3.919 \n",
- "2109 Bristol, Connecticut. We love you guys 1057.919 2.538 \n",
- "2110 and we appreciate 1059.919 1.281 \n",
- "2111 [cheering and applause] everybody for 1060.457 3.543 \n",
- "2112 tuning in. Enjoy Super Bowl 60 Sports 1061.200 4.320 \n",
- "2113 Center with Matt and Hannah's next. 1064.000 4.520 \n",
- "2114 >> Amen. 1065.520 3.000 \n",
- "\n",
- " Video ID \n",
- "2085 ZZN7BAYeOtc \n",
- "2086 ZZN7BAYeOtc \n",
- "2087 ZZN7BAYeOtc \n",
- "2088 ZZN7BAYeOtc \n",
- "2089 ZZN7BAYeOtc \n",
- "2090 ZZN7BAYeOtc \n",
- "2091 ZZN7BAYeOtc \n",
- "2092 ZZN7BAYeOtc \n",
- "2093 ZZN7BAYeOtc \n",
- "2094 ZZN7BAYeOtc \n",
- "2095 ZZN7BAYeOtc \n",
- "2096 ZZN7BAYeOtc \n",
- "2097 ZZN7BAYeOtc \n",
- "2098 ZZN7BAYeOtc \n",
- "2099 ZZN7BAYeOtc \n",
- "2100 ZZN7BAYeOtc \n",
- "2101 ZZN7BAYeOtc \n",
- "2102 ZZN7BAYeOtc \n",
- "2103 ZZN7BAYeOtc \n",
- "2104 ZZN7BAYeOtc \n",
- "2105 ZZN7BAYeOtc \n",
- "2106 ZZN7BAYeOtc \n",
- "2107 ZZN7BAYeOtc \n",
- "2108 ZZN7BAYeOtc \n",
- "2109 ZZN7BAYeOtc \n",
- "2110 ZZN7BAYeOtc \n",
- "2111 ZZN7BAYeOtc \n",
- "2112 ZZN7BAYeOtc \n",
- "2113 ZZN7BAYeOtc \n",
- "2114 ZZN7BAYeOtc "
- ]
- },
- "execution_count": 33,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "raw_transcripts_filter_df = raw_transcripts_df.tail(30)\n",
- "raw_transcripts_filter_df"
+ "# dfs = []\n",
+ "\n",
+ "# for transcript in tqdm(transcripts):\n",
+ "# transcript_path = os.path.join(yt_data_path, transcript)\n",
+ "# df = DataProcessing.load_from_file(path=transcript_path, file_type='csv')\n",
+ "# dfs.append(df)\n",
+ "\n",
+ "# raw_transcripts_df = DataProcessing.concat_dfs(dfs)\n",
+ "# raw_transcripts_df.head()"
]
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 7,
"id": "2fb60bd5",
"metadata": {},
"outputs": [],
"source": [
"def clean_data(df) -> list[str]:\n",
- " \"\"\"Rows can contain multiple sentences and some sentences can be on multiple rows, so ensure we \n",
- " join proper sentences together.\n",
+ " \"\"\"\n",
+ " Split running text into sentence-like segments.\n",
+ " Rules:\n",
+ " - Split only where a period (.) or question mark (?) is immediately followed by\n",
+ " whitespace. This avoids splitting decimals like ``4.5`` (digit after the dot,\n",
+ " not a space).\n",
+ " - Leading/trailing whitespace is stripped from each segment.\n",
+ " - Empty segments are dropped.\n",
+ " Caveat:\n",
+ " - Abbreviations such as ``Mr. Smith`` still match \". \" and may split incorrectly;\n",
+ " handle those with a tokenizer or an allowlist if needed.\n",
+ " Parameters\n",
+ " ----------\n",
+ " df : pandas.DataFrame\n",
+ " Transcript rows; the values of its ``Text`` column are joined with single spaces and then segmented.\n",
+ " Returns\n",
+ " -------\n",
+ " list of str\n",
+ " Non-empty strings, each intended as one sentence or clause.\n",
" \"\"\"\n",
"\n",
- " text = df.Text.to_list()\n",
- " text_joined = ' '.join(text)\n",
- " # print(f\"{text_joined}\")\n",
- " text_split = text_joined.split('.')\n",
+ " text_joined = ' '.join(df.Text.to_list())\n",
+ " raw_parts = re.split(r'(?<=[.?])\\s+', text_joined)\n",
+ " text_split = [p.strip() for p in raw_parts if p.strip()]\n",
+ "\n",
" return text_split"
]
},
{
"cell_type": "code",
- "execution_count": 35,
- "id": "38f77818",
+ "execution_count": 8,
+ "id": "af279592",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[\">> We are in danger of the NFL live curse affecting Super Bowl 60, but it's not going to happen\",\n",
- " ' Go with the Patriots',\n",
- " ' >> YES, BOOGIE',\n",
- " ' >> YOU GUYS LOVE ME',\n",
- " ' I need to',\n",
- " \" All right, but here's why\",\n",
- " ' I actually think Drake May and his legs is going to be the key factor in this game',\n",
- " \" I think he's going to step up in huge moments\",\n",
- " ' >> Yeah',\n",
- " \" And I'm doing it for you, Mina\",\n",
- " \" I'm doing it for you\",\n",
- " \" But seriously, Drake May's going to stand up and it's going to be great and they're going to win and you guys are all going to be wrong\",\n",
- " ' And >> on Sunday night when we see you as part of the Handoff Show, which tune in around 1:00 a',\n",
- " 'm',\n",
- " \" Eastern time, we'll talk about how I'm right and you guys are wrong\",\n",
- " \" And then we'll see you again on Monday in Disneyland\",\n",
- " \" We've got all kinds of great coverage still coming up\",\n",
- " ' Thanks to everybody for being here',\n",
- " ' Thanks to our incredible staff back in Bristol, Connecticut',\n",
- " ' We love you guys and we appreciate [cheering and applause] everybody for tuning in',\n",
- " \" Enjoy Super Bowl 60 Sports Center with Matt and Hannah's next\",\n",
- " ' >> Amen',\n",
- " '']"
+ "['Hello and welcome to the Sharp 600 brought to you by Covers.com and presented by Bet365.',\n",
+ " 'My name is Jason Logan.',\n",
+ " 'I will be your host here for the next 10 minutes as we take our first bites of Super Bowl 60 odds.',\n",
+ " 'And joining us for that dinner, for that snack time, is former odds maker, current professional bettor Todd Furman.',\n",
+ " 'Todd, happy bye week to you.',\n",
+ " 'The Seahawks, the Patriots get a little bit of a breather, a little bit of downtime this week, but I always found the bye was like my busiest time, at least in terms of what I need to do, the capping, the content, the planning, all these things, everything that we do at Covers for Super Bowl was very, very busy.',\n",
+ " 'For you as a pro better, what do these two weeks look like?',\n",
+ " 'The schedule has changed considerably from what it is during the bye week now versus what it would have been during the bye week 10 years ago.',\n",
+ " \"I mean, J-Lo, we had player props out at most of the prominent books, bet three, six, five, as well as early as Monday morning where you've been in the industry long enough.\",\n",
+ " 'And this is going to make us sound really old.',\n",
+ " \"You didn't start to get player props until Friday, Saturday, Sunday, leading up to the big game in and of itself.\",\n",
+ " \"So you kind of have to have rough projections, know which players that you've targeted.\",\n",
+ " 'You want to jump on right away.',\n",
+ " \"And then you begin to dig into some of the X's and O's and try and figure out, okay, where can I add to those positions?\",\n",
+ " 'Where can I find my biggest edges?',\n",
+ " \"and what have the odds makers not properly accounted for versus current form compared to season-long metrics where they'll heavily weight them towards a lot of individual player performances.\",\n",
+ " 'Yeah, I was betting player props Sunday night.',\n",
+ " 'They were out Sunday night in a few books.',\n",
+ " 'And that was first crack.',\n",
+ " 'First crack that Sunday compared to first crack the Sunday before the Super Bowl a decade ago.',\n",
+ " 'Yeah, I remember the Thursday before the Super Bowl and all the props would come out.',\n",
+ " 'It was like that was the big thing.',\n",
+ " 'But, yeah, times, they are changing.',\n",
+ " \"Before we start the clock here on what's going to be the second last podcast of the season, I can't believe that, a simple ask.\",\n",
+ " \"If you have been listening to us all season or if you're just finding us for the first time here ahead of Super Bowl, we ask that if you like what you're hearing, let us know.\",\n",
+ " 'Let other people know.',\n",
+ " 'Rate and review the podcast on those pod platforms.',\n",
+ " \"If you're watching us on YouTube, like, subscribe to the Covers YouTube channel, and leave a comment.\",\n",
+ " 'What are you betting in the big game?',\n",
+ " \"We'd like to know.\",\n",
+ " \"all right todd let's have 600 super seconds on the clock all right let's break down the line here super bowl 60 the spread opens as low as three and a half but we see a quick move they forget about four they go four and a half that's where we see this right now we have seen five we have seen some five and a halves uh todd for me i mean this I look at this, this is the best spread that bookies could ask for right now because of all that wiggle room in the middle.\",\n",
+ " 'What are you seeing from this spread?',\n",
+ " 'What do you like?',\n",
+ " 'What have you got down on early?',\n",
+ " 'You mentioned where the number opens at 3.5.',\n",
+ " 'The look-ahead number going into the championship Sunday, you see a couple of books stick to their guns.',\n",
+ " \"Then we did see some money come in on the favor, to your point, getting stuck in no man's land where books are going to have a ton of flexibility.\",\n",
+ " 'The opportunity to move down to 4 if they see professional money most likely coming in on New England, or drift the number a lot further out in that 5-5.5 range if the public comes in on one side, or I guess I should say if the professionals decide they want to back the Seahawks.',\n",
+ " \"For me, the 6 was going to be my buy point on the dog, and it wasn't widely available, so I don't really have a true position on the side in this game early on.\",\n",
+ " 'My number was 4.',\n",
+ " \"I want to dig into it a little bit, figure out if there's a matchup or two that I think one of these sides can exploit.\",\n",
+ " \"My initial inclination was that if you're going to give me a good defense, that's going to have the opportunity to catch points in this particular spot.\",\n",
+ " \"I'm typically going to gravitate towards that more often than not, but don't have a full position on this game yet.\",\n",
+ " \"And J-Lo, I'm not quite sure the betting market is going to allow me an opportunity to get involved with the number that I've targeted.\",\n",
+ " 'Yeah, for me, knowing what the look-ahead spread was, I knew we were going to see a money line on Seattle around 2-1 for that favorite.',\n",
+ " \"And that's where it came out, and that's where I took them right away.\",\n",
+ " \"When it comes to four and four-and-a-half point favorites, I've kind of made it a rule for myself to say, you know what don't play the points just go with the money line i find that four to four and a half is it's been since kind of four became more of a key number since 2015 it's been kind of a no man's land where that four point favorite or the four and a half point favorite will win but not quite cover they're good enough to be favored by more than a field goal but not quite good enough to be favored by six and so for me i went to seattle straight out just i mean two games over the last two weeks the new england patriots win but don't cover they close a three and a half four point favorite against the denver broncos and the week before we see the chicago bears come up just short in the straight up category but find a way to cover as a three and a half four point underdog so i think that number is a lot more important than people give it credit for and to your point you're going to see a money line that may come down as well if you gravitate towards seattle closer to kickoff yeah i mean you're going to see oh we always see underdog money on the money line So there'll probably be some Patriots outright money showing up.\",\n",
+ " \"Let's talk total here.\",\n",
+ " 'Look ahead was 45, open 46 and a half.',\n",
+ " \"Now we're seeing 46 is 45 and a half.\",\n",
+ " 'A clear weather game for the Patriots.',\n",
+ " \"I'm leaning towards over in this one.\",\n",
+ " 'I want to know your thoughts.',\n",
+ " 'A weather game times two.',\n",
+ " 'I mean, played in a blizzard in the AFC championship game and played in freezing rain the week before against the Houston Texans.',\n",
+ " \"A game that snuck over, but it wasn't because of offensive execution from either of those two teams.\",\n",
+ " \"It's always interesting to figure out how much a weather is suppressing a total.\",\n",
+ " 'In a game like this, it appears the forecast in Santa Clara is going to be pretty good.',\n",
+ " \"Now, it is an outdoor venue, so we're not quite sure exactly what that track is going to look like.\",\n",
+ " \"We've seen games played in domes typically offer a faster track for both these teams in the past.\",\n",
+ " 'For me, 47 was kind of the number that I was looking for to make a case to go under.',\n",
+ " \"If I am going to get involved in a total, I'll probably gravitate a little bit more towards looking at that first half number.\",\n",
+ " 'But wish I had stronger convictions right now because I do think you have suppressed totals there.',\n",
+ " \"But you also have a Seahawks offense that's going to be stepping up in class given the last three games, two against the 49ers, one against the Rams.\",\n",
+ " \"I'm not sure either of those secondaries would be described as anything more than a bottom six unit.\",\n",
+ " 'Yeah, I always look at under in the derivative market, specifically in that first quarter unders.',\n",
+ " 'Those tend to be lower scoring frames.',\n",
+ " 'Both teams a little jacked up.',\n",
+ " 'You got two quarterbacks a little tight on those throws.',\n",
+ " 'We see some miscues and things like that.',\n",
+ " 'So I do lean towards those unders in the first quarter.',\n",
+ " \"All right, we've still got a lot of time here before Super Bowl kickoff.\",\n",
+ " \"Let's talk do's and don'ts for sports bettors.\",\n",
+ " \"I'm going to start with a do.\",\n",
+ " \"And I'm going to say do take advantage of the live in-game odds, especially when it comes to any early game anomalies.\",\n",
+ " 'If the Patriots get up quick, but they do so on, say, a defensive score or a special team score, anything weird that happens early on, usually you can take an advantage of a market that will overcorrect to that, especially with everyone and their dog betting in this game and people chasing their bets.',\n",
+ " 'So any early game anomalies, look and see if you can find some buyback points in the in-game markets.',\n",
+ " 'Todd, you got a do for us in terms of Super Bowl betting?',\n",
+ " \"I do, but I want to piggyback what you said, and I think the other interesting angle that you can take, it's not just side total.\",\n",
+ " \"It's live player props as well that get heavily impacted by game state.\",\n",
+ " 'So I think there are a lot more opportunities now than there would have been when we had this discussion, say, five, seven, ten years ago in those particular live markets.',\n",
+ " \"Dues for me, I feel like I'm a man that is absolutely no fun to watch the game with.\",\n",
+ " 'Do make a case to bet a lot of player performances to come in under their posted totals.',\n",
+ " \"I know it's not fun to root against anything happening during the course of a football game, You look at a lot of these markets that are set.\",\n",
+ " 'People are going to gravitate towards rooting for things to happen.',\n",
+ " \"They're going to see low yardage totals with those third and fourth string wide receivers and tight ends and hope those players can come up with a catcher to do look to bet things under and do be willing to lay prices to be able to do so.\",\n",
+ " 'And are we doing that on closer to kickoff, waiting for those odds to go up?',\n",
+ " 'Certain players, I would say yes.',\n",
+ " 'Other players, sometimes you have to jump a little bit sooner.',\n",
+ " \"but I would not be running to bet JSN under his receiving total until the 23rd hour let's put it that way all right well I've got a don't here and this is more of an etiquette thing if you're at a Super Bowl party if you're with a group and that group is maybe not of the betting guild don't constantly talk about your bets I'm someone that does this for for a job and I can't stand those people if you listen if you're with your buddies and it's the group ride parlay and this is your group chat that you're with you know party up go crazy have not go nuts but for god's sakes read the room and don't get down on everyone because you're losing some wager celebrate your wins you hit a 25 to 1 first touchdown winner celebrate it have fun if you lose on heads don't throw a fit don't bring everyone down keep your keep your betting excitement in check that that's my don't todd you got a don't for us you know one of my don'ts is don't be a superbowl grinch like me who typically watches the big game every year by himself given some of the immediate responsibilities that I have.\",\n",
+ " 'I can control the temperature.',\n",
+ " 'I can control the alcohol intake and the food and everything else.',\n",
+ " \"But for me, it's don't chase some of those long shot prices with the majority of your bankroll.\",\n",
+ " \"It's more than fine to get involved in some of those long shot markets.\",\n",
+ " \"But at the same time, if anything, you're not getting the necessary value and upside for trying to look for some of those depth players to go over those laddering numbers that are out there or a variety of other markets.\",\n",
+ " \"So don't just chase that plus money.\",\n",
+ " 'Be judicious about how you allocate your bankroll.',\n",
+ " 'All right, time to talk Super Bowl MVP odds.',\n",
+ " 'Todd, who do you like to be the most valuable player Super Bowl 60?',\n",
+ " \"So J-Lo, normally you're gonna gravitate towards the starting quarterback of the team that you think is going to win the game, but what fun would that be in a game like this?\",\n",
+ " 'I am going to take a skilled position player for a team that I think wins the game, and that would lead me to Ramondre Stevenson at 28 to one.',\n",
+ " \"look we've seen a Patriots running back get snubbed in the past that had 37 catches and like 900 receiving yards this is a chance for the world to right all of its wrongs and give Ramondre the hardware if he has two touchdowns and 75 all-purpose yards give me Ramondre Stevenson 28 to 1 all right I'll see your skill player and raise you a defensive player go on Demarcus Lawrence it only makes sense that a cowboy is going to get to the Super Bowl and show it almost last year I did uh but I'm happy for Tank but let's talk about this defense this is the reason why San Seattle is here This is the reason why they're favored.\",\n",
+ " 'He is the most disruptive player on this team.',\n",
+ " 'Six sacks, 11 tackles for a loss, three forced fumbles, scooped up all of those in the regular season.',\n",
+ " 'Two sacks, three forced fumbles in the playoffs so far.',\n",
+ " \"The Patriots pass protection hasn't been great.\",\n",
+ " 'I like Tank, MVP, plus 10,000.',\n",
+ " 'All right, two-minute drill time.',\n",
+ " 'Early bets here for Super Bowl 60.',\n",
+ " \"I'm going with Seattle receiver Raheed Shaheed, over 21.5 yards, receiving minus 110.\",\n",
+ " 'since he came over at the trade deadline.',\n",
+ " 'Seattle has faced a lot of opponents that run zone.',\n",
+ " 'A lot of those defense runs zone exclusively.',\n",
+ " 'He is a much better receiver at man.',\n",
+ " 'And what does he face in the Patriots?',\n",
+ " 'He faces a defense that runs one of the highest rates of man.',\n",
+ " \"There's also someone that likes throwing against man, and that's Sam Darnold.\",\n",
+ " 'So I think Shahid gets loose here.',\n",
+ " 'He could put this one away in one catch.',\n",
+ " 'Over 21 and a half yards receiving.',\n",
+ " \"Rashid Shahid still on my shit list because he didn't want to take contact over a half a rushing yard with the end around.\",\n",
+ " 'decided he was going to dance around.',\n",
+ " 'And example number 758.',\n",
+ " 'I had it.',\n",
+ " \"Why you can't trust a Diva wide receiver as a ball carrier in the backfield.\",\n",
+ " \"For me, I'm going to go to that Seattle Seahawks backfield though and take Kenneth Walker under his rushing total at 75 and a half.\",\n",
+ " 'Look, Walker started out like a house on fire last week against the Rams.',\n",
+ " 'Eight carries early on in that game for 35 yards and then kind of petered out.',\n",
+ " \"11 carries the rest of the game and didn't show that big explosive potential even against the much maligned Rams run defense.\",\n",
+ " 'Now he steps up in class against the Patriots run defense that is truly elite when Milton Williams is back in the full to anchor that defensive line.',\n",
+ " 'Seattle does love to run the football on early downs.',\n",
+ " 'I think they may have to go away from that approach.',\n",
+ " \"And even without Zach Charbonnet, I just don't see Kenneth Walker getting to mid-70s or higher.\",\n",
+ " 'I have him projected J-Lo in the high 60s, under 75.5 for the bell cow back wearing Seahawks colors.',\n",
+ " \"All right, I'm going to go to the tight end for the Patriots.\",\n",
+ " 'Hunter Henry over 36.5 yards, receiving minus 110.',\n",
+ " \"He's been taking some punishment, and now he faces a ferocious Seattle pass rush that can get pressure with just four.\",\n",
+ " \"He's not going to have time for downfield plays to develop.\",\n",
+ " \"He's going to have to hit those quick hits, and Hunter Henry has been that pressure release.\",\n",
+ " 'We have seen Seattle give up a lot of targets, a lot of receptions, two tight ends.',\n",
+ " 'Projections for Henry, sit as high as 46.',\n",
+ " 'I like him to go over 36.5.',\n",
+ " 'Small flyer for me, a Seattle Seahawks receiver, not named Cooper Cup, not named Rasheed Sheed, not named Jackson Smith and Jigba.',\n",
+ " 'Jake Bobo, over a half a receiving yard at plus $1.15.',\n",
+ " 'Bobo had a catch in each of the last two games, 16 yards, 17 yards.',\n",
+ " \"I think he's a guy that may be targeted a couple times here.\",\n",
+ " 'Just one catch, get us one yard, take me there at plus $1.15.',\n",
+ " 'All right, you got two weeks to scheme up some stuff.',\n",
+ " 'You got to get tricky sometimes and throw those guys in there.',\n",
+ " 'He did not have a touchdown in that NFC Championship game.',\n",
+ " 'Was he the guy that scored a touchdown in that one?',\n",
+ " 'No, he did have a touchdown there.',\n",
+ " 'Had one catch each of the last two games, 16 yards, 17 yards, and a TD.',\n",
+ " \"All I'm just looking for is a receiving yard here to get us across the finish line.\",\n",
+ " \"Yeah, that's it.\",\n",
+ " \"He's not asking for much, Bobo.\",\n",
+ " \"Just, you know, that's it.\",\n",
+ " \"That's it.\",\n",
+ " \"I can't wait to see you freak out on Super Bowl Sunday when he doesn't get that.\",\n",
+ " 'But remember the etiquette.',\n",
+ " 'Yeah.',\n",
+ " 'All right.',\n",
+ " 'A couple of targets.',\n",
+ " 'A couple of targets.',\n",
+ " 'A couple of drops.',\n",
+ " 'You know how this goes.',\n",
+ " 'I know exactly how it goes.',\n",
+ " \"It'll be all right.\",\n",
+ " 'All right.',\n",
+ " 'That is it for our first Super Bowl pod.',\n",
+ " 'We will have another one next week.',\n",
+ " 'A big thanks to Chris behind the scenes.',\n",
+ " 'A big thank you to Todd for joining us once again.',\n",
+ " \"thanks to beth365 for the odds and of course thank you to you for tuning in and listening whether you've been with us all year or you're just finding us here ahead of the big game a reminder if you like what we're doing rate and review like and subscribe on youtube it just takes a matter of seconds uh but it means a lot to us so please do that when you can todd uh you thought you would get out of this podcast without some super bowl silliness but no because you know i mean i look not my first rodeo i know better that there's always something waiting behind door number three it's a basic one by this by this point in in super bowl betting this is a very common wager what color gatorade is going to get thrown on the winning coach here at the end of super bowl six i mean i feel like you're tipping your hand given what's in your cup uh when we recorded this video earlier today you claim that it's not blue gatorade that it's just a blue tinted cup but i'm gonna go with blue look we have a couple of teams wearing darker colors there that could play a role, I think blue is going to be my preferred way to go, mainly because, look, I'm a big fan of that particular flavor myself, so I like to use firsthand knowledge to get me across the finish line, more so than that fruit punch and water or anything else.\",\n",
+ " \"So let's go with blue from the Gatorade color.\",\n",
+ " 'See, I always used to lean towards water because I knew it was there, but then I got to remember, too, like this is a corporate marketing spot right here.',\n",
+ " 'And the last thing the Gatorade bigwigs want to see is water being thrown on the winning coach.',\n",
+ " \"I'm going to go lime slash green, you know, traditional Gatorade color.\",\n",
+ " 'But I do have the Seahawks money line, so might as well tie it to the Gatorade color.',\n",
+ " \"That'll probably be on the sideline.\",\n",
+ " \"And one more thing, Belichick, up for Hall of Fame, doesn't get in on the first time around.\",\n",
+ " 'I mean, the virtue signaling, the holier-than-thou approach that comes from the NFL brass is just mind-boggling to me that these kind of things are allowed to happen.',\n",
+ " \"I know Bill Belichick's record without Tom Brady as a head coach is nothing to write home about, but this is a guy that has six Super Bowls on his resume as a head coach, two as a defensive coordinator.\",\n",
+ " \"I mean, the guy has basically been in 25% of the big games that we've seen played since its inception in the late 60s.\",\n",
+ " 'Bill Belichick deserves to be in, and I think this is just dumb.',\n",
+ " \"I know people point to Vince Lombardi and Joe Gibbs not getting in on their first ballot, but if you're going to keep Bill Belichick out on his first ballot, you know what this tells me, J-Lo?\",\n",
+ " \"Tom Brady shouldn't get in on his first ballot either, because he apparently only accomplished as much as he did on the football field because of Deflategate and Spygate, the two things working against a surly Bill Belichick, who just happens to share my same alma mater.\",\n",
+ " 'Yeah, this is, I mean, and then you have two guys coaching in the big game, Rabel and McDaniels from that Belichick coaching tree.',\n",
+ " 'It just, yeah.',\n",
+ " 'People got to take the feelings.',\n",
+ " 'Defies logic.',\n",
+ " 'People got to take the feelings out of this thing and just put Bill in.',\n",
+ " \"Maybe if he doesn't go coach at UNC and have the season that he has, does that tarnish things?\",\n",
+ " \"I don't know, but I'm pretty shocked to see.\",\n",
+ " 'What else do you want from a head coach to get in?',\n",
+ " 'All right, just one more podcast left.',\n",
+ " \"We're going to talk, go even deeper on Super Bowl 60 odds, our best bets, our touchdown picks, all coming next Wednesday.\",\n",
+ " \"And until then, I guess, enjoy whatever you're wagering on this weekend and best of luck with those bets.\"]"
]
},
- "execution_count": 35,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "cleaned_transcripts = clean_data(raw_transcripts_filter_df)\n",
+ "cleaned_transcripts = clean_data(raw_transcripts_df)\n",
"cleaned_transcripts"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "46962fa4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "198"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(cleaned_transcripts)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "6c10f2fb",
@@ -780,7 +533,7 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 10,
"id": "9e05974a",
"metadata": {},
"outputs": [],
@@ -821,25 +574,19 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 11,
"id": "a8fa141d",
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Warning: API_KEY environment variable not set\n"
- ]
- },
{
"data": {
"text/plain": [
- "{'llama-3.1-8b-instant': ,\n",
- " 'llama-3.3-70b-versatile': }"
+ "{'llama-3.1-8b-instant': ,\n",
+ " 'llama-3.3-70b-versatile': ,\n",
+ " 'openai/gpt-oss-120b': }"
]
},
- "execution_count": 37,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -849,7 +596,7 @@
"\n",
"# Option 3: All NaviGator models\n",
"# models = tgmf.create_instances(tgmf.get_navigator_model_names())\n",
- "models = tgmf.create_instances(['llama-3.1-8b-instant', 'llama-3.3-70b-versatile', 'granite-3.3-8b-instruct'])\n",
+ "models = tgmf.create_instances(['llama-3.1-8b-instant', 'llama-3.3-70b-versatile', 'openai/gpt-oss-120b'])\n",
"models"
]
},
@@ -863,7 +610,7 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 12,
"id": "66bce6de",
"metadata": {},
"outputs": [
@@ -871,35 +618,213 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "0 --- Sentence: >> We are in danger of the NFL live curse affecting Super Bowl 60, but it's not going to happen\n",
- " Model: llama-3.1-8b-instant | Label: 1\n",
- " Model: llama-3.3-70b-versatile | Label: 1\n",
- "1 --- Sentence: Go with the Patriots\n",
+ "0 --- Sentence: Hello and welcome to the Sharp 600 brought to you by Covers.com and presented by Bet365.\n",
" Model: llama-3.1-8b-instant | Label: 0\n",
- " Model: llama-3.3-70b-versatile | Label: 1\n",
- "2 --- Sentence: >> YES, BOOGIE\n",
+ " Model: llama-3.3-70b-versatile | Label: 0\n",
+ " Model: openai/gpt-oss-120b | Label: 0\n",
+ "1 --- Sentence: My name is Jason Logan.\n",
+ " Model: llama-3.1-8b-instant | Label: 0\n",
+ " Model: llama-3.3-70b-versatile | Label: 0\n",
+ " Model: openai/gpt-oss-120b | Label: 0\n",
+ "2 --- Sentence: I will be your host here for the next 10 minutes as we take our first bites of Super Bowl 60 odds.\n",
" Model: llama-3.1-8b-instant | Label: 0\n",
" Model: llama-3.3-70b-versatile | Label: 0\n",
- "3 --- Sentence: >> YOU GUYS LOVE ME\n",
- "4 --- Sentence: I need to\n",
- "5 --- Sentence: All right, but here's why\n",
- "6 --- Sentence: I actually think Drake May and his legs is going to be the key factor in this game\n",
- "7 --- Sentence: I think he's going to step up in huge moments\n",
- "8 --- Sentence: >> Yeah\n",
- "9 --- Sentence: And I'm doing it for you, Mina\n",
- "10 --- Sentence: I'm doing it for you\n",
- "11 --- Sentence: But seriously, Drake May's going to stand up and it's going to be great and they're going to win and you guys are all going to be wrong\n",
- "12 --- Sentence: And >> on Sunday night when we see you as part of the Handoff Show, which tune in around 1:00 a\n",
- "13 --- Sentence: m\n",
- "14 --- Sentence: Eastern time, we'll talk about how I'm right and you guys are wrong\n",
- "15 --- Sentence: And then we'll see you again on Monday in Disneyland\n",
- "16 --- Sentence: We've got all kinds of great coverage still coming up\n",
- "17 --- Sentence: Thanks to everybody for being here\n",
- "18 --- Sentence: Thanks to our incredible staff back in Bristol, Connecticut\n",
- "19 --- Sentence: We love you guys and we appreciate [cheering and applause] everybody for tuning in\n",
- "20 --- Sentence: Enjoy Super Bowl 60 Sports Center with Matt and Hannah's next\n",
- "21 --- Sentence: >> Amen\n",
- "22 --- Sentence: \n"
+ " Model: openai/gpt-oss-120b | Label: 0\n",
+ "3 --- Sentence: And joining us for that dinner, for that snack time, is former odds maker, current professional bettor Todd Furman.\n",
+ "4 --- Sentence: Todd, happy bye week to you.\n",
+ "5 --- Sentence: The Seahawks, the Patriots get a little bit of a breather, a little bit of downtime this week, but I always found the bye was like my busiest time, at least in terms of what I need to do, the capping, the content, the planning, all these things, everything that we do at Covers for Super Bowl was very, very busy.\n",
+ "6 --- Sentence: For you as a pro better, what do these two weeks look like?\n",
+ "7 --- Sentence: The schedule has changed considerably from what it is during the bye week now versus what it would have been during the bye week 10 years ago.\n",
+ "8 --- Sentence: I mean, J-Lo, we had player props out at most of the prominent books, bet three, six, five, as well as early as Monday morning where you've been in the industry long enough.\n",
+ "9 --- Sentence: And this is going to make us sound really old.\n",
+ "10 --- Sentence: You didn't start to get player props until Friday, Saturday, Sunday, leading up to the big game in and of itself.\n",
+ "11 --- Sentence: So you kind of have to have rough projections, know which players that you've targeted.\n",
+ "12 --- Sentence: You want to jump on right away.\n",
+ "13 --- Sentence: And then you begin to dig into some of the X's and O's and try and figure out, okay, where can I add to those positions?\n",
+ "14 --- Sentence: Where can I find my biggest edges?\n",
+ "15 --- Sentence: and what have the odds makers not properly accounted for versus current form compared to season-long metrics where they'll heavily weight them towards a lot of individual player performances.\n",
+ "16 --- Sentence: Yeah, I was betting player props Sunday night.\n",
+ "17 --- Sentence: They were out Sunday night in a few books.\n",
+ "18 --- Sentence: And that was first crack.\n",
+ "19 --- Sentence: First crack that Sunday compared to first crack the Sunday before the Super Bowl a decade ago.\n",
+ "20 --- Sentence: Yeah, I remember the Thursday before the Super Bowl and all the props would come out.\n",
+ "21 --- Sentence: It was like that was the big thing.\n",
+ "22 --- Sentence: But, yeah, times, they are changing.\n",
+ "23 --- Sentence: Before we start the clock here on what's going to be the second last podcast of the season, I can't believe that, a simple ask.\n",
+ "24 --- Sentence: If you have been listening to us all season or if you're just finding us for the first time here ahead of Super Bowl, we ask that if you like what you're hearing, let us know.\n",
+ "25 --- Sentence: Let other people know.\n",
+ "26 --- Sentence: Rate and review the podcast on those pod platforms.\n",
+ "27 --- Sentence: If you're watching us on YouTube, like, subscribe to the Covers YouTube channel, and leave a comment.\n",
+ "28 --- Sentence: What are you betting in the big game?\n",
+ "29 --- Sentence: We'd like to know.\n",
+ "30 --- Sentence: all right todd let's have 600 super seconds on the clock all right let's break down the line here super bowl 60 the spread opens as low as three and a half but we see a quick move they forget about four they go four and a half that's where we see this right now we have seen five we have seen some five and a halves uh todd for me i mean this I look at this, this is the best spread that bookies could ask for right now because of all that wiggle room in the middle.\n",
+ "31 --- Sentence: What are you seeing from this spread?\n",
+ "32 --- Sentence: What do you like?\n",
+ "33 --- Sentence: What have you got down on early?\n",
+ "34 --- Sentence: You mentioned where the number opens at 3.5.\n",
+ "35 --- Sentence: The look-ahead number going into the championship Sunday, you see a couple of books stick to their guns.\n",
+ "36 --- Sentence: Then we did see some money come in on the favor, to your point, getting stuck in no man's land where books are going to have a ton of flexibility.\n",
+ "37 --- Sentence: The opportunity to move down to 4 if they see professional money most likely coming in on New England, or drift the number a lot further out in that 5-5.5 range if the public comes in on one side, or I guess I should say if the professionals decide they want to back the Seahawks.\n",
+ "38 --- Sentence: For me, the 6 was going to be my buy point on the dog, and it wasn't widely available, so I don't really have a true position on the side in this game early on.\n",
+ "39 --- Sentence: My number was 4.\n",
+ "40 --- Sentence: I want to dig into it a little bit, figure out if there's a matchup or two that I think one of these sides can exploit.\n",
+ "41 --- Sentence: My initial inclination was that if you're going to give me a good defense, that's going to have the opportunity to catch points in this particular spot.\n",
+ "42 --- Sentence: I'm typically going to gravitate towards that more often than not, but don't have a full position on this game yet.\n",
+ "43 --- Sentence: And J-Lo, I'm not quite sure the betting market is going to allow me an opportunity to get involved with the number that I've targeted.\n",
+ "44 --- Sentence: Yeah, for me, knowing what the look-ahead spread was, I knew we were going to see a money line on Seattle around 2-1 for that favorite.\n",
+ "45 --- Sentence: And that's where it came out, and that's where I took them right away.\n",
+ "46 --- Sentence: When it comes to four and four-and-a-half point favorites, I've kind of made it a rule for myself to say, you know what don't play the points just go with the money line i find that four to four and a half is it's been since kind of four became more of a key number since 2015 it's been kind of a no man's land where that four point favorite or the four and a half point favorite will win but not quite cover they're good enough to be favored by more than a field goal but not quite good enough to be favored by six and so for me i went to seattle straight out just i mean two games over the last two weeks the new england patriots win but don't cover they close a three and a half four point favorite against the denver broncos and the week before we see the chicago bears come up just short in the straight up category but find a way to cover as a three and a half four point underdog so i think that number is a lot more important than people give it credit for and to your point you're going to see a money line that may come down as well if you gravitate towards seattle closer to kickoff yeah i mean you're going to see oh we always see underdog money on the money line So there'll probably be some Patriots outright money showing up.\n",
+ "47 --- Sentence: Let's talk total here.\n",
+ "48 --- Sentence: Look ahead was 45, open 46 and a half.\n",
+ "49 --- Sentence: Now we're seeing 46 is 45 and a half.\n",
+ "50 --- Sentence: A clear weather game for the Patriots.\n",
+ "51 --- Sentence: I'm leaning towards over in this one.\n",
+ "52 --- Sentence: I want to know your thoughts.\n",
+ "53 --- Sentence: A weather game times two.\n",
+ "54 --- Sentence: I mean, played in a blizzard in the AFC championship game and played in freezing rain the week before against the Houston Texans.\n",
+ "55 --- Sentence: A game that snuck over, but it wasn't because of offensive execution from either of those two teams.\n",
+ "56 --- Sentence: It's always interesting to figure out how much a weather is suppressing a total.\n",
+ "57 --- Sentence: In a game like this, it appears the forecast in Santa Clara is going to be pretty good.\n",
+ "58 --- Sentence: Now, it is an outdoor venue, so we're not quite sure exactly what that track is going to look like.\n",
+ "59 --- Sentence: We've seen games played in domes typically offer a faster track for both these teams in the past.\n",
+ "60 --- Sentence: For me, 47 was kind of the number that I was looking for to make a case to go under.\n",
+ "61 --- Sentence: If I am going to get involved in a total, I'll probably gravitate a little bit more towards looking at that first half number.\n",
+ "62 --- Sentence: But wish I had stronger convictions right now because I do think you have suppressed totals there.\n",
+ "63 --- Sentence: But you also have a Seahawks offense that's going to be stepping up in class given the last three games, two against the 49ers, one against the Rams.\n",
+ "64 --- Sentence: I'm not sure either of those secondaries would be described as anything more than a bottom six unit.\n",
+ "65 --- Sentence: Yeah, I always look at under in the derivative market, specifically in that first quarter unders.\n",
+ "66 --- Sentence: Those tend to be lower scoring frames.\n",
+ "67 --- Sentence: Both teams a little jacked up.\n",
+ "68 --- Sentence: You got two quarterbacks a little tight on those throws.\n",
+ "69 --- Sentence: We see some miscues and things like that.\n",
+ "70 --- Sentence: So I do lean towards those unders in the first quarter.\n",
+ "71 --- Sentence: All right, we've still got a lot of time here before Super Bowl kickoff.\n",
+ "72 --- Sentence: Let's talk do's and don'ts for sports bettors.\n",
+ "73 --- Sentence: I'm going to start with a do.\n",
+ "74 --- Sentence: And I'm going to say do take advantage of the live in-game odds, especially when it comes to any early game anomalies.\n",
+ "75 --- Sentence: If the Patriots get up quick, but they do so on, say, a defensive score or a special team score, anything weird that happens early on, usually you can take an advantage of a market that will overcorrect to that, especially with everyone and their dog betting in this game and people chasing their bets.\n",
+ "76 --- Sentence: So any early game anomalies, look and see if you can find some buyback points in the in-game markets.\n",
+ "77 --- Sentence: Todd, you got a do for us in terms of Super Bowl betting?\n",
+ "78 --- Sentence: I do, but I want to piggyback what you said, and I think the other interesting angle that you can take, it's not just side total.\n",
+ "79 --- Sentence: It's live player props as well that get heavily impacted by game state.\n",
+ "80 --- Sentence: So I think there are a lot more opportunities now than there would have been when we had this discussion, say, five, seven, ten years ago in those particular live markets.\n",
+ "81 --- Sentence: Dues for me, I feel like I'm a man that is absolutely no fun to watch the game with.\n",
+ "82 --- Sentence: Do make a case to bet a lot of player performances to come in under their posted totals.\n",
+ "83 --- Sentence: I know it's not fun to root against anything happening during the course of a football game, You look at a lot of these markets that are set.\n",
+ "84 --- Sentence: People are going to gravitate towards rooting for things to happen.\n",
+ "85 --- Sentence: They're going to see low yardage totals with those third and fourth string wide receivers and tight ends and hope those players can come up with a catcher to do look to bet things under and do be willing to lay prices to be able to do so.\n",
+ "86 --- Sentence: And are we doing that on closer to kickoff, waiting for those odds to go up?\n",
+ "87 --- Sentence: Certain players, I would say yes.\n",
+ "88 --- Sentence: Other players, sometimes you have to jump a little bit sooner.\n",
+ "89 --- Sentence: but I would not be running to bet JSN under his receiving total until the 23rd hour let's put it that way all right well I've got a don't here and this is more of an etiquette thing if you're at a Super Bowl party if you're with a group and that group is maybe not of the betting guild don't constantly talk about your bets I'm someone that does this for for a job and I can't stand those people if you listen if you're with your buddies and it's the group ride parlay and this is your group chat that you're with you know party up go crazy have not go nuts but for god's sakes read the room and don't get down on everyone because you're losing some wager celebrate your wins you hit a 25 to 1 first touchdown winner celebrate it have fun if you lose on heads don't throw a fit don't bring everyone down keep your keep your betting excitement in check that that's my don't todd you got a don't for us you know one of my don'ts is don't be a superbowl grinch like me who typically watches the big game every year by himself given some of the immediate responsibilities that I have.\n",
+ "90 --- Sentence: I can control the temperature.\n",
+ "91 --- Sentence: I can control the alcohol intake and the food and everything else.\n",
+ "92 --- Sentence: But for me, it's don't chase some of those long shot prices with the majority of your bankroll.\n",
+ "93 --- Sentence: It's more than fine to get involved in some of those long shot markets.\n",
+ "94 --- Sentence: But at the same time, if anything, you're not getting the necessary value and upside for trying to look for some of those depth players to go over those laddering numbers that are out there or a variety of other markets.\n",
+ "95 --- Sentence: So don't just chase that plus money.\n",
+ "96 --- Sentence: Be judicious about how you allocate your bankroll.\n",
+ "97 --- Sentence: All right, time to talk Super Bowl MVP odds.\n",
+ "98 --- Sentence: Todd, who do you like to be the most valuable player Super Bowl 60?\n",
+ "99 --- Sentence: So J-Lo, normally you're gonna gravitate towards the starting quarterback of the team that you think is going to win the game, but what fun would that be in a game like this?\n",
+ "100 --- Sentence: I am going to take a skilled position player for a team that I think wins the game, and that would lead me to Ramondre Stevenson at 28 to one.\n",
+ "101 --- Sentence: look we've seen a Patriots running back get snubbed in the past that had 37 catches and like 900 receiving yards this is a chance for the world to right all of its wrongs and give Ramondre the hardware if he has two touchdowns and 75 all-purpose yards give me Ramondre Stevenson 28 to 1 all right I'll see your skill player and raise you a defensive player go on Demarcus Lawrence it only makes sense that a cowboy is going to get to the Super Bowl and show it almost last year I did uh but I'm happy for Tank but let's talk about this defense this is the reason why San Seattle is here This is the reason why they're favored.\n",
+ "102 --- Sentence: He is the most disruptive player on this team.\n",
+ "103 --- Sentence: Six sacks, 11 tackles for a loss, three forced fumbles, scooped up all of those in the regular season.\n",
+ "104 --- Sentence: Two sacks, three forced fumbles in the playoffs so far.\n",
+ "105 --- Sentence: The Patriots pass protection hasn't been great.\n",
+ "106 --- Sentence: I like Tank, MVP, plus 10,000.\n",
+ "107 --- Sentence: All right, two-minute drill time.\n",
+ "108 --- Sentence: Early bets here for Super Bowl 60.\n",
+ "109 --- Sentence: I'm going with Seattle receiver Raheed Shaheed, over 21.5 yards, receiving minus 110.\n",
+ "110 --- Sentence: since he came over at the trade deadline.\n",
+ "111 --- Sentence: Seattle has faced a lot of opponents that run zone.\n",
+ "112 --- Sentence: A lot of those defense runs zone exclusively.\n",
+ "113 --- Sentence: He is a much better receiver at man.\n",
+ "114 --- Sentence: And what does he face in the Patriots?\n",
+ "115 --- Sentence: He faces a defense that runs one of the highest rates of man.\n",
+ "116 --- Sentence: There's also someone that likes throwing against man, and that's Sam Darnold.\n",
+ "117 --- Sentence: So I think Shahid gets loose here.\n",
+ "118 --- Sentence: He could put this one away in one catch.\n",
+ "119 --- Sentence: Over 21 and a half yards receiving.\n",
+ "120 --- Sentence: Rashid Shahid still on my shit list because he didn't want to take contact over a half a rushing yard with the end around.\n",
+ "121 --- Sentence: decided he was going to dance around.\n",
+ "122 --- Sentence: And example number 758.\n",
+ "123 --- Sentence: I had it.\n",
+ "124 --- Sentence: Why you can't trust a Diva wide receiver as a ball carrier in the backfield.\n",
+ "125 --- Sentence: For me, I'm going to go to that Seattle Seahawks backfield though and take Kenneth Walker under his rushing total at 75 and a half.\n",
+ "126 --- Sentence: Look, Walker started out like a house on fire last week against the Rams.\n",
+ "127 --- Sentence: Eight carries early on in that game for 35 yards and then kind of petered out.\n",
+ "128 --- Sentence: 11 carries the rest of the game and didn't show that big explosive potential even against the much maligned Rams run defense.\n",
+ "129 --- Sentence: Now he steps up in class against the Patriots run defense that is truly elite when Milton Williams is back in the full to anchor that defensive line.\n",
+ "130 --- Sentence: Seattle does love to run the football on early downs.\n",
+ "131 --- Sentence: I think they may have to go away from that approach.\n",
+ "132 --- Sentence: And even without Zach Charbonnet, I just don't see Kenneth Walker getting to mid-70s or higher.\n",
+ "133 --- Sentence: I have him projected J-Lo in the high 60s, under 75.5 for the bell cow back wearing Seahawks colors.\n",
+ "134 --- Sentence: All right, I'm going to go to the tight end for the Patriots.\n",
+ "135 --- Sentence: Hunter Henry over 36.5 yards, receiving minus 110.\n",
+ "136 --- Sentence: He's been taking some punishment, and now he faces a ferocious Seattle pass rush that can get pressure with just four.\n",
+ "137 --- Sentence: He's not going to have time for downfield plays to develop.\n",
+ "138 --- Sentence: He's going to have to hit those quick hits, and Hunter Henry has been that pressure release.\n",
+ "139 --- Sentence: We have seen Seattle give up a lot of targets, a lot of receptions, two tight ends.\n",
+ "140 --- Sentence: Projections for Henry, sit as high as 46.\n",
+ "141 --- Sentence: I like him to go over 36.5.\n",
+ "142 --- Sentence: Small flyer for me, a Seattle Seahawks receiver, not named Cooper Cup, not named Rasheed Sheed, not named Jackson Smith and Jigba.\n",
+ "143 --- Sentence: Jake Bobo, over a half a receiving yard at plus $1.15.\n",
+ "144 --- Sentence: Bobo had a catch in each of the last two games, 16 yards, 17 yards.\n",
+ "145 --- Sentence: I think he's a guy that may be targeted a couple times here.\n",
+ "146 --- Sentence: Just one catch, get us one yard, take me there at plus $1.15.\n",
+ "147 --- Sentence: All right, you got two weeks to scheme up some stuff.\n",
+ "148 --- Sentence: You got to get tricky sometimes and throw those guys in there.\n",
+ "149 --- Sentence: He did not have a touchdown in that NFC Championship game.\n",
+ "150 --- Sentence: Was he the guy that scored a touchdown in that one?\n",
+ "151 --- Sentence: No, he did have a touchdown there.\n",
+ "152 --- Sentence: Had one catch each of the last two games, 16 yards, 17 yards, and a TD.\n",
+ "153 --- Sentence: All I'm just looking for is a receiving yard here to get us across the finish line.\n",
+ "154 --- Sentence: Yeah, that's it.\n",
+ "155 --- Sentence: He's not asking for much, Bobo.\n",
+ "156 --- Sentence: Just, you know, that's it.\n",
+ "157 --- Sentence: That's it.\n",
+ "158 --- Sentence: I can't wait to see you freak out on Super Bowl Sunday when he doesn't get that.\n",
+ "159 --- Sentence: But remember the etiquette.\n",
+ "160 --- Sentence: Yeah.\n",
+ "161 --- Sentence: All right.\n",
+ "162 --- Sentence: A couple of targets.\n",
+ "163 --- Sentence: A couple of targets.\n",
+ "164 --- Sentence: A couple of drops.\n",
+ "165 --- Sentence: You know how this goes.\n",
+ "166 --- Sentence: I know exactly how it goes.\n",
+ "167 --- Sentence: It'll be all right.\n",
+ "168 --- Sentence: All right.\n",
+ "169 --- Sentence: That is it for our first Super Bowl pod.\n",
+ "170 --- Sentence: We will have another one next week.\n",
+ "171 --- Sentence: A big thanks to Chris behind the scenes.\n",
+ "172 --- Sentence: A big thank you to Todd for joining us once again.\n",
+ "173 --- Sentence: thanks to beth365 for the odds and of course thank you to you for tuning in and listening whether you've been with us all year or you're just finding us here ahead of the big game a reminder if you like what we're doing rate and review like and subscribe on youtube it just takes a matter of seconds uh but it means a lot to us so please do that when you can todd uh you thought you would get out of this podcast without some super bowl silliness but no because you know i mean i look not my first rodeo i know better that there's always something waiting behind door number three it's a basic one by this by this point in in super bowl betting this is a very common wager what color gatorade is going to get thrown on the winning coach here at the end of super bowl six i mean i feel like you're tipping your hand given what's in your cup uh when we recorded this video earlier today you claim that it's not blue gatorade that it's just a blue tinted cup but i'm gonna go with blue look we have a couple of teams wearing darker colors there that could play a role, I think blue is going to be my preferred way to go, mainly because, look, I'm a big fan of that particular flavor myself, so I like to use firsthand knowledge to get me across the finish line, more so than that fruit punch and water or anything else.\n",
+ "174 --- Sentence: So let's go with blue from the Gatorade color.\n",
+ "175 --- Sentence: See, I always used to lean towards water because I knew it was there, but then I got to remember, too, like this is a corporate marketing spot right here.\n",
+ "176 --- Sentence: And the last thing the Gatorade bigwigs want to see is water being thrown on the winning coach.\n",
+ "177 --- Sentence: I'm going to go lime slash green, you know, traditional Gatorade color.\n",
+ "178 --- Sentence: But I do have the Seahawks money line, so might as well tie it to the Gatorade color.\n",
+ "179 --- Sentence: That'll probably be on the sideline.\n",
+ "180 --- Sentence: And one more thing, Belichick, up for Hall of Fame, doesn't get in on the first time around.\n",
+ "181 --- Sentence: I mean, the virtue signaling, the holier-than-thou approach that comes from the NFL brass is just mind-boggling to me that these kind of things are allowed to happen.\n",
+ "182 --- Sentence: I know Bill Belichick's record without Tom Brady as a head coach is nothing to write home about, but this is a guy that has six Super Bowls on his resume as a head coach, two as a defensive coordinator.\n",
+ "183 --- Sentence: I mean, the guy has basically been in 25% of the big games that we've seen played since its inception in the late 60s.\n",
+ "184 --- Sentence: Bill Belichick deserves to be in, and I think this is just dumb.\n",
+ "185 --- Sentence: I know people point to Vince Lombardi and Joe Gibbs not getting in on their first ballot, but if you're going to keep Bill Belichick out on his first ballot, you know what this tells me, J-Lo?\n",
+ "186 --- Sentence: Tom Brady shouldn't get in on his first ballot either, because he apparently only accomplished as much as he did on the football field because of Deflategate and Spygate, the two things working against a surly Bill Belichick, who just happens to share my same alma mater.\n",
+ "187 --- Sentence: Yeah, this is, I mean, and then you have two guys coaching in the big game, Rabel and McDaniels from that Belichick coaching tree.\n",
+ "188 --- Sentence: It just, yeah.\n",
+ "189 --- Sentence: People got to take the feelings.\n",
+ "190 --- Sentence: Defies logic.\n",
+ "191 --- Sentence: People got to take the feelings out of this thing and just put Bill in.\n",
+ "192 --- Sentence: Maybe if he doesn't go coach at UNC and have the season that he has, does that tarnish things?\n",
+ "193 --- Sentence: I don't know, but I'm pretty shocked to see.\n",
+ "194 --- Sentence: What else do you want from a head coach to get in?\n",
+ "195 --- Sentence: All right, just one more podcast left.\n",
+ "196 --- Sentence: We're going to talk, go even deeper on Super Bowl 60 odds, our best bets, our touchdown picks, all coming next Wednesday.\n",
+ "197 --- Sentence: And until then, I guess, enjoy whatever you're wagering on this weekend and best of luck with those bets.\n"
]
}
],
@@ -953,7 +878,7 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 13,
"id": "e4791205",
"metadata": {},
"outputs": [
@@ -987,76 +912,76 @@
" \n",
" \n",
" \n",
- " | 40 | \n",
- " Enjoy Super Bowl 60 Sports Center with Matt and Hannah's next | \n",
+ " 588 | \n",
+ " We're going to talk, go even deeper on Super Bowl 60 odds, our best bets, our touchdown picks, all coming next Wednesday. | \n",
" {\"label\": 0} | \n",
" 0 | \n",
" None | \n",
" llama-3.1-8b-instant | \n",
"
\n",
" \n",
- " | 41 | \n",
- " Enjoy Super Bowl 60 Sports Center with Matt and Hannah's next | \n",
- " {\"label\": 0} | \n",
- " 0 | \n",
+ " 589 | \n",
+ " We're going to talk, go even deeper on Super Bowl 60 odds, our best bets, our touchdown picks, all coming next Wednesday. | \n",
+ " {\"label\": 1} | \n",
+ " 1 | \n",
" None | \n",
" llama-3.3-70b-versatile | \n",
"
\n",
" \n",
- " | 42 | \n",
- " >> Amen | \n",
- " {\"label\": 0} | \n",
+ " 590 | \n",
+ " We're going to talk, go even deeper on Super Bowl 60 odds, our best bets, our touchdown picks, all coming next Wednesday. | \n",
+ " {\\n \"label\": 0\\n} | \n",
" 0 | \n",
" None | \n",
- " llama-3.1-8b-instant | \n",
+ " openai/gpt-oss-120b | \n",
"
\n",
" \n",
- " | 43 | \n",
- " >> Amen | \n",
+ " 591 | \n",
+ " And until then, I guess, enjoy whatever you're wagering on this weekend and best of luck with those bets. | \n",
" {\"label\": 0} | \n",
" 0 | \n",
" None | \n",
- " llama-3.3-70b-versatile | \n",
+ " llama-3.1-8b-instant | \n",
"
\n",
" \n",
- " | 44 | \n",
- " | \n",
- " {\"label\": 1} | \n",
- " 1 | \n",
+ " 592 | \n",
+ " And until then, I guess, enjoy whatever you're wagering on this weekend and best of luck with those bets. | \n",
+ " {\"label\": 0} | \n",
+ " 0 | \n",
" None | \n",
- " llama-3.1-8b-instant | \n",
+ " llama-3.3-70b-versatile | \n",
"
\n",
" \n",
- " | 45 | \n",
- " | \n",
+ " 593 | \n",
+ " And until then, I guess, enjoy whatever you're wagering on this weekend and best of luck with those bets. | \n",
" {\"label\": 0} | \n",
" 0 | \n",
" None | \n",
- " llama-3.3-70b-versatile | \n",
+ " openai/gpt-oss-120b | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " text \\\n",
- "40 Enjoy Super Bowl 60 Sports Center with Matt and Hannah's next \n",
- "41 Enjoy Super Bowl 60 Sports Center with Matt and Hannah's next \n",
- "42 >> Amen \n",
- "43 >> Amen \n",
- "44 \n",
- "45 \n",
+ " text \\\n",
+ "588 We're going to talk, go even deeper on Super Bowl 60 odds, our best bets, our touchdown picks, all coming next Wednesday. \n",
+ "589 We're going to talk, go even deeper on Super Bowl 60 odds, our best bets, our touchdown picks, all coming next Wednesday. \n",
+ "590 We're going to talk, go even deeper on Super Bowl 60 odds, our best bets, our touchdown picks, all coming next Wednesday. \n",
+ "591 And until then, I guess, enjoy whatever you're wagering on this weekend and best of luck with those bets. \n",
+ "592 And until then, I guess, enjoy whatever you're wagering on this weekend and best of luck with those bets. \n",
+ "593 And until then, I guess, enjoy whatever you're wagering on this weekend and best of luck with those bets. \n",
"\n",
- " raw_response llm_label llm_reasoning llm_name \n",
- "40 {\"label\": 0} 0 None llama-3.1-8b-instant \n",
- "41 {\"label\": 0} 0 None llama-3.3-70b-versatile \n",
- "42 {\"label\": 0} 0 None llama-3.1-8b-instant \n",
- "43 {\"label\": 0} 0 None llama-3.3-70b-versatile \n",
- "44 {\"label\": 1} 1 None llama-3.1-8b-instant \n",
- "45 {\"label\": 0} 0 None llama-3.3-70b-versatile "
+ " raw_response llm_label llm_reasoning llm_name \n",
+ "588 {\"label\": 0} 0 None llama-3.1-8b-instant \n",
+ "589 {\"label\": 1} 1 None llama-3.3-70b-versatile \n",
+ "590 {\\n \"label\": 0\\n} 0 None openai/gpt-oss-120b \n",
+ "591 {\"label\": 0} 0 None llama-3.1-8b-instant \n",
+ "592 {\"label\": 0} 0 None llama-3.3-70b-versatile \n",
+ "593 {\"label\": 0} 0 None openai/gpt-oss-120b "
]
},
- "execution_count": 39,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -1087,7 +1012,7 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 14,
"id": "115b0d23",
"metadata": {},
"outputs": [
@@ -1115,204 +1040,123 @@
" Base Sentence | \n",
" llama-3.1-8b-instant | \n",
" llama-3.3-70b-versatile | \n",
+ " openai/gpt-oss-120b | \n",
" \n",
" \n",
" \n",
" \n",
" | 0 | \n",
- " | \n",
- " 1 | \n",
+ " 11 carries the rest of the game and didn't show that big explosive potential even against the much maligned Rams run defense. | \n",
+ " 0 | \n",
+ " 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1 | \n",
- " >> Amen | \n",
+ " A big thank you to Todd for joining us once again. | \n",
+ " 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 2 | \n",
- " >> YES, BOOGIE | \n",
+ " A big thanks to Chris behind the scenes. | \n",
+ " 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 3 | \n",
- " >> YOU GUYS LOVE ME | \n",
+ " A clear weather game for the Patriots. | \n",
" 0 | \n",
" 0 | \n",
+ " 1 | \n",
"
\n",
" \n",
" | 4 | \n",
- " >> Yeah | \n",
+ " A couple of drops. | \n",
+ " 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
- " | 5 | \n",
- " All right, but here's why | \n",
- " 0 | \n",
- " 0 | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
"
\n",
" \n",
- " | 6 | \n",
- " And >> on Sunday night when we see you as part of the Handoff Show, which tune in around 1:00 a | \n",
+ " 191 | \n",
+ " but I would not be running to bet JSN under his receiving total until the 23rd hour let's put it that way all right well I've got a don't here and this is more of an etiquette thing if you're at a Super Bowl party if you're with a group and that group is maybe not of the betting guild don't constantly talk about your bets I'm someone that does this for for a job and I can't stand those people if you listen if you're with your buddies and it's the group ride parlay and this is your group chat that you're with you know party up go crazy have not go nuts but for god's sakes read the room and don't get down on everyone because you're losing some wager celebrate your wins you hit a 25 to 1 first touchdown winner celebrate it have fun if you lose on heads don't throw a fit don't bring everyo... | \n",
+ " 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
- " | 7 | \n",
- " And I'm doing it for you, Mina | \n",
+ " 192 | \n",
+ " decided he was going to dance around. | \n",
" 0 | \n",
" 0 | \n",
+ " 1 | \n",
"
\n",
" \n",
- " | 8 | \n",
- " And then we'll see you again on Monday in Disneyland | \n",
+ " 193 | \n",
+ " look we've seen a Patriots running back get snubbed in the past that had 37 catches and like 900 receiving yards this is a chance for the world to right all of its wrongs and give Ramondre the hardware if he has two touchdowns and 75 all-purpose yards give me Ramondre Stevenson 28 to 1 all right I'll see your skill player and raise you a defensive player go on Demarcus Lawrence it only makes sense that a cowboy is going to get to the Super Bowl and show it almost last year I did uh but I'm happy for Tank but let's talk about this defense this is the reason why San Seattle is here This is the reason why they're favored. | \n",
" 0 | \n",
" 1 | \n",
- "
\n",
- " \n",
- " | 9 | \n",
- " But seriously, Drake May's going to stand up and it's going to be great and they're going to win and you guys are all going to be wrong | \n",
- " 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 10 | \n",
- " Eastern time, we'll talk about how I'm right and you guys are wrong | \n",
- " 0 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 11 | \n",
- " Enjoy Super Bowl 60 Sports Center with Matt and Hannah's next | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 12 | \n",
- " Go with the Patriots | \n",
- " 0 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 13 | \n",
- " I actually think Drake May and his legs is going to be the key factor in this game | \n",
" 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 14 | \n",
- " I need to | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 15 | \n",
- " I think he's going to step up in huge moments | \n",
- " 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 16 | \n",
- " I'm doing it for you | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 17 | \n",
- " Thanks to everybody for being here | \n",
- " 0 | \n",
- " 0 | \n",
"
\n",
" \n",
- " | 18 | \n",
- " Thanks to our incredible staff back in Bristol, Connecticut | \n",
+ " 194 | \n",
+ " since he came over at the trade deadline. | \n",
" 0 | \n",
" 0 | \n",
- "
\n",
- " \n",
- " | 19 | \n",
- " We love you guys and we appreciate [cheering and applause] everybody for tuning in | \n",
- " 0 | \n",
" 0 | \n",
"
\n",
" \n",
- " | 20 | \n",
- " We've got all kinds of great coverage still coming up | \n",
+ " 195 | \n",
+ " thanks to beth365 for the odds and of course thank you to you for tuning in and listening whether you've been with us all year or you're just finding us here ahead of the big game a reminder if you like what we're doing rate and review like and subscribe on youtube it just takes a matter of seconds uh but it means a lot to us so please do that when you can todd uh you thought you would get out of this podcast without some super bowl silliness but no because you know i mean i look not my first rodeo i know better that there's always something waiting behind door number three it's a basic one by this by this point in in super bowl betting this is a very common wager what color gatorade is going to get thrown on the winning coach here at the end of super bowl six i mean i feel like you're... | \n",
" 0 | \n",
" 1 | \n",
- "
\n",
- " \n",
- " | 21 | \n",
- " >> We are in danger of the NFL live curse affecting Super Bowl 60, but it's not going to happen | \n",
" 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 22 | \n",
- " m | \n",
- " 0 | \n",
- " 0 | \n",
"
\n",
" \n",
"\n",
+ "196 rows × 4 columns
\n",
""
],
"text/plain": [
- "llm_name Base Sentence \\\n",
- "0 \n",
- "1 >> Amen \n",
- "2 >> YES, BOOGIE \n",
- "3 >> YOU GUYS LOVE ME \n",
- "4 >> Yeah \n",
- "5 All right, but here's why \n",
- "6 And >> on Sunday night when we see you as part of the Handoff Show, which tune in around 1:00 a \n",
- "7 And I'm doing it for you, Mina \n",
- "8 And then we'll see you again on Monday in Disneyland \n",
- "9 But seriously, Drake May's going to stand up and it's going to be great and they're going to win and you guys are all going to be wrong \n",
- "10 Eastern time, we'll talk about how I'm right and you guys are wrong \n",
- "11 Enjoy Super Bowl 60 Sports Center with Matt and Hannah's next \n",
- "12 Go with the Patriots \n",
- "13 I actually think Drake May and his legs is going to be the key factor in this game \n",
- "14 I need to \n",
- "15 I think he's going to step up in huge moments \n",
- "16 I'm doing it for you \n",
- "17 Thanks to everybody for being here \n",
- "18 Thanks to our incredible staff back in Bristol, Connecticut \n",
- "19 We love you guys and we appreciate [cheering and applause] everybody for tuning in \n",
- "20 We've got all kinds of great coverage still coming up \n",
- "21 >> We are in danger of the NFL live curse affecting Super Bowl 60, but it's not going to happen \n",
- "22 m \n",
+ "llm_name Base Sentence \\\n",
+ "0 11 carries the rest of the game and didn't show that big explosive potential even against the much maligned Rams run defense. \n",
+ "1 A big thank you to Todd for joining us once again. \n",
+ "2 A big thanks to Chris behind the scenes. \n",
+ "3 A clear weather game for the Patriots. \n",
+ "4 A couple of drops. \n",
+ ".. ... \n",
+ "191 but I would not be running to bet JSN under his receiving total until the 23rd hour let's put it that way all right well I've got a don't here and this is more of an etiquette thing if you're at a Super Bowl party if you're with a group and that group is maybe not of the betting guild don't constantly talk about your bets I'm someone that does this for for a job and I can't stand those people if you listen if you're with your buddies and it's the group ride parlay and this is your group chat that you're with you know party up go crazy have not go nuts but for god's sakes read the room and don't get down on everyone because you're losing some wager celebrate your wins you hit a 25 to 1 first touchdown winner celebrate it have fun if you lose on heads don't throw a fit don't bring everyo... \n",
+ "192 decided he was going to dance around. \n",
+ "193 look we've seen a Patriots running back get snubbed in the past that had 37 catches and like 900 receiving yards this is a chance for the world to right all of its wrongs and give Ramondre the hardware if he has two touchdowns and 75 all-purpose yards give me Ramondre Stevenson 28 to 1 all right I'll see your skill player and raise you a defensive player go on Demarcus Lawrence it only makes sense that a cowboy is going to get to the Super Bowl and show it almost last year I did uh but I'm happy for Tank but let's talk about this defense this is the reason why San Seattle is here This is the reason why they're favored. \n",
+ "194 since he came over at the trade deadline. \n",
+ "195 thanks to beth365 for the odds and of course thank you to you for tuning in and listening whether you've been with us all year or you're just finding us here ahead of the big game a reminder if you like what we're doing rate and review like and subscribe on youtube it just takes a matter of seconds uh but it means a lot to us so please do that when you can todd uh you thought you would get out of this podcast without some super bowl silliness but no because you know i mean i look not my first rodeo i know better that there's always something waiting behind door number three it's a basic one by this by this point in in super bowl betting this is a very common wager what color gatorade is going to get thrown on the winning coach here at the end of super bowl six i mean i feel like you're... \n",
"\n",
- "llm_name llama-3.1-8b-instant llama-3.3-70b-versatile \n",
- "0 1 0 \n",
- "1 0 0 \n",
- "2 0 0 \n",
- "3 0 0 \n",
- "4 0 0 \n",
- "5 0 0 \n",
- "6 0 0 \n",
- "7 0 0 \n",
- "8 0 1 \n",
- "9 1 1 \n",
- "10 0 1 \n",
- "11 0 0 \n",
- "12 0 1 \n",
- "13 1 1 \n",
- "14 0 0 \n",
- "15 1 1 \n",
- "16 0 0 \n",
- "17 0 0 \n",
- "18 0 0 \n",
- "19 0 0 \n",
- "20 0 1 \n",
- "21 1 1 \n",
- "22 0 0 "
+ "llm_name llama-3.1-8b-instant llama-3.3-70b-versatile openai/gpt-oss-120b \n",
+ "0 0 0 0 \n",
+ "1 0 0 0 \n",
+ "2 0 0 0 \n",
+ "3 0 0 1 \n",
+ "4 0 0 0 \n",
+ ".. ... ... ... \n",
+ "191 0 0 0 \n",
+ "192 0 0 1 \n",
+ "193 0 1 1 \n",
+ "194 0 0 0 \n",
+ "195 0 1 1 \n",
+ "\n",
+ "[196 rows x 4 columns]"
]
},
- "execution_count": 40,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -1345,7 +1189,7 @@
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": 15,
"id": "62e771bf",
"metadata": {},
"outputs": [
@@ -1373,247 +1217,872 @@
" Base Sentence | \n",
" llama-3.1-8b-instant | \n",
" llama-3.3-70b-versatile | \n",
+ " openai/gpt-oss-120b | \n",
" Majority Vote Label | \n",
" \n",
" \n",
" \n",
" \n",
" | 0 | \n",
- " | \n",
- " 1 | \n",
+ " 11 carries the rest of the game and didn't show that big explosive potential even against the much maligned Rams run defense. | \n",
" 0 | \n",
" 0 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " >> Amen | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " >> YES, BOOGIE | \n",
- " 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
- " | 3 | \n",
- " >> YOU GUYS LOVE ME | \n",
- " 0 | \n",
- " 0 | \n",
+ " 1 | \n",
+ " A big thank you to Todd for joining us once again. | \n",
" 0 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " >> Yeah | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
- " | 5 | \n",
- " All right, but here's why | \n",
- " 0 | \n",
+ " 2 | \n",
+ " A big thanks to Chris behind the scenes. | \n",
" 0 | \n",
" 0 | \n",
- "
\n",
- " \n",
- " | 6 | \n",
- " And >> on Sunday night when we see you as part of the Handoff Show, which tune in around 1:00 a | \n",
- " 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
- " | 7 | \n",
- " And I'm doing it for you, Mina | \n",
- " 0 | \n",
- " 0 | \n",
+ " 3 | \n",
+ " A clear weather game for the Patriots. | \n",
" 0 | \n",
- "
\n",
- " \n",
- " | 8 | \n",
- " And then we'll see you again on Monday in Disneyland | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
- " | 9 | \n",
- " But seriously, Drake May's going to stand up and it's going to be great and they're going to win and you guys are all going to be wrong | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 10 | \n",
- " Eastern time, we'll talk about how I'm right and you guys are wrong | \n",
+ " 4 | \n",
+ " A couple of drops. | \n",
" 0 | \n",
- " 1 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 11 | \n",
- " Enjoy Super Bowl 60 Sports Center with Matt and Hannah's next | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
- " \n",
- " | 12 | \n",
- " Go with the Patriots | \n",
- " 0 | \n",
- " 1 | \n",
- " 0 | \n",
+ "
\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "llm_name Base Sentence \\\n",
+ "0 11 carries the rest of the game and didn't show that big explosive potential even against the much maligned Rams run defense. \n",
+ "1 A big thank you to Todd for joining us once again. \n",
+ "2 A big thanks to Chris behind the scenes. \n",
+ "3 A clear weather game for the Patriots. \n",
+ "4 A couple of drops. \n",
+ "\n",
+ "llm_name llama-3.1-8b-instant llama-3.3-70b-versatile openai/gpt-oss-120b \\\n",
+ "0 0 0 0 \n",
+ "1 0 0 0 \n",
+ "2 0 0 0 \n",
+ "3 0 0 1 \n",
+ "4 0 0 0 \n",
+ "\n",
+ "llm_name Majority Vote Label \n",
+ "0 0 \n",
+ "1 0 \n",
+ "2 0 \n",
+ "3 0 \n",
+ "4 0 "
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Majority vote: the most frequent label across the per-model columns of each row.\n",
+ "model_votes = results_df.iloc[:, 1:]  # every column after the sentence column\n",
+ "majority_label = model_votes.mode(axis=1)[0].astype(int)  # first mode per row\n",
+ "results_df['Majority Vote Label'] = majority_label\n",
+ "\n",
+ "results_df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e2ffdb99",
+ "metadata": {},
+ "source": [
+ "Add empty 'Human Annotation' and 'Human Reasoning' columns for the human annotator to fill in, and a 'Video ID' column identifying the source video of each sentence."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "97175036",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Base Sentence | \n",
+ " llama-3.1-8b-instant | \n",
+ " llama-3.3-70b-versatile | \n",
+ " openai/gpt-oss-120b | \n",
+ " Majority Vote Label | \n",
+ " Human Annotation | \n",
+ " Human Reasoning | \n",
+ " Video ID | \n",
"
\n",
+ " \n",
+ " \n",
" \n",
- " | 13 | \n",
- " I actually think Drake May and his legs is going to be the key factor in this game | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
+ " 0 | \n",
+ " 11 carries the rest of the game and didn't show that big explosive potential even against the much maligned Rams run defense. | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
- " | 14 | \n",
- " I need to | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
+ " 1 | \n",
+ " A big thank you to Todd for joining us once again. | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
- " | 15 | \n",
- " I think he's going to step up in huge moments | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
+ " 2 | \n",
+ " A big thanks to Chris behind the scenes. | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
- " | 16 | \n",
- " I'm doing it for you | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
+ " 3 | \n",
+ " A clear weather game for the Patriots. | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
- " | 17 | \n",
- " Thanks to everybody for being here | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
+ " 4 | \n",
+ " A couple of drops. | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
- " \n",
- " | 18 | \n",
- " Thanks to our incredible staff back in Bristol, Connecticut | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
+ "
\n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Base Sentence \\\n",
+ "0 11 carries the rest of the game and didn't show that big explosive potential even against the much maligned Rams run defense. \n",
+ "1 A big thank you to Todd for joining us once again. \n",
+ "2 A big thanks to Chris behind the scenes. \n",
+ "3 A clear weather game for the Patriots. \n",
+ "4 A couple of drops. \n",
+ "\n",
+ " llama-3.1-8b-instant llama-3.3-70b-versatile openai/gpt-oss-120b \\\n",
+ "0 0.0 0.0 0.0 \n",
+ "1 0.0 0.0 0.0 \n",
+ "2 0.0 0.0 0.0 \n",
+ "3 0.0 0.0 1.0 \n",
+ "4 0.0 0.0 0.0 \n",
+ "\n",
+ " Majority Vote Label Human Annotation Human Reasoning Video ID \n",
+ "0 0.0 NaN NaN FPl-F2k_KtM \n",
+ "1 0.0 NaN NaN FPl-F2k_KtM \n",
+ "2 0.0 NaN NaN FPl-F2k_KtM \n",
+ "3 0.0 NaN NaN FPl-F2k_KtM \n",
+ "4 0.0 NaN NaN FPl-F2k_KtM "
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Build the annotation sheet as a NEW frame instead of writing into `results_df`,\n",
+ "# so frames shown by earlier cells stay accurate and this cell never alters its own input.\n",
+ "annotated_results_df = results_df.assign(**{\n",
+ "    'Human Annotation': np.nan,  # placeholder, to be filled in by a human\n",
+ "    'Human Reasoning': np.nan,   # placeholder, to be filled in by a human\n",
+ "})\n",
+ "# NOTE(review): concat(axis=1) pairs rows by index label, not by sentence text.\n",
+ "# Confirm `results_df` and `raw_transcripts_df` share the same index; otherwise the\n",
+ "# 'Video ID' values can land on the wrong sentences (or add all-NaN rows).\n",
+ "final_results_df = pd.concat([annotated_results_df, raw_transcripts_df[['Video ID']]], axis=1)\n",
+ "final_results_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "d8153ff4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Keep only the rows whose 'Majority Vote Label' equals 1.\n",
+ "is_majority_positive = final_results_df['Majority Vote Label'] == 1\n",
+ "predictions = final_results_df[is_majority_positive]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "0a4cbba0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Base Sentence | \n",
+ " llama-3.1-8b-instant | \n",
+ " llama-3.3-70b-versatile | \n",
+ " openai/gpt-oss-120b | \n",
+ " Majority Vote Label | \n",
+ " Human Annotation | \n",
+ " Human Reasoning | \n",
+ " Video ID | \n",
"
\n",
+ " \n",
+ " \n",
" \n",
- " | 19 | \n",
- " We love you guys and we appreciate [cheering and applause] everybody for tuning in | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
+ " 18 | \n",
+ " And J-Lo, I'm not quite sure the betting market is going to allow me an opportunity to get involved with the number that I've targeted. | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
" | 20 | \n",
- " We've got all kinds of great coverage still coming up | \n",
- " 0 | \n",
- " 1 | \n",
- " 0 | \n",
+ " And even without Zach Charbonnet, I just don't see Kenneth Walker getting to mid-70s or higher. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " And one more thing, Belichick, up for Hall of Fame, doesn't get in on the first time around. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
- " | 21 | \n",
- " >> We are in danger of the NFL live curse affecting Super Bowl 60, but it's not going to happen | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
+ " 41 | \n",
+ " But you also have a Seahawks offense that's going to be stepping up in class given the last three games, two against the 49ers, one against the Rams. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
- " | 22 | \n",
- " m | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
+ " 45 | \n",
+ " Do make a case to bet a lot of player performances to come in under their posted totals. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 47 | \n",
+ " Early bets here for Super Bowl 60. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 51 | \n",
+ " For me, I'm going to go to that Seattle Seahawks backfield though and take Kenneth Walker under his rushing total at 75 and a half. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 55 | \n",
+ " He could put this one away in one catch. | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 63 | \n",
+ " He's not going to have time for downfield plays to develop. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 65 | \n",
+ " Hunter Henry over 36.5 yards, receiving minus 110. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 66 | \n",
+ " I am going to take a skilled position player for a team that I think wins the game, and that would lead me to Ramondre Stevenson at 28 to one. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 69 | \n",
+ " I can't wait to see you freak out on Super Bowl Sunday when he doesn't get that. | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 73 | \n",
+ " I have him projected J-Lo in the high 60s, under 75.5 for the bell cow back wearing Seahawks colors. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 79 | \n",
+ " I like him to go over 36.5. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 84 | \n",
+ " I think he's a guy that may be targeted a couple times here. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 85 | \n",
+ " I think they may have to go away from that approach. | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 86 | \n",
+ " I want to dig into it a little bit, figure out if there's a matchup or two that I think one of these sides can exploit. | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 90 | \n",
+ " I'm going to start with a do. | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 91 | \n",
+ " I'm going with Seattle receiver Raheed Shaheed, over 21.5 yards, receiving minus 110. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 92 | \n",
+ " I'm leaning towards over in this one. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 95 | \n",
+ " If I am going to get involved in a total, I'll probably gravitate a little bit more towards looking at that first half number. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 96 | \n",
+ " If the Patriots get up quick, but they do so on, say, a defensive score or a special team score, anything weird that happens early on, usually you can take an advantage of a market that will overcorrect to that, especially with everyone and their dog betting in this game and people chasing their bets. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 99 | \n",
+ " In a game like this, it appears the forecast in Santa Clara is going to be pretty good. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 102 | \n",
+ " It'll be all right. | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 106 | \n",
+ " Jake Bobo, over a half a receiving yard at plus $1.15. | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 115 | \n",
+ " My initial inclination was that if you're going to give me a good defense, that's going to have the opportunity to catch points in this particular spot. | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 124 | \n",
+ " People are going to gravitate towards rooting for things to happen. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 127 | \n",
+ " Projections for Henry, sit as high as 46. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 135 | \n",
+ " So I do lean towards those unders in the first quarter. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 142 | \n",
+ " So you kind of have to have rough projections, know which players that you've targeted. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 144 | \n",
+ " That'll probably be on the sideline. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 149 | \n",
+ " The opportunity to move down to 4 if they see professional money most likely coming in on New England, or drift the number a lot further out in that 5-5.5 range if the public comes in on one side, or I guess I should say if the professionals decide they want to back the Seahawks. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 154 | \n",
+ " They're going to see low yardage totals with those third and fourth string wide receivers and tight ends and hope those players can come up with a catcher to do look to bet things under and do be willing to lay prices to be able to do so. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 164 | \n",
+ " We will have another one next week. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 173 | \n",
+ " When it comes to four and four-and-a-half point favorites, I've kind of made it a rule for myself to say, you know what don't play the points just go with the money line i find that four to four and a half is it's been since kind of four became more of a key number since 2015 it's been kind of a no man's land where that four point favorite or the four and a half point favorite will win but not quite cover they're good enough to be favored by more than a field goal but not quite good enough to be favored by six and so for me i went to seattle straight out just i mean two games over the last two weeks the new england patriots win but don't cover they close a three and a half four point favorite against the denver broncos and the week before we see the chicago bears come up just short in ... | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 179 | \n",
+ " Yeah, for me, knowing what the look-ahead spread was, I knew we were going to see a money line on Seattle around 2-1 for that favorite. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 190 | \n",
+ " and what have the odds makers not properly accounted for versus current form compared to season-long metrics where they'll heavily weight them towards a lot of individual player performances. | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 193 | \n",
+ " look we've seen a Patriots running back get snubbed in the past that had 37 catches and like 900 receiving yards this is a chance for the world to right all of its wrongs and give Ramondre the hardware if he has two touchdowns and 75 all-purpose yards give me Ramondre Stevenson 28 to 1 all right I'll see your skill player and raise you a defensive player go on Demarcus Lawrence it only makes sense that a cowboy is going to get to the Super Bowl and show it almost last year I did uh but I'm happy for Tank but let's talk about this defense this is the reason why San Seattle is here This is the reason why they're favored. | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
+ "
\n",
+ " \n",
+ " | 195 | \n",
+ " thanks to beth365 for the odds and of course thank you to you for tuning in and listening whether you've been with us all year or you're just finding us here ahead of the big game a reminder if you like what we're doing rate and review like and subscribe on youtube it just takes a matter of seconds uh but it means a lot to us so please do that when you can todd uh you thought you would get out of this podcast without some super bowl silliness but no because you know i mean i look not my first rodeo i know better that there's always something waiting behind door number three it's a basic one by this by this point in in super bowl betting this is a very common wager what color gatorade is going to get thrown on the winning coach here at the end of super bowl six i mean i feel like you're... | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " FPl-F2k_KtM | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
- "llm_name Base Sentence \\\n",
- "0 \n",
- "1 >> Amen \n",
- "2 >> YES, BOOGIE \n",
- "3 >> YOU GUYS LOVE ME \n",
- "4 >> Yeah \n",
- "5 All right, but here's why \n",
- "6 And >> on Sunday night when we see you as part of the Handoff Show, which tune in around 1:00 a \n",
- "7 And I'm doing it for you, Mina \n",
- "8 And then we'll see you again on Monday in Disneyland \n",
- "9 But seriously, Drake May's going to stand up and it's going to be great and they're going to win and you guys are all going to be wrong \n",
- "10 Eastern time, we'll talk about how I'm right and you guys are wrong \n",
- "11 Enjoy Super Bowl 60 Sports Center with Matt and Hannah's next \n",
- "12 Go with the Patriots \n",
- "13 I actually think Drake May and his legs is going to be the key factor in this game \n",
- "14 I need to \n",
- "15 I think he's going to step up in huge moments \n",
- "16 I'm doing it for you \n",
- "17 Thanks to everybody for being here \n",
- "18 Thanks to our incredible staff back in Bristol, Connecticut \n",
- "19 We love you guys and we appreciate [cheering and applause] everybody for tuning in \n",
- "20 We've got all kinds of great coverage still coming up \n",
- "21 >> We are in danger of the NFL live curse affecting Super Bowl 60, but it's not going to happen \n",
- "22 m \n",
+ " Base Sentence \\\n",
+ "18 And J-Lo, I'm not quite sure the betting market is going to allow me an opportunity to get involved with the number that I've targeted. \n",
+ "20 And even without Zach Charbonnet, I just don't see Kenneth Walker getting to mid-70s or higher. \n",
+ "23 And one more thing, Belichick, up for Hall of Fame, doesn't get in on the first time around. \n",
+ "41 But you also have a Seahawks offense that's going to be stepping up in class given the last three games, two against the 49ers, one against the Rams. \n",
+ "45 Do make a case to bet a lot of player performances to come in under their posted totals. \n",
+ "47 Early bets here for Super Bowl 60. \n",
+ "51 For me, I'm going to go to that Seattle Seahawks backfield though and take Kenneth Walker under his rushing total at 75 and a half. \n",
+ "55 He could put this one away in one catch. \n",
+ "63 He's not going to have time for downfield plays to develop. \n",
+ "65 Hunter Henry over 36.5 yards, receiving minus 110. \n",
+ "66 I am going to take a skilled position player for a team that I think wins the game, and that would lead me to Ramondre Stevenson at 28 to one. \n",
+ "69 I can't wait to see you freak out on Super Bowl Sunday when he doesn't get that. \n",
+ "73 I have him projected J-Lo in the high 60s, under 75.5 for the bell cow back wearing Seahawks colors. \n",
+ "79 I like him to go over 36.5. \n",
+ "84 I think he's a guy that may be targeted a couple times here. \n",
+ "85 I think they may have to go away from that approach. \n",
+ "86 I want to dig into it a little bit, figure out if there's a matchup or two that I think one of these sides can exploit. \n",
+ "90 I'm going to start with a do. \n",
+ "91 I'm going with Seattle receiver Raheed Shaheed, over 21.5 yards, receiving minus 110. \n",
+ "92 I'm leaning towards over in this one. \n",
+ "95 If I am going to get involved in a total, I'll probably gravitate a little bit more towards looking at that first half number. \n",
+ "96 If the Patriots get up quick, but they do so on, say, a defensive score or a special team score, anything weird that happens early on, usually you can take an advantage of a market that will overcorrect to that, especially with everyone and their dog betting in this game and people chasing their bets. \n",
+ "99 In a game like this, it appears the forecast in Santa Clara is going to be pretty good. \n",
+ "102 It'll be all right. \n",
+ "106 Jake Bobo, over a half a receiving yard at plus $1.15. \n",
+ "115 My initial inclination was that if you're going to give me a good defense, that's going to have the opportunity to catch points in this particular spot. \n",
+ "124 People are going to gravitate towards rooting for things to happen. \n",
+ "127 Projections for Henry, sit as high as 46. \n",
+ "135 So I do lean towards those unders in the first quarter. \n",
+ "142 So you kind of have to have rough projections, know which players that you've targeted. \n",
+ "144 That'll probably be on the sideline. \n",
+ "149 The opportunity to move down to 4 if they see professional money most likely coming in on New England, or drift the number a lot further out in that 5-5.5 range if the public comes in on one side, or I guess I should say if the professionals decide they want to back the Seahawks. \n",
+ "154 They're going to see low yardage totals with those third and fourth string wide receivers and tight ends and hope those players can come up with a catcher to do look to bet things under and do be willing to lay prices to be able to do so. \n",
+ "164 We will have another one next week. \n",
+ "173 When it comes to four and four-and-a-half point favorites, I've kind of made it a rule for myself to say, you know what don't play the points just go with the money line i find that four to four and a half is it's been since kind of four became more of a key number since 2015 it's been kind of a no man's land where that four point favorite or the four and a half point favorite will win but not quite cover they're good enough to be favored by more than a field goal but not quite good enough to be favored by six and so for me i went to seattle straight out just i mean two games over the last two weeks the new england patriots win but don't cover they close a three and a half four point favorite against the denver broncos and the week before we see the chicago bears come up just short in ... \n",
+ "179 Yeah, for me, knowing what the look-ahead spread was, I knew we were going to see a money line on Seattle around 2-1 for that favorite. \n",
+ "190 and what have the odds makers not properly accounted for versus current form compared to season-long metrics where they'll heavily weight them towards a lot of individual player performances. \n",
+ "193 look we've seen a Patriots running back get snubbed in the past that had 37 catches and like 900 receiving yards this is a chance for the world to right all of its wrongs and give Ramondre the hardware if he has two touchdowns and 75 all-purpose yards give me Ramondre Stevenson 28 to 1 all right I'll see your skill player and raise you a defensive player go on Demarcus Lawrence it only makes sense that a cowboy is going to get to the Super Bowl and show it almost last year I did uh but I'm happy for Tank but let's talk about this defense this is the reason why San Seattle is here This is the reason why they're favored. \n",
+ "195 thanks to beth365 for the odds and of course thank you to you for tuning in and listening whether you've been with us all year or you're just finding us here ahead of the big game a reminder if you like what we're doing rate and review like and subscribe on youtube it just takes a matter of seconds uh but it means a lot to us so please do that when you can todd uh you thought you would get out of this podcast without some super bowl silliness but no because you know i mean i look not my first rodeo i know better that there's always something waiting behind door number three it's a basic one by this by this point in in super bowl betting this is a very common wager what color gatorade is going to get thrown on the winning coach here at the end of super bowl six i mean i feel like you're... \n",
"\n",
- "llm_name llama-3.1-8b-instant llama-3.3-70b-versatile Majority Vote Label \n",
- "0 1 0 0 \n",
- "1 0 0 0 \n",
- "2 0 0 0 \n",
- "3 0 0 0 \n",
- "4 0 0 0 \n",
- "5 0 0 0 \n",
- "6 0 0 0 \n",
- "7 0 0 0 \n",
- "8 0 1 0 \n",
- "9 1 1 1 \n",
- "10 0 1 0 \n",
- "11 0 0 0 \n",
- "12 0 1 0 \n",
- "13 1 1 1 \n",
- "14 0 0 0 \n",
- "15 1 1 1 \n",
- "16 0 0 0 \n",
- "17 0 0 0 \n",
- "18 0 0 0 \n",
- "19 0 0 0 \n",
- "20 0 1 0 \n",
- "21 1 1 1 \n",
- "22 0 0 0 "
+ " llama-3.1-8b-instant llama-3.3-70b-versatile openai/gpt-oss-120b \\\n",
+ "18 1.0 0.0 1.0 \n",
+ "20 1.0 1.0 1.0 \n",
+ "23 1.0 1.0 1.0 \n",
+ "41 1.0 1.0 1.0 \n",
+ "45 1.0 1.0 1.0 \n",
+ "47 1.0 1.0 0.0 \n",
+ "51 1.0 1.0 1.0 \n",
+ "55 0.0 1.0 1.0 \n",
+ "63 1.0 1.0 1.0 \n",
+ "65 1.0 1.0 1.0 \n",
+ "66 1.0 1.0 1.0 \n",
+ "69 0.0 1.0 1.0 \n",
+ "73 1.0 1.0 1.0 \n",
+ "79 1.0 1.0 0.0 \n",
+ "84 1.0 1.0 1.0 \n",
+ "85 0.0 1.0 1.0 \n",
+ "86 0.0 1.0 1.0 \n",
+ "90 0.0 1.0 1.0 \n",
+ "91 1.0 1.0 1.0 \n",
+ "92 1.0 1.0 1.0 \n",
+ "95 1.0 1.0 1.0 \n",
+ "96 1.0 1.0 1.0 \n",
+ "99 1.0 1.0 1.0 \n",
+ "102 0.0 1.0 1.0 \n",
+ "106 0.0 1.0 1.0 \n",
+ "115 0.0 1.0 1.0 \n",
+ "124 1.0 1.0 1.0 \n",
+ "127 1.0 1.0 1.0 \n",
+ "135 1.0 1.0 1.0 \n",
+ "142 1.0 1.0 0.0 \n",
+ "144 1.0 1.0 1.0 \n",
+ "149 1.0 1.0 1.0 \n",
+ "154 1.0 1.0 1.0 \n",
+ "164 1.0 1.0 1.0 \n",
+ "173 0.0 1.0 1.0 \n",
+ "179 1.0 1.0 1.0 \n",
+ "190 1.0 1.0 0.0 \n",
+ "193 0.0 1.0 1.0 \n",
+ "195 0.0 1.0 1.0 \n",
+ "\n",
+ " Majority Vote Label Human Annotation Human Reasoning Video ID \n",
+ "18 1.0 NaN NaN FPl-F2k_KtM \n",
+ "20 1.0 NaN NaN FPl-F2k_KtM \n",
+ "23 1.0 NaN NaN FPl-F2k_KtM \n",
+ "41 1.0 NaN NaN FPl-F2k_KtM \n",
+ "45 1.0 NaN NaN FPl-F2k_KtM \n",
+ "47 1.0 NaN NaN FPl-F2k_KtM \n",
+ "51 1.0 NaN NaN FPl-F2k_KtM \n",
+ "55 1.0 NaN NaN FPl-F2k_KtM \n",
+ "63 1.0 NaN NaN FPl-F2k_KtM \n",
+ "65 1.0 NaN NaN FPl-F2k_KtM \n",
+ "66 1.0 NaN NaN FPl-F2k_KtM \n",
+ "69 1.0 NaN NaN FPl-F2k_KtM \n",
+ "73 1.0 NaN NaN FPl-F2k_KtM \n",
+ "79 1.0 NaN NaN FPl-F2k_KtM \n",
+ "84 1.0 NaN NaN FPl-F2k_KtM \n",
+ "85 1.0 NaN NaN FPl-F2k_KtM \n",
+ "86 1.0 NaN NaN FPl-F2k_KtM \n",
+ "90 1.0 NaN NaN FPl-F2k_KtM \n",
+ "91 1.0 NaN NaN FPl-F2k_KtM \n",
+ "92 1.0 NaN NaN FPl-F2k_KtM \n",
+ "95 1.0 NaN NaN FPl-F2k_KtM \n",
+ "96 1.0 NaN NaN FPl-F2k_KtM \n",
+ "99 1.0 NaN NaN FPl-F2k_KtM \n",
+ "102 1.0 NaN NaN FPl-F2k_KtM \n",
+ "106 1.0 NaN NaN FPl-F2k_KtM \n",
+ "115 1.0 NaN NaN FPl-F2k_KtM \n",
+ "124 1.0 NaN NaN FPl-F2k_KtM \n",
+ "127 1.0 NaN NaN FPl-F2k_KtM \n",
+ "135 1.0 NaN NaN FPl-F2k_KtM \n",
+ "142 1.0 NaN NaN FPl-F2k_KtM \n",
+ "144 1.0 NaN NaN FPl-F2k_KtM \n",
+ "149 1.0 NaN NaN FPl-F2k_KtM \n",
+ "154 1.0 NaN NaN FPl-F2k_KtM \n",
+ "164 1.0 NaN NaN FPl-F2k_KtM \n",
+ "173 1.0 NaN NaN FPl-F2k_KtM \n",
+ "179 1.0 NaN NaN FPl-F2k_KtM \n",
+ "190 1.0 NaN NaN FPl-F2k_KtM \n",
+ "193 1.0 NaN NaN FPl-F2k_KtM \n",
+ "195 1.0 NaN NaN FPl-F2k_KtM "
]
},
- "execution_count": 41,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "results_df['Majority Vote Label'] = (\n",
- " results_df\n",
- " .iloc[:, 1:] # exclude sentence column\n",
- " .mode(axis=1)[0]\n",
- " .astype(int)\n",
- ")\n",
- "\n",
- "results_df"
+ "predictions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "080f76f7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saving CSV file to: c:\\Users\\adria\\OneDrive\\Área de Trabalho\\UF Data Studio\\predictions\\prediction_acquition-youtube\\../data\\yt\\majority_vote\\sports\\batch_11.csv\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Resolve <base data dir>/yt/majority_vote/sports and save the filtered rows there as a CSV.\n",
+ "output_subdirs = (\"yt\", \"majority_vote\", \"sports\")\n",
+ "base_data_path = DataProcessing.load_base_data_path(notebook_dir=notebook_dir)\n",
+ "save_data_path = os.path.join(base_data_path, *output_subdirs)\n",
+ "DataProcessing.save_to_file(\n",
+ "    predictions,\n",
+ "    path=save_data_path,\n",
+ "    prefix='batch_11',\n",
+ "    save_file_type='csv',\n",
+ "    include_version=False,\n",
+ ")"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "d8153ff4",
+ "id": "42dd30cf",
"metadata": {},
"outputs": [],
"source": []
@@ -1621,7 +2090,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": ".venv",
+ "display_name": "predictions (3.11.14)",
"language": "python",
"name": "python3"
},