From fcc7b9f54fc197e61cac3d9dc6d831a87ac59458 Mon Sep 17 00:00:00 2001 From: naivm Date: Wed, 2 Oct 2019 11:53:01 +0100 Subject: [PATCH 1/2] done --- your-code/Project_README.md | 57 + your-code/spotify_project.ipynb | 8169 +++++++++++++++++++++++++++++++ 2 files changed, 8226 insertions(+) create mode 100644 your-code/Project_README.md create mode 100644 your-code/spotify_project.ipynb diff --git a/your-code/Project_README.md b/your-code/Project_README.md new file mode 100644 index 0000000..7174412 --- /dev/null +++ b/your-code/Project_README.md @@ -0,0 +1,57 @@ +Ironhack Logo + +# App Store vs Google Play? +*Miguel Vian and Alona Sorochynska* + +*Data Squad 21 Lisbon 20/09/19* + +## Content +- [Project Description](#project-description) +- [Hypotheses / Questions](#hypotheses--questions) +- [Dataset](#dataset) +- [Workflow](#workflow) +- [Organization](#organization) +- [Links](#links) + + + +## Project Description +The goal of this project is to practice what we have learned in Business Intelligence and Tableau by choosing dataset(s) and telling a story through visualizations in Tableau. + + + +## Hypotheses / Questions +How are the two shop ecosystems different? +Who spends more money? And on what? +Who is most engaged? + + + +## Dataset +Data sets taken from Kaggle + +[Dataset Apple](https://www.kaggle.com/ramamet4/app-store-apple-data-set-10k-apps#AppleStore.csv) +[Dataset Google](https://www.kaggle.com/gauthamp10/google-playstore-apps#Google-Playstore-Full.csv) + + + +## Workflow +Data collection --> Data cleaning --> Data visualization (exploration phase) --> Create Dashboard --> Present Story + +We decided to compare two data sets from different app stores. First we had to clean unwanted columns from both data sets. Then we checked for duplicates in terms of App Names. Then checked for errors in the columns like non-numeric ratings and non-text categories.
The Category/Genre columns had different values for each store, so we made the adjustments to make them equal. After all that cleaning it was time to concatenate. But before concatenating we renamed the remaining columns to the same names and made a column to have an identifier for each store. A column full of "A"s to mark the Apple store and one of "G"s for the Google one. +Finally we concatenated and exported it as an Excel file to be read by Tableau. +Inside Tableau we tried to make every graph that could expose the differences between both stores. Mainly relating things like Price, Categories and Ratings. + + + +## Organization +Trello + + + +## Links +Include the links to your repository, slides and trello. Feel free to include any other links associated to your project. + +[Repository](https://github.com/naivm/Project-Week-6-Tableau) +[Tableau Public](https://public.tableau.com/profile/miguel.vian#!/vizhome/Project6-StorevsPlay/Dashboard1) +[Trello](https://trello.com/invite/b/ywUwW4In/4c9c5895f202192bd3cf0c2896f4817b/project-5) diff --git a/your-code/spotify_project.ipynb b/your-code/spotify_project.ipynb new file mode 100644 index 0000000..7bd4019 --- /dev/null +++ b/your-code/spotify_project.ipynb @@ -0,0 +1,8169 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sqlite3\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Create your connection.\n", + "cnx = sqlite3.connect('billboard-200.db')\n", + "\n", + "df_albums = pd.read_sql_query(\"SELECT * FROM albums\", cnx)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "df_acoustic_features = pd.read_sql_query(\"SELECT * FROM acoustic_features\", cnx)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idsongalbumartistacousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencealbum_iddate
00Veyvc3n9AcLSoK3r1dA12Voices In My HeadHoodie SZNA Boogie Wit da Hoodie0.05550.754142301.00.6630.0000006.00.101-6.3110.00.42790.1954.00.2073r5hf3Cj3EMh1C2saQ8jyt2018-12-21
177JzXZonNumWsuXKy9vr3UBeastyHoodie SZNA Boogie Wit da Hoodie0.29200.860152829.00.4180.0000007.00.106-9.0610.00.158126.0234.00.3743r5hf3Cj3EMh1C2saQ8jyt2018-12-21
218yllZD0TdF7ykcREib8Z1I Did ItHoodie SZNA Boogie Wit da Hoodie0.15300.718215305.00.4540.0000468.00.116-9.0121.00.12789.4834.00.1963r5hf3Cj3EMh1C2saQ8jyt2018-12-21
31wJRveJZLSb1rjhnUHQiv6Swervin (feat. 6ix9ine)Hoodie SZNA Boogie Wit da Hoodie0.01530.581189487.00.6620.0000009.00.111-5.2391.00.30393.0234.00.4343r5hf3Cj3EMh1C2saQ8jyt2018-12-21
40jAfdqv18goRTUxm3ilRjbStartender (feat. Offset and Tyga)Hoodie SZNA Boogie Wit da Hoodie0.02350.736192779.00.6220.0000006.00.151-4.6530.00.133191.9714.00.5063r5hf3Cj3EMh1C2saQ8jyt2018-12-21
\n", + "
" + ], + "text/plain": [ + " id song album \\\n", + "0 0Veyvc3n9AcLSoK3r1dA12 Voices In My Head Hoodie SZN \n", + "1 77JzXZonNumWsuXKy9vr3U Beasty Hoodie SZN \n", + "2 18yllZD0TdF7ykcREib8Z1 I Did It Hoodie SZN \n", + "3 1wJRveJZLSb1rjhnUHQiv6 Swervin (feat. 6ix9ine) Hoodie SZN \n", + "4 0jAfdqv18goRTUxm3ilRjb Startender (feat. Offset and Tyga) Hoodie SZN \n", + "\n", + " artist acousticness danceability duration_ms energy \\\n", + "0 A Boogie Wit da Hoodie 0.0555 0.754 142301.0 0.663 \n", + "1 A Boogie Wit da Hoodie 0.2920 0.860 152829.0 0.418 \n", + "2 A Boogie Wit da Hoodie 0.1530 0.718 215305.0 0.454 \n", + "3 A Boogie Wit da Hoodie 0.0153 0.581 189487.0 0.662 \n", + "4 A Boogie Wit da Hoodie 0.0235 0.736 192779.0 0.622 \n", + "\n", + " instrumentalness key liveness loudness mode speechiness tempo \\\n", + "0 0.000000 6.0 0.101 -6.311 0.0 0.427 90.195 \n", + "1 0.000000 7.0 0.106 -9.061 0.0 0.158 126.023 \n", + "2 0.000046 8.0 0.116 -9.012 1.0 0.127 89.483 \n", + "3 0.000000 9.0 0.111 -5.239 1.0 0.303 93.023 \n", + "4 0.000000 6.0 0.151 -4.653 0.0 0.133 191.971 \n", + "\n", + " time_signature valence album_id date \n", + "0 4.0 0.207 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 \n", + "1 4.0 0.374 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 \n", + "2 4.0 0.196 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 \n", + "3 4.0 0.434 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 \n", + "4 4.0 0.506 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_acoustic_features.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iddateartistalbumranklengthtrack_length
01NoneNoneNoneNoneNaNNaN
122019-01-19A Boogie Wit da HoodieHoodie SZN120.0185233.800000
232019-01-1921 SavageI Am > I Was215.0211050.733333
342019-01-19SoundtrackSpider-Man: Into The Spider-Verse313.0190866.384615
452019-01-19Meek MillChampionships419.0219173.894737
\n", + "
" + ], + "text/plain": [ + " id date artist album \\\n", + "0 1 None None None \n", + "1 2 2019-01-19 A Boogie Wit da Hoodie Hoodie SZN \n", + "2 3 2019-01-19 21 Savage I Am > I Was \n", + "3 4 2019-01-19 Soundtrack Spider-Man: Into The Spider-Verse \n", + "4 5 2019-01-19 Meek Mill Championships \n", + "\n", + " rank length track_length \n", + "0 None NaN NaN \n", + "1 1 20.0 185233.800000 \n", + "2 2 15.0 211050.733333 \n", + "3 3 13.0 190866.384615 \n", + "4 4 19.0 219173.894737 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_albums.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalence
count339850.000000339850.0000003.398500e+05339850.000000339850.000000339850.000000339850.000000339850.000000339850.000000339850.000000339850.000000339850.000000339850.000000
mean0.2988060.5411762.367351e+050.6167160.0982775.2395260.233439-9.0727590.7057000.092625119.9339743.8875470.506169
std0.3155330.1708211.022557e+050.2440430.2419803.5533700.2152564.6010320.4557280.12889430.3808990.4536730.253193
min0.0000000.0000001.062000e+030.0000000.0000000.0000000.000000-60.0000000.0000000.0000000.0000000.0000000.000000
25%0.0215000.4250001.865218e+050.4400000.0000002.0000000.098200-11.5750000.0000000.03340096.1882504.0000000.302000
50%0.1640000.5470002.263730e+050.6470000.0000535.0000000.141000-8.1210001.0000000.044700118.2230004.0000000.506000
75%0.5460000.6640002.710000e+050.8220000.0111008.0000000.299000-5.6790001.0000000.084000139.2880004.0000000.712000
max0.9960000.9880004.734079e+061.0000001.00000011.0000001.0000003.7440001.0000000.968000247.8240005.0000001.000000
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy \\\n", + "count 339850.000000 339850.000000 3.398500e+05 339850.000000 \n", + "mean 0.298806 0.541176 2.367351e+05 0.616716 \n", + "std 0.315533 0.170821 1.022557e+05 0.244043 \n", + "min 0.000000 0.000000 1.062000e+03 0.000000 \n", + "25% 0.021500 0.425000 1.865218e+05 0.440000 \n", + "50% 0.164000 0.547000 2.263730e+05 0.647000 \n", + "75% 0.546000 0.664000 2.710000e+05 0.822000 \n", + "max 0.996000 0.988000 4.734079e+06 1.000000 \n", + "\n", + " instrumentalness key liveness loudness \\\n", + "count 339850.000000 339850.000000 339850.000000 339850.000000 \n", + "mean 0.098277 5.239526 0.233439 -9.072759 \n", + "std 0.241980 3.553370 0.215256 4.601032 \n", + "min 0.000000 0.000000 0.000000 -60.000000 \n", + "25% 0.000000 2.000000 0.098200 -11.575000 \n", + "50% 0.000053 5.000000 0.141000 -8.121000 \n", + "75% 0.011100 8.000000 0.299000 -5.679000 \n", + "max 1.000000 11.000000 1.000000 3.744000 \n", + "\n", + " mode speechiness tempo time_signature \\\n", + "count 339850.000000 339850.000000 339850.000000 339850.000000 \n", + "mean 0.705700 0.092625 119.933974 3.887547 \n", + "std 0.455728 0.128894 30.380899 0.453673 \n", + "min 0.000000 0.000000 0.000000 0.000000 \n", + "25% 0.000000 0.033400 96.188250 4.000000 \n", + "50% 1.000000 0.044700 118.223000 4.000000 \n", + "75% 1.000000 0.084000 139.288000 4.000000 \n", + "max 1.000000 0.968000 247.824000 5.000000 \n", + "\n", + " valence \n", + "count 339850.000000 \n", + "mean 0.506169 \n", + "std 0.253193 \n", + "min 0.000000 \n", + "25% 0.302000 \n", + "50% 0.506000 \n", + "75% 0.712000 \n", + "max 1.000000 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_acoustic_features.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlengthtrack_length
count573947.000000492547.0000004.688670e+05
mean286974.00000013.7289332.443541e+05
std165684.3718127.9606407.195498e+04
min1.0000001.0000002.542290e+04
25%143487.50000010.0000002.100814e+05
50%286974.00000012.0000002.372190e+05
75%430460.50000015.0000002.680566e+05
max573947.000000666.0000004.457023e+06
\n", + "
" + ], + "text/plain": [ + " id length track_length\n", + "count 573947.000000 492547.000000 4.688670e+05\n", + "mean 286974.000000 13.728933 2.443541e+05\n", + "std 165684.371812 7.960640 7.195498e+04\n", + "min 1.000000 1.000000 2.542290e+04\n", + "25% 143487.500000 10.000000 2.100814e+05\n", + "50% 286974.000000 12.000000 2.372190e+05\n", + "75% 430460.500000 15.000000 2.680566e+05\n", + "max 573947.000000 666.000000 4.457023e+06" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_albums.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id int64\n", + "date object\n", + "artist object\n", + "album object\n", + "rank object\n", + "length float64\n", + "track_length float64\n", + "dtype: object" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_albums.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "33012" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df_albums.album.unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id 0\n", + "date 1\n", + "artist 1\n", + "album 1\n", + "rank 1\n", + "length 81400\n", + "track_length 105080\n", + "dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_albums.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + "5 False\n", + "6 False\n", + "7 False\n", + "8 False\n", + "9 False\n", + "10 False\n", + "11 False\n", + "12 False\n", + "13 
False\n", + "14 False\n", + "15 False\n", + "16 False\n", + "17 False\n", + "18 False\n", + "19 False\n", + "20 False\n", + "21 False\n", + "22 False\n", + "23 False\n", + "24 False\n", + "25 False\n", + "26 False\n", + "27 False\n", + "28 False\n", + "29 False\n", + " ... \n", + "573917 True\n", + "573918 True\n", + "573919 True\n", + "573920 True\n", + "573921 True\n", + "573922 True\n", + "573923 True\n", + "573924 True\n", + "573925 True\n", + "573926 True\n", + "573927 True\n", + "573928 True\n", + "573929 True\n", + "573930 True\n", + "573931 True\n", + "573932 True\n", + "573933 True\n", + "573934 True\n", + "573935 True\n", + "573936 True\n", + "573937 True\n", + "573938 True\n", + "573939 True\n", + "573940 True\n", + "573941 True\n", + "573942 True\n", + "573943 True\n", + "573944 True\n", + "573945 True\n", + "573946 True\n", + "Length: 573947, dtype: bool" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_albums.duplicated(subset=[\"album\",\"artist\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "df_albums.dropna(subset=[\"album\"], inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "df_albums[\"rank\"] = pd.to_numeric(df_albums[\"rank\"], downcast=\"integer\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "df_albums[\"date\"] = pd.to_datetime(df_albums[\"date\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2019-01-19 00:00:00')" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "max(df_albums[\"date\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "albums 
= df_albums.groupby(by=\"album\", as_index = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "df_albums.sort_values([\"album\",\"date\"], axis=0, ascending=False, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "#df_albums.sort_values('rank', ascending=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "albums = df_albums.sort_values('date').groupby(by=[\"artist\",'album'],as_index=False).agg({\"date\":['first','last'],\"rank\":\"min\",\"length\":\"mean\",\"track_length\":\"mean\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "albums = albums.droplevel(1, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "albums.columns = [\"artist\",'album', 'date_start', 'date_end', 'rank', 'length', 'track_length']" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumdate_startdate_endranklengthtrack_length
0Roots Of Country Music (1965)1965-07-101965-08-2110772.0NaN
1Silhouette1988-10-221989-11-1881.0230416.000000
2!!! (Chk Chk Chk)Myth Takes2007-03-242007-03-2419510.0290017.000000
3\"Weird Al\" Yankovic\"Weird Al\" Yankovic In 3-D1984-03-171984-08-1817NaNNaN
4\"Weird Al\" YankovicAlapalooza1993-10-231994-04-164612.0224445.166667
\n", + "
" + ], + "text/plain": [ + " artist album date_start date_end \\\n", + "0 Roots Of Country Music (1965) 1965-07-10 1965-08-21 \n", + "1 Silhouette 1988-10-22 1989-11-18 \n", + "2 !!! (Chk Chk Chk) Myth Takes 2007-03-24 2007-03-24 \n", + "3 \"Weird Al\" Yankovic \"Weird Al\" Yankovic In 3-D 1984-03-17 1984-08-18 \n", + "4 \"Weird Al\" Yankovic Alapalooza 1993-10-23 1994-04-16 \n", + "\n", + " rank length track_length \n", + "0 107 72.0 NaN \n", + "1 8 1.0 230416.000000 \n", + "2 195 10.0 290017.000000 \n", + "3 17 NaN NaN \n", + "4 46 12.0 224445.166667 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "albums[\"days_top\"]= (albums.date_end-albums.date_start).astype('timedelta64[D]')" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumdate_startdate_endranklengthtrack_lengthdays_top
0Roots Of Country Music (1965)1965-07-101965-08-2110772.0NaN42.0
1Silhouette1988-10-221989-11-1881.0230416.000000392.0
2!!! (Chk Chk Chk)Myth Takes2007-03-242007-03-2419510.0290017.0000000.0
3\"Weird Al\" Yankovic\"Weird Al\" Yankovic In 3-D1984-03-171984-08-1817NaNNaN154.0
4\"Weird Al\" YankovicAlapalooza1993-10-231994-04-164612.0224445.166667175.0
\n", + "
" + ], + "text/plain": [ + " artist album date_start date_end \\\n", + "0 Roots Of Country Music (1965) 1965-07-10 1965-08-21 \n", + "1 Silhouette 1988-10-22 1989-11-18 \n", + "2 !!! (Chk Chk Chk) Myth Takes 2007-03-24 2007-03-24 \n", + "3 \"Weird Al\" Yankovic \"Weird Al\" Yankovic In 3-D 1984-03-17 1984-08-18 \n", + "4 \"Weird Al\" Yankovic Alapalooza 1993-10-23 1994-04-16 \n", + "\n", + " rank length track_length days_top \n", + "0 107 72.0 NaN 42.0 \n", + "1 8 1.0 230416.000000 392.0 \n", + "2 195 10.0 290017.000000 0.0 \n", + "3 17 NaN NaN 154.0 \n", + "4 46 12.0 224445.166667 175.0 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 36338.000000\n", + "mean 257.963619\n", + "std 1191.680746\n", + "min 0.000000\n", + "25% 7.000000\n", + "50% 56.000000\n", + "75% 147.000000\n", + "max 18823.000000\n", + "Name: days_top, dtype: float64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums[\"days_top\"].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumdate_startdate_endranklengthtrack_lengthdays_top
0Roots Of Country Music (1965)1965-07-101965-08-2110772.0NaN42.0
1Silhouette1988-10-221989-11-1881.0230416.000000392.0
2!!! (Chk Chk Chk)Myth Takes2007-03-242007-03-2419510.0290017.0000000.0
18\"Weird Al\" YankovicWeird Al Yankovic1983-05-211983-07-02139NaNNaN42.0
17\"Weird Al\" YankovicThe Essential \"Weird Al\" Yankovic2009-11-142009-11-1417838.0241838.9736840.0
\n", + "
" + ], + "text/plain": [ + " artist album date_start \\\n", + "0 Roots Of Country Music (1965) 1965-07-10 \n", + "1 Silhouette 1988-10-22 \n", + "2 !!! (Chk Chk Chk) Myth Takes 2007-03-24 \n", + "18 \"Weird Al\" Yankovic Weird Al Yankovic 1983-05-21 \n", + "17 \"Weird Al\" Yankovic The Essential \"Weird Al\" Yankovic 2009-11-14 \n", + "\n", + " date_end rank length track_length days_top \n", + "0 1965-08-21 107 72.0 NaN 42.0 \n", + "1 1989-11-18 8 1.0 230416.000000 392.0 \n", + "2 2007-03-24 195 10.0 290017.000000 0.0 \n", + "18 1983-07-02 139 NaN NaN 42.0 \n", + "17 2009-11-14 178 38.0 241838.973684 0.0 " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#sns.pairplot(albums)\n", + "albums.isna().sum()#\n", + "albums.sort_values(\"artist\").head()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3327" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums.album.duplicated().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "albums.at[1, 'artist'] = \"Kenny G\"" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "albums.at[0, 'artist'] = \"Various Artists\"" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumdate_startdate_endranklengthtrack_lengthdays_top
0Various ArtistsRoots Of Country Music (1965)1965-07-101965-08-2110772.0NaN42.0
1Kenny GSilhouette1988-10-221989-11-1881.0230416.000000392.0
2!!! (Chk Chk Chk)Myth Takes2007-03-242007-03-2419510.0290017.0000000.0
3\"Weird Al\" Yankovic\"Weird Al\" Yankovic In 3-D1984-03-171984-08-1817NaNNaN154.0
4\"Weird Al\" YankovicAlapalooza1993-10-231994-04-164612.0224445.166667175.0
\n", + "
" + ], + "text/plain": [ + " artist album date_start date_end \\\n", + "0 Various Artists Roots Of Country Music (1965) 1965-07-10 1965-08-21 \n", + "1 Kenny G Silhouette 1988-10-22 1989-11-18 \n", + "2 !!! (Chk Chk Chk) Myth Takes 2007-03-24 2007-03-24 \n", + "3 \"Weird Al\" Yankovic \"Weird Al\" Yankovic In 3-D 1984-03-17 1984-08-18 \n", + "4 \"Weird Al\" Yankovic Alapalooza 1993-10-23 1994-04-16 \n", + "\n", + " rank length track_length days_top \n", + "0 107 72.0 NaN 42.0 \n", + "1 8 1.0 230416.000000 392.0 \n", + "2 195 10.0 290017.000000 0.0 \n", + "3 17 NaN NaN 154.0 \n", + "4 46 12.0 224445.166667 175.0 " + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + } + ], + "source": [ + "albums.weeks_top = (albums.days_top + 7) / 7" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYcAAAEGCAYAAACO8lkDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3deXxc1X338c9Po122JFuSV9mWV8A2xoABQygESMAkKSYFErMV+tDSpNA2bdPXA08TmtDQB5ImpCmQhgQKpQFMSPKgJAaHNUACxrJjG8vCtjBeZHmRbVmb0f57/pgrZ9CMrNE6Wr7v12temjn33DPnWtZ8595z77nm7oiIiERKSnQHRERk6FE4iIhIFIWDiIhEUTiIiEgUhYOIiERJTnQH+kN+fr4XFRUluhsiIsPKunXrDrl7QaxlIyIcioqKKCkpSXQ3RESGFTPb1dUyHVYSEZEoCgcREYmicBARkSgKBxERiaJwEBGRKHGFg5ktM7OtZlZuZnfEWJ5mZiuD5WvMrCgozzOzV82s3sweiKg/1sw2RDwOmdl3g2U3m1lVxLI/759NFRGReHV7KquZhYAHgU8CFcBaMyt29y0R1W4Bqt19jpmtAO4DPg80Al8FFgYPANy9Dlgc8R7rgJ9FtLfS3W/v9VaJiEifxLPncDZQ7u473L0ZeBpY3qnOcuDx4PmzwCVmZu7e4O5vEg6JmMxsLjABeKPHvRcRkQERTzhMBfZEvK4IymLWcfdWoAbIi7MP1xLeU4i8scRVZrbJzJ41s2mxVjKzW82sxMxKqqqq4nwrERGJRzxXSFuMss53CIqnTldWADdGvP4F8JS7N5nZFwjvkVwc1bj7w8DDAEuWLBl2dyx6cs3uLpddd870QeyJiEi0ePYcKoDIb++FQGVXdcwsGcgBjnTXsJmdBiS7+7qOMnc/7O5NwcsfAmfG0UcREelH8YTDWmCumc00s1TC3/SLO9UpBm4Knl8NvOLx3X/0WuCpyAIzmxzx8gqgLI52RESkH3V7WMndW83sdmA1EAIedfdSM7sbKHH3YuAR4AkzKye8x7CiY30z2wlkA6lmdiVwacSZTp8DPtXpLf/GzK4AWoO2bu7D9omISC/ENSuru68CVnUquyvieSNwTRfrFp2g3Vkxyu4E7oynXyIiMjB0hbSIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRIkrHMxsmZltNbNyM7sjxvI0M1sZLF9jZkVBeZ6ZvWpm9Wb2QKd1Xgva3BA8JpyoLRERGTzdhoOZhYAHgcuB+cC1Zja/U7VbgGp3nwPcD9wXlDcCXwW+3EXz17v74uBxsJu2RERkkMSz53A2UO7uO9y9GXgaWN6pznLg8eD5s8AlZmbu3uDubxIOiXjFbKsH64uISB/FEw5TgT0RryuCsph13L0VqAHy4mj7v4JDSl+NCIC42jKzW82sxMxKqqqq4ngrERGJVzzhEOtbu/eiTmfXu/upwB8Fjxt70pa7P+zuS9x9SUFBQTdvJSIiPRFPOFQA0yJeFwKVXdUxs2QgBzhyokbdfW/wsw54kvDhq161JSIi/SuecFgLzDWzmWaWCqwAijvVKQZuCp5fDbzi7l3uOZhZspnlB89TgM8Am3vTloiI9L/k7iq4e6uZ3Q6sBkLAo+5eamZ3AyXuXgw8AjxhZuWEv+Wv6FjfzHYC2UCqmV0JXArsAlYHwRACXgJ+GKzSZVsiIjI4ug0HAHdfBazqVHZXxPNG4Jou1i3qotkzu6jfZVsiIjI4dIW0iIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIh
EUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEUTiIiEiUuMLBzJaZ2VYzKzezO2IsTzOzlcHyNWZWFJTnmdmrZlZvZg9E1M80s1+Z2XtmVmpm90Ysu9nMqsxsQ/D4875vpoiI9ES34WBmIeBB4HJgPnCtmc3vVO0WoNrd5wD3A/cF5Y3AV4Evx2j639z9ZOB04GNmdnnEspXuvjh4/KhHWyQiIn0Wz57D2UC5u+9w92bgaWB5pzrLgceD588Cl5iZuXuDu79JOCSOc/dj7v5q8LwZWA8U9mE7RESkH8UTDlOBPRGvK4KymHXcvRWoAfLi6YCZ5QJ/DLwcUXyVmW0ys2fNbFo87YiISP+JJxwsRpn3ok50w2bJwFPA99x9R1D8C6DI3RcBL/GHPZLO695qZiVmVlJVVdXdW4mISA/EEw4VQOS390Kgsqs6wQd+DnAkjrYfBra7+3c7Ctz9sLs3BS9/CJwZa0V3f9jdl7j7koKCgjjeSkRE4hVPOKwF5prZTDNLBVYAxZ3qFAM3Bc+vBl5x9xPuOZjZNwiHyJc6lU+OeHkFUBZHH0VEpB8ld1fB3VvN7HZgNRACHnX3UjO7Gyhx92LgEeAJMysnvMewomN9M9sJZAOpZnYlcClQC/wT8B6w3swAHgjOTPobM7sCaA3aurmftlVEROLUbTgAuPsqYFWnsrsinjcC13SxblEXzcYap8Dd7wTujKdfIiIyMHSFtIiIRFE4iIhIFIWDiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEiSsczGyZmW01s3IzuyPG8jQzWxksX2NmRUF5npm9amb1ZvZAp3XONLN3g3W+Z2YWlI83sxfNbHvwc1zfN1NERHqi23AwsxDwIHA5MB+41szmd6p2C1Dt7nOA+4H7gvJG4KvAl2M0/X3gVmBu8FgWlN8BvOzuc4GXg9ciIjKI4tlzOBsod/cd7t4MPA0s71RnOfB48PxZ4BIzM3dvcPc3CYfEcWY2Gch297fc3YH/Bq6M0dbjEeUiIjJI4gmHqcCeiNcVQVnMOu7eCtQAed20WdFFmxPdfV/Q1j5gQqwGzOxWMysxs5Kqqqo4NkNEROIVTzhYjDLvRZ2+1I+u7P6wuy9x9yUFBQU9WVVERLoRTzhUANMiXhcClV3VMbNkIAc40k2bhV20eSA47NRx+OlgHH0UEZF+FE84rAXmmtlMM0sFVgDFneoUAzcFz68GXgnGEmIKDhfVmdnS4CylPwWei9HWTRHlIiIySJK7q+DurWZ2O7AaCAGPunupmd0NlLh7MfAI8ISZlRPeY1jRsb6Z7QSygVQzuxK41N23AF8EHgMygOeDB8C9wDNmdguwG7imPzZURETi1204ALj7KmBVp7K7Ip430sWHuLsXdVFeAiyMUX4YuCSefomIyMDQFdIiIhJF4SAiIlEUDiIiEkXhICIiURQOIiISReEwBLk7v99dzXv7a2lpa090d0RkFIrrVFYZPG3tzlef28z/vL0bgNRQEpctnMS3rl5Eekoowb0TkdFC4TCENLa08dQ7u9l+sJ5bL5jFginZrN9VzX+/vYtDdU388KYljEnTr0xEBp4+aYaQVe/u4/2qeu676lQ+f9Z0AJYvnsrp08fxDz/ZyI2PrOF/bjmHLAWEiAwwjTkMEdXHmlm/u5qzZ+YdD4YOV54+lYeuP4MNe47yjV9tSVAPRWQ0UTgMEa9vq8IwLpibH3P5ZQsm8YULZ/PUO3v4den+Qe6diIw2CochoPbDFtbtquaMGbnkZqZ2We/vPjGPhVOzueNn73KwrrHLeiIifaVwGALe2F5FuzsXzot507vjUpOT+O7nT+dYcytf+fnmQeqdiIxGGtlMsNa2dkp2VbOoMJfxWeG9hifX7D7hOl/
6xDzuff49XtxygE/OnzgY3RSRUUZ7Dgm241ADTa3tnFaYE/c6t5w/k3kTx/C14lKONbcOYO9EZLRSOCRY2b5aUkLGrIIxca+TEkrins+eyt6jH/LvL28fwN6JyGilw0oJ5O6U7atl7oSxpITiz+mOw05nzhjHD1/fQVpyiEnZ6ceXX3fO9K5WFRGJi/YcEqiyppHaxlbmT87u1frLFkwiPSXEcxv20t71LbtFRHpM4ZBAZftqMWDepLG9Wj8rLZnLF05i1+FjrN9V3b+dE5FRTeGQQGX7apmel9mn+ZJOnz6OorxMnt+8n/omDU6LSP9QOCTI0WPN7Ktp7PUhpQ5JZixfPJXm1nZ+uamyn3onIqNdXOFgZsvMbKuZlZvZHTGWp5nZymD5GjMrilh2Z1C+1cwuC8pOMrMNEY9aM/tSsOxrZrY3Ytmn+mdTh5byg/UAzJvYu0NKkSZmp3PRyQVsqqhh896aPrcnItJtOJhZCHgQuByYD1xrZvM7VbsFqHb3OcD9wH3BuvOBFcACYBnwkJmF3H2ruy9298XAmcAx4OcR7d3fsdzdV/VtE4emXYePkZkaYsLYtH5p78J5E5iSm85zG/ZyuL6pX9oUkdErnj2Hs4Fyd9/h7s3A08DyTnWWA48Hz58FLjEzC8qfdvcmd/8AKA/ai3QJ8L677+rtRgxHu440MH18JuF/pr4LJRlXnzGNxpZ2/unnm3GdvSQifRBPOEwF9kS8rgjKYtZx91agBsiLc90VwFOdym43s01m9qiZjYvVKTO71cxKzKykqqoqjs0YOg7XN3GovpkZeVn92u6knHQ+OX8iL5Tu57Hf7ezXtkVkdIknHGJ9te38tbSrOidc18xSgSuAn0Qs/z4wG1gM7AO+HatT7v6wuy9x9yUFBQVd934IWhecdlqUl9nvbZ8/N59PnDKBf11VxvrdOr1VRHonnnCoAKZFvC4EOp8Wc7yOmSUDOcCRONa9HFjv7gc6Ctz9gLu3uXs78EOiD0MNeyW7qgklGVNyM/q97SQzvn3NYiZmp3P7j9dram8R6ZV4TrBfC8w1s5nAXsKHga7rVKcYuAl4C7gaeMXd3cyKgSfN7DvAFGAu8E7EetfS6ZCSmU12933By88CI25u6pKdR5iam9GjKTN6Iiczhf+84Uyu+c+3+LP/WsvKvzz3I9dSdDfrq6bfEJFuP52CMYTbgdVAGfCMu5ea2d1mdkVQ7REgz8zKgb8H7gjWLQWeAbYALwC3uXsbgJllAp8EftbpLb9pZu+a2SbgIuDv+riNQ0pjSxub99YyYwAOKUVaODWHh244g/f21/GFJ9bR3No+oO8nIiNLXJfmBqeTrupUdlfE80bgmi7WvQe4J0b5McKD1p3Lb4ynT8PVu3traG5rZ8b4/h2MjuWikyZw31WL+PJPNnLbk+t54LrTSUsODfj7isjwpyukB1nJzvAg8fQB3nPocPWZhXz9igW8uOUAtzxWovs/iEhcFA6DbN2uamblZ/VpPqWeuum8Ir519SJ+9/4hrv/RGuoaWwbtvUVkeFI4DLLSyhpO7cFd3/rLNUum8dD1Z1K2r5aHXnufiupjg94HERk+FA6D6EhDeLK9BVP6Ntleby1bOImffvE8DHj49R2s+eCwrqQWkZgUDoOotDI8Kd6CKYO/59BhwZQc/uqiORTlZ/Hchkoef2sntTrMJCKdKBwGUWllLUCfp+nuqzFpydx8XhGfWTSZHVUNfO/l7ZrNVUQ+QuEwiLZU1jIlJ51xWamJ7gpJZpw3O5/bL57DuMxUnnxnNz8p2aOzmUQEiPM6B+kfpZU1zE/gIaVYJoxN5wsXzubVrQd5betBth2oI29MKlcuntpvM8aKyPCjPYdBcqy5lR2HGhI2GH0ioSTjE6dM5LaL5jA+K5W/W7mRGx5Zw46q+kR3TUQSROEwSMr21eHOkAyHDpNzMvjLC2fzjSsXsqmihmX//gbffWkbjS1tie6aiAwyHVYaJFs6zlSaOvC
HlbqbWO9Eksy47pzpXLpgIv/yyzK++9J2flJSwVc+fQrLFk7SoSaRUULhMEhKK2vJzUxhSk56orsSlwlj0/mPa0/n2rOncfcvtvDFH6/n3Fl5/PMV8zl50on3fjTrq8jwp8NKg6S0spYFU7KH3Tfv82bn88u/Pp9/Wb6Asv21fOrf3+Cffv6u7hMhMsIpHAZBS1s7W/fXJfTit75IDiVx47lFvPblj3Pj0hmsXLuHC7/5Gt/+9VbN0yQyQikcBkH5wXqa29qH9GB0PHIzU/n68oW89PcXcskpE/iPV8q54Juv8qM3dmjQWmSEUTgMgo4ro4d7OHQoys/igevO4Be3n8+CKTl841dlfPxbr/Hfb+1USIiMEBqQHgSllTWkpyQxM39MorsSl3gHlE8tzOF//vwcfld+iPtf2sZdz5Xy4KvlnF00niVF4wfsNqgiMvAUDoOgtLKWkydlE0oaXoPR8TpvTj7nzs7jrfcP892XtvOLTfv4zbYqLphXwFkKCZFhSX+1A8zdKQvOVBrJzIzz5uSz8i+Xcsv5MxmflcovN+3j27/eyu/eP0RLm+5hLTKcaM9hgO058iF1Ta3D9kylnjIzZheMYVZ+FjsONfBy2UF+GexJXBjsSYjI0BfXnoOZLTOzrWZWbmZ3xFieZmYrg+VrzKwoYtmdQflWM7ssonynmb1rZhvMrCSifLyZvWhm24Of4/q2iYn1h3s4jOw9h846QuLWC2bx5+fPJH9MGr/ctI9/+/VWHn3zAw1ciwxx3e45mFkIeBD4JFABrDWzYnffElHtFqDa3eeY2QrgPuDzZjYfWAEsAKYAL5nZPHfv+GS4yN0PdXrLO4CX3f3eIIjuAP53H7YxoUorawklGSdNGpvorvSbnk7PMatgDLMKxrCjqp6X3zvI3b/cwkOvlXPL+bO4Yel0xqan9Kh9XWEtMvDi2XM4Gyh39x3u3gw8DSzvVGc58Hjw/FngEgtfCrwceNrdm9z9A6A8aO9EItt6HLgyjj4OWaWVNcwpGEN6SijRXUm4WQVj+Is/msXTty7llMnZ3PfCe3zs3lf4zovbqG5oTnT3RCRCPGMOU4E9Ea8rgHO6quPurWZWA+QF5W93Wndq8NyBX5uZAz9w94eD8onuvi9oa5+ZTYjVKTO7FbgVYPr0oftNsrSylvPn5Ce6G0PK0ll5LJ2Vx8Y9R3notXK+9/J2fvTGDq46o5CbzpuR6O6JCPGFQ6zzLzvflb6rOida92PuXhl8+L9oZu+5++tx9CfcSDhMHgZYsmRJ5/4MCVV1TRysa2L+KBtviNdp03L5wY1L2Hagjh/8Zgcr1+7hibd3Mbsgi3Nn5XPy5LEkDbO5qERGingOK1UA0yJeFwKVXdUxs2QgBzhyonXdvePnQeDn/OFw0wEzmxy0NRk4GP/mDC1/GIweHWcq9da8iWP59udO4607L+YfLzuJQ/XN/M+aXfzb6q2sLt3P/hpN8icy2OLZc1gLzDWzmcBewgPM13WqUwzcBLwFXA284u5uZsXAk2b2HcID0nOBd8wsC0hy97rg+aXA3Z3aujf4+VxfNjCROqbNmD9Zew7xyBuTxm0XzSE7PYWyfbWs3XmEN7ZX8ZttVUzMTuO0wlxOjeN+GBrQFum7bsMhGEO4HVgNhIBH3b3UzO4GSty9GHgEeMLMygnvMawI1i01s2eALUArcJu7t5nZRODnwfTVycCT7v5C8Jb3As+Y2S3AbuCaftzeQbWlspbCcRnkZKZ0X1mOCyUZC6fmsHBqDvVNrby7t4ZNe47y6y0H+PWWA/x0fQUXzivgwpMKWDorj8xUXa4j0t/i+qty91XAqk5ld0U8b6SLD3F3vwe4p1PZDuC0LuofBi6Jp19DXWllzai7vqG/jUlL5txZeZw7K4/qhmbK9tfS0NTKypI9PP7WLlJDSSyensuSGeM4c8Y4Tp8+rC+LERky9JVrgNQ3tbLz8DH+5IzCRHdlyOntbUzHZaVy3ux8rjtnOo0
tbZTsrOY32w7yzs5qHn59B63t4fMS8sekMX18JjPGZzItL5MJY9M0sC3SQwqHAVK2b2RN0z3UpKeEOH9uPufPDZ8m/GFzG+/urWHdrmqe27CX9/bXsn53NQApIWNKbgaFuRlMHZfB0lnjKcrLImmEToQo0h8UDgOkdK/OVBpMGakhzp45nrNnjicnIwV351B9MxXVx6g4+iF7qz/knZ1HaHnfeaakgrFpySycmsOiwhwWFeayqDCHwnEZw+42rp1pMF76i8JhgJRW1pKXlcrE7LREd2VUMjMKxqZRMDbt+DhEW7tzsK6RaeMy2bT3KJsqanj0tx/Q0hY+HDUuM4VTC3NZNDWHUwtzOK0wl4nZaVGBMZI/gEfytknPKBwGSGllLfOnZA/7b6JDUW/HLEJJxuScDD531jQ+d1b48pum1ja27a9nY8VR3q2oYdPeGr7/m/dpC8YvCsamHQ+LhVNyKMrPpKWtvdf3qNCHb9/o32/wKBwGQHNrO9sP1nHBvFmJ7op0Iy05xKmF4Q//Do0tbZRW1vJuxVE27a1hU0UNr2w9iEdch5+dnsz4rFRyM1MZk5ZMVloyY9JCjElLZlPFUfLGpJGXlUpacpK+IMiwpHAYANsO1NHS5hqMHqLi+fZ5ZnBqbIf6pla2Hahj9+Fj/HJTJUcamjnS0Myuww3UN7UePzQF8Phbu44/Tw0lkZ2RTHZ6CmMzUjjW1Ep6Soj0lCRSQkkkJyWRHDJSkoykJKO+qYV2h3Z33KG93Wlpd9ra22ltd9raPPyz3Wltb6e1zWlzJzWURFpyEjsONZASSiIlyUhJTiI1lERq8DMlOYkNe46SmRoiIyVEZmqI5FASbUF77e4cPdYcft+O93fH+cPrrfvryEoLkZUaDsTU5I/uQQ31b/ZDvX9DicJhAGzpuDJa4TBijElL5ozp4zhj+jiONUffi6K5tZ36plYamlo5c8Y4DtU3cbihmbrGVmobW6j9sIXaxlaO1DdRfayZxpb24x/uHafgAjy/eX/M909OMkJJ9oefoSRCSeFQMTNa2tppam2noan1I+119sibH/Tp3+GBV8s/8jolZOSPSWNKbgZTcjM4eqyZ3IwUxmelkT8mvGc1Um+PO9IpHAZAaWUNmakhZuZlJborMkhSk5MYn5zK+KxUPjF/Ypf1Yn1zbXc//u3982dNI8kMMzCDJAsHQryHpp5cs/t4e82t7TS3tdPc2k5L8PO8OXkca27jWHMbHza30dLWfjxwkpKMkp3VJJmRFLy/mYX7AyQZtDk0tbQdb7expZ36phaqG5r54FADNR+2HB+vIVhnXGYqeWNSyctKo6m1jck56eSPSQs/xqaRlRo64fa1tLUHfW7lYG0jTa3twaMt/LOljZY2J8nCY0jh7UkiIzWJvKw0JmSnMWN8Fhmpmja/JxQOA6C0spZTJmfrPPphqrcD3r1dP8mMpJCREoKstBP/ScbT9h/aS6Lz15OLT+46uAAs5kTK8Wt3p6GplcP1zRxuaOZwsAd1uL6JnYeP8daOw1HrpCaHD4l1fKiHw5DjAdbcg/uPr+pizwtgam4G2RkpFOVlMit/DFNy0zUedAIKh37W1u5s2VfLNWfqymgZfZLMGJuewtj0FIryPxpN7s6lCyZxoLaRww3NHKpr4lB9E0cammluaw/GUcLjKu3u4bGR1GSyUkNkpIbISktmw56jpCcnkZocIi0IlbSUECkhwx3+5Iypx8dkGppaOVTfzIHaRj441MD7VfW8uf3Q8QtUx2WmsKgwl7OKxjM+KzUR/1xDmsKhn5UfrOdYcxunTctNdFdkGOrrXstQFnntSW95N3duWfVu7D2HjsNY58zMo7axhe0H6tlUcZQ3tlfxxvYqTivM5aKTYt5XbNRSOPSzjXuOAigcRIao7PSU42ej1XzYwpvbq3hn5xE2VdSAwRc/PjvmdSyj7UwnhUM/21BxlLHpyRqMliFpJO+Z9EZORgqfXjSFC+YV8Kt39/GdF7f
x/Ob9PHT9GczMH91/wwqHfrZxz1FOK8zVYLRIDEM1nMamp7DirOmcVljLT9dX8Kl/f4Mbls4Y1QHRuzkAJKbGljbe21/HadM02Z7IcHTK5Gy+eOFsstKSefS3H7Cp4miiu5QwCod+VFpZS1u7c1qhxhtEhqu8MWl88cLZTBuXwTMle9i6vzbRXUoIhUM/6hiMXqzBaJFhLSM1xJ+eW8SknHSefGc3uw43JLpLg07h0I82VhxlUnY6E7LTE90VEemj9JQQN583k5yMFB5/ayeH65sS3aVBpXDoRxv3HNV4g8gIMiYtmZvPmwnAU+/spqUHV2sPdwqHfnL0WDM7Dx/T9Q0iI8z4rFSuOXMalTWN/GrTvkR3Z9DEFQ5mtszMtppZuZndEWN5mpmtDJavMbOiiGV3BuVbzeyyoGyamb1qZmVmVmpmfxtR/2tmttfMNgSPT/V9Mwfeho7xBg1Gi4w4p0zO5oK5Bbyz8wgbR8kZTN2Gg5mFgAeBy4H5wLVmNr9TtVuAanefA9wP3BesOx9YASwAlgEPBe21Av/g7qcAS4HbOrV5v7svDh6r+rSFg2TNB0dITjIWT1c4iIxEn5w/kWnjMijeUEldY0uiuzPg4tlzOBsod/cd7t4MPA0s71RnOfB48PxZ4BILT3e4HHja3Zvc/QOgHDjb3fe5+3oAd68DyoCpfd+cxHl7x2FOm5ZLZqquKxQZiUJJxlVnFNLS1k7xxspEd2fAxRMOU4E9Ea8riP4gP17H3VuBGiAvnnWDQ1CnA2siim83s01m9qiZjSMGM7vVzErMrKSqqiqOzRg4DU2tbKqoYems8Qnth4gMrAnZ6VxyysTwbWT31iS6OwMqnnCINQ9E57kRu6pzwnXNbAzwU+BL7t5xpcn3gdnAYmAf8O1YnXL3h919ibsvKSgoOPEWDLCSXdW0tTtLZ+UltB8iMvDOn5NP4bgMijdW8mGMuwKOFPGEQwUwLeJ1IdB5n+p4HTNLBnKAIyda18xSCAfDj939Zx0V3P2Au7e5ezvwQ8KHtYa0t3ccJjnJPnLPYREZmUJJxpWLp3KsqZUXy7q+udBwF084rAXmmtlMM0slPMBc3KlOMXBT8Pxq4BV396B8RdlyAQgAAAsJSURBVHA200xgLvBOMB7xCFDm7t+JbMjMJke8/CywuacbNdg03iAyukzJzWDprDzW7DjC3qMfJro7A6LbcAjGEG4HVhMeOH7G3UvN7G4zuyKo9giQZ2blwN8DdwTrlgLPAFuAF4Db3L0N+BhwI3BxjFNWv2lm75rZJuAi4O/6a2MHgsYbREanT5wykay0ZIo37KW9u7sQDUNxfdUNTidd1ansrojnjcA1Xax7D3BPp7I3iT0egbvfGE+fhgqNN4iMThmpIS5fOImfrKtg3a5qblg6I9Fd6le6QrqPNN4gMnotnpZLUV4Wq0v3U93QnOju9CuFQx+9trWKM6aP03iDyChkZlyxeAqNLW18c/V7ie5Ov1I49MHuw8co21fLpQsmJrorIpIgk7LTOW92Pk+v3cPvd1cnujv9RuHQB6tLw6exXbZgUoJ7IiKJdMnJE5gwNo2vPreZtvaRMTitcOiD1aX7mT85m2njMxPdFRFJoLSUEF/59Hw2763lx2t2Jbo7/ULh0EsH6xpZt7taew0iAsBnFk3m/Dn5fGv1Vqrqhv+NgRQOvfTilgO4w7KFCgcRCQ9Of335Ahpb2rj3+eE/OK1w6KUXNu+nKC+TeRPHJLorIjJEzC4Yw1/80Sx+ur6CtTuPJLo7faJw6IUjDc289f5hLlswifBMICIiYbdfPIepuRl89f9tpnUY31ZU4dALT72zm9Z256ozCxPdFREZYjJTk7nrj+fz3v46HvvdzkR3p9cUDj3U0tbOE2/t4vw5+cybODbR3RGRIejS+RP5+EkF3P/iNg7UNia6O72icOihFzbvZ39tI3/2saJEd0VEhigz4+tXLKC13fnHZzfRPgyvfVA49NB//fYDZuR
lctFJExLdFREZwmbkZfGVz8zn9W1V/OjNHYnuTo8pHHpg456jrN99lJvOLSIpSQPRInJiN5wzncsWTOSbL2xl456jie5Ojygc4uTu/N/ny8hOT+aaJRqIFpHumRn3XbWICWPT+Ksfr+dg3fAZf1A4xOnZdRW8veMId1x+CmPTUxLdHREZJnIzU/nBjUuoPtbM/3psLfVNrYnuUlwUDnE4XN/EPavKWDJjHCvOmtb9CiIiEU4tzOHB686gbF8df/Xj9TS3Dv3rHxQO3XB3vvaLLTQ0tfKvf3KqxhpEpFcuOnkC91y5kNe3VfFnj71DbWNLort0QgqHE3B3vvGrMn6xsZK/vniurmsQkT5ZcfZ0vnX1ItbsOMLn/vMt9tV8mOgudUnh0AV351urt/LImx9w83lF/PXFcxLdJREZAa5ZMo1Hbz6LPUeOcdn9r/OTkj24D73rIBQOMew5coxbHi/hodfe57pzpvPPfzxfcyiJSL+5YF4BxX99PidNGss/PruJGx5Zw9s7Dg+pkIgrHMxsmZltNbNyM7sjxvI0M1sZLF9jZkURy+4Myrea2WXdtWlmM4M2tgdtpvZtE+O3dX8d9z7/Hpfe/zpv7zjMVz59Ct9YvlDBICL9bnbBGFbeei53L19A2b46Vjz8Nssf/C3/9dsP2H34WKK7h3WXVGYWArYBnwQqgLXAte6+JaLOXwGL3P0LZrYC+Ky7f97M5gNPAWcDU4CXgHnBajHbNLNngJ+5+9Nm9p/ARnf//on6uGTJEi8pKenptrN1fx2/2XaQsn11bKo4yvtVDYSSjEvnT+Qrn5nP1NyMHrcZryfX7B6wtkVk8F13zvRer9vY0sZP11fw2G93sv1gPQCF4zI4ZXI2J00cy6ScdCZmp5OdnkxmajIpyUZbu9PW7kzJzSB/TFqv3tfM1rn7kljLkuNY/2yg3N13BI09DSwHtkTUWQ58LXj+LPCAhb9uLweedvcm4AMzKw/aI1abZlYGXAxcF9R5PGj3hOHQW29sr+JfV73HpOx0Tpk8lpvOK+JTp07u9T+0iEhvpKeEuP6cGVx/zgx2HmrglfcOsm53Ne/tq+XlsgOcaGqmb1y5kBuWzuj3PsUTDlOBPRGvK4Bzuqrj7q1mVgPkBeVvd1p3avA8Vpt5wFF3b41R/yPM7Fbg1uBlvZltjWNbYtoFrAEe620DPZMPHBqctxpStN2jy6jb7usTtM033gc39n71LlMlnnCIdcC9c451Vaer8lhjHSeqH13o/jDwcKxlQ5mZlXS1GzeSabtHl9G43SNtm+MZkK4AIi8LLgQqu6pjZslADnDkBOt2VX4IyA3a6Oq9RERkgMUTDmuBucFZRKnACqC4U51i4Kbg+dXAKx4e6S4GVgRnM80E5gLvdNVmsM6rQRsEbT7X+80TEZHe6PawUjCGcDuwGggBj7p7qZndDZS4ezHwCPBEMOB8hPCHPUG9ZwgPXrcCt7l7G0CsNoO3/N/A02b2DeD3QdsjybA7FNZPtN2jy2jc7hG1zd2eyioiIqOPrpAWEZEoCgcREYmicBhE3U1DMpKY2U4ze9fMNphZSVA23sxeDKZGedHMxiW6n31lZo+a2UEz2xxRFnM7Lex7we9/k5mdkbie914X2/w1M9sb/L43mNmnIpbFnEJnuDGzaWb2qpmVmVmpmf1tUD4if98Kh0ESTEPyIHA5MB+4NpheZCS7yN0XR5z7fQfwsrvPBV4OXg93jwHLOpV1tZ2XEz5jby7hCzgH5Mr/QfAY0dsMcH/w+17s7qsAgv/jK4AFwToPBX8Lw1Er8A/ufgqwFLgt2L4R+ftWOAye49OQuHsz0DENyWiynPCUKAQ/r0xgX/qFu79O+Ay9SF1t53Lgvz3sbcLX9EwenJ72ny62uSvHp9Bx9w+AyCl0hhV33+fu64PndUAZ4RkcRuTvW+EweGJNQxJzapARwoFfm9m6YKoTgInuvg/Cf2jAhIT1bmB1tZ0j/f/A7cHhk0cjDhm
OyG0OZp4+nfDMOyPy961wGDxxTw0yQnzM3c8gvGt9m5ldkOgODQEj+f/A94HZwGJgH/DtoHzEbbOZjQF+CnzJ3WtPVDVG2bDZdoXD4IlnGpIRw90rg58HgZ8TPpRwoGO3Ovh5MHE9HFBdbeeI/T/g7gfcvc3d24Ef8odDRyNqm80shXAw/NjdfxYUj8jft8Jh8MQzDcmIYGZZZja24zlwKbCZj06zMpKnRulqO4uBPw3OYlkK1HQcjhjuOh1L/yzh3zd0PYXOsBPchuARoMzdvxOxaET+vuOZlVX6QVfTkCS4WwNlIvDz8N8SycCT7v6Cma0FnjGzW4DdwDUJ7GO/MLOngI8D+WZWAfwzcC+xt3MV8CnCg7LHgD8b9A73gy62+eNmtpjwYZOdwF/CiafQGYY+Rnh27HfNbENQ9n8Yob9vTZ8hIiJRdFhJRESiKBxERCSKwkFERKIoHEREJIrCQUREoigcRBIgmMX0y4nuh0hXFA4ifRRc5KS/JRlR9B9apBfMrCiY1/8hYD3wiJmVBPP8fz2i3k4z+7qZrQ/ub3FyjLb+wsyeN7OMwdwGkRNROIj03kmEp2Q+nfA8/0uARcCFZrYoot6hYBLC7wMfOZQUXDX/x8CV7v7hIPVbpFsKB5He2xXM0w/wOTNbD/ye8I1tIm/k1DFB2zqgKKL8RsKz1l7l7k0D3FeRHlE4iPReA0AwodyXgUvcfRHwKyA9ol7HB38bH53PbDPhsCgc8J6K9JDCQaTvsgkHRY2ZTSS8NxCP3xOeoK7YzKYMVOdEekPhINJH7r6R8Ad9KfAo8NserPsm4b2OX5lZ/sD0UKTnNCuriIhE0Z6DiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhE+f8jrSB9eAmL/wAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(albums[\"rank\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "#albums.weeks_top = albums.days_top.apply(lambda x :(x+7)/7, result_type='expand')\n", + "albums = albums.assign(weeks_top=(albums['days_top'] + 7) / 7)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumdate_startdate_endranklengthtrack_lengthdays_topweeks_top
0Various ArtistsRoots Of Country Music (1965)1965-07-101965-08-2110772.0NaN42.07.0
1Kenny GSilhouette1988-10-221989-11-1881.0230416.000000392.057.0
2!!! (Chk Chk Chk)Myth Takes2007-03-242007-03-2419510.0290017.0000000.01.0
3\"Weird Al\" Yankovic\"Weird Al\" Yankovic In 3-D1984-03-171984-08-1817NaNNaN154.023.0
4\"Weird Al\" YankovicAlapalooza1993-10-231994-04-164612.0224445.166667175.026.0
\n", + "
" + ], + "text/plain": [ + " artist album date_start date_end \\\n", + "0 Various Artists Roots Of Country Music (1965) 1965-07-10 1965-08-21 \n", + "1 Kenny G Silhouette 1988-10-22 1989-11-18 \n", + "2 !!! (Chk Chk Chk) Myth Takes 2007-03-24 2007-03-24 \n", + "3 \"Weird Al\" Yankovic \"Weird Al\" Yankovic In 3-D 1984-03-17 1984-08-18 \n", + "4 \"Weird Al\" Yankovic Alapalooza 1993-10-23 1994-04-16 \n", + "\n", + " rank length track_length days_top weeks_top \n", + "0 107 72.0 NaN 42.0 7.0 \n", + "1 8 1.0 230416.000000 392.0 57.0 \n", + "2 195 10.0 290017.000000 0.0 1.0 \n", + "3 17 NaN NaN 154.0 23.0 \n", + "4 46 12.0 224445.166667 175.0 26.0 " + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumdate_startdate_endrankalbum_lengthtrack_lengthdays_topweeks_top
0Various ArtistsRoots Of Country Music (1965)1965-07-101965-08-2110772.0NaN42.07.0
1Kenny GSilhouette1988-10-221989-11-1881.0230416.000000392.057.0
2!!! (Chk Chk Chk)Myth Takes2007-03-242007-03-2419510.0290017.0000000.01.0
3\"Weird Al\" Yankovic\"Weird Al\" Yankovic In 3-D1984-03-171984-08-1817NaNNaN154.023.0
4\"Weird Al\" YankovicAlapalooza1993-10-231994-04-164612.0224445.166667175.026.0
\n", + "
" + ], + "text/plain": [ + " artist album date_start date_end \\\n", + "0 Various Artists Roots Of Country Music (1965) 1965-07-10 1965-08-21 \n", + "1 Kenny G Silhouette 1988-10-22 1989-11-18 \n", + "2 !!! (Chk Chk Chk) Myth Takes 2007-03-24 2007-03-24 \n", + "3 \"Weird Al\" Yankovic \"Weird Al\" Yankovic In 3-D 1984-03-17 1984-08-18 \n", + "4 \"Weird Al\" Yankovic Alapalooza 1993-10-23 1994-04-16 \n", + "\n", + " rank album_length track_length days_top weeks_top \n", + "0 107 72.0 NaN 42.0 7.0 \n", + "1 8 1.0 230416.000000 392.0 57.0 \n", + "2 195 10.0 290017.000000 0.0 1.0 \n", + "3 17 NaN NaN 154.0 23.0 \n", + "4 46 12.0 224445.166667 175.0 26.0 " + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums = albums.rename(columns={\"length\":\"album_length\"})\n", + "albums.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "albums_merge = albums.drop(columns=[\"date_start\",\"date_end\",\"days_top\",\"track_length\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumrankalbum_lengthweeks_top
0Various ArtistsRoots Of Country Music (1965)10772.07.0
1Kenny GSilhouette81.057.0
2!!! (Chk Chk Chk)Myth Takes19510.01.0
\n", + "
" + ], + "text/plain": [ + " artist album rank album_length \\\n", + "0 Various Artists Roots Of Country Music (1965) 107 72.0 \n", + "1 Kenny G Silhouette 8 1.0 \n", + "2 !!! (Chk Chk Chk) Myth Takes 195 10.0 \n", + "\n", + " weeks_top \n", + "0 7.0 \n", + "1 57.0 \n", + "2 1.0 " + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums_merge.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "df_ranking = df_acoustic_features.merge(albums_merge,on=[\"artist\",\"album\"], how=\"left\")" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + "5 False\n", + "6 False\n", + "7 False\n", + "8 False\n", + "9 False\n", + "10 False\n", + "11 False\n", + "12 False\n", + "13 False\n", + "14 False\n", + "15 False\n", + "16 False\n", + "17 False\n", + "18 False\n", + "19 False\n", + "20 False\n", + "21 False\n", + "22 False\n", + "23 False\n", + "24 False\n", + "25 False\n", + "26 False\n", + "27 False\n", + "28 False\n", + "29 False\n", + " ... 
\n", + "339825 False\n", + "339826 False\n", + "339827 False\n", + "339828 False\n", + "339829 False\n", + "339830 False\n", + "339831 False\n", + "339832 False\n", + "339833 False\n", + "339834 False\n", + "339835 False\n", + "339836 False\n", + "339837 False\n", + "339838 False\n", + "339839 False\n", + "339840 False\n", + "339841 False\n", + "339842 False\n", + "339843 False\n", + "339844 False\n", + "339845 False\n", + "339846 False\n", + "339847 False\n", + "339848 False\n", + "339849 False\n", + "339850 False\n", + "339851 False\n", + "339852 False\n", + "339853 False\n", + "339854 False\n", + "Name: rank, Length: 339855, dtype: bool" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking[\"rank\"].isnull()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idsongalbumartistacousticnessdanceabilityduration_msenergyinstrumentalnesskey...modespeechinesstempotime_signaturevalencealbum_iddaterankalbum_lengthweeks_top
00Veyvc3n9AcLSoK3r1dA12Voices In My HeadHoodie SZNA Boogie Wit da Hoodie0.05550.754142301.00.6630.0000006.0...0.00.42790.1954.00.2073r5hf3Cj3EMh1C2saQ8jyt2018-12-211.020.03.0
177JzXZonNumWsuXKy9vr3UBeastyHoodie SZNA Boogie Wit da Hoodie0.29200.860152829.00.4180.0000007.0...0.00.158126.0234.00.3743r5hf3Cj3EMh1C2saQ8jyt2018-12-211.020.03.0
218yllZD0TdF7ykcREib8Z1I Did ItHoodie SZNA Boogie Wit da Hoodie0.15300.718215305.00.4540.0000468.0...1.00.12789.4834.00.1963r5hf3Cj3EMh1C2saQ8jyt2018-12-211.020.03.0
31wJRveJZLSb1rjhnUHQiv6Swervin (feat. 6ix9ine)Hoodie SZNA Boogie Wit da Hoodie0.01530.581189487.00.6620.0000009.0...1.00.30393.0234.00.4343r5hf3Cj3EMh1C2saQ8jyt2018-12-211.020.03.0
40jAfdqv18goRTUxm3ilRjbStartender (feat. Offset and Tyga)Hoodie SZNA Boogie Wit da Hoodie0.02350.736192779.00.6220.0000006.0...0.00.133191.9714.00.5063r5hf3Cj3EMh1C2saQ8jyt2018-12-211.020.03.0
\n", + "

5 rows × 22 columns

\n", + "
" + ], + "text/plain": [ + " id song album \\\n", + "0 0Veyvc3n9AcLSoK3r1dA12 Voices In My Head Hoodie SZN \n", + "1 77JzXZonNumWsuXKy9vr3U Beasty Hoodie SZN \n", + "2 18yllZD0TdF7ykcREib8Z1 I Did It Hoodie SZN \n", + "3 1wJRveJZLSb1rjhnUHQiv6 Swervin (feat. 6ix9ine) Hoodie SZN \n", + "4 0jAfdqv18goRTUxm3ilRjb Startender (feat. Offset and Tyga) Hoodie SZN \n", + "\n", + " artist acousticness danceability duration_ms energy \\\n", + "0 A Boogie Wit da Hoodie 0.0555 0.754 142301.0 0.663 \n", + "1 A Boogie Wit da Hoodie 0.2920 0.860 152829.0 0.418 \n", + "2 A Boogie Wit da Hoodie 0.1530 0.718 215305.0 0.454 \n", + "3 A Boogie Wit da Hoodie 0.0153 0.581 189487.0 0.662 \n", + "4 A Boogie Wit da Hoodie 0.0235 0.736 192779.0 0.622 \n", + "\n", + " instrumentalness key ... mode speechiness tempo time_signature \\\n", + "0 0.000000 6.0 ... 0.0 0.427 90.195 4.0 \n", + "1 0.000000 7.0 ... 0.0 0.158 126.023 4.0 \n", + "2 0.000046 8.0 ... 1.0 0.127 89.483 4.0 \n", + "3 0.000000 9.0 ... 1.0 0.303 93.023 4.0 \n", + "4 0.000000 6.0 ... 0.0 0.133 191.971 4.0 \n", + "\n", + " valence album_id date rank album_length weeks_top \n", + "0 0.207 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 1.0 20.0 3.0 \n", + "1 0.374 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 1.0 20.0 3.0 \n", + "2 0.196 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 1.0 20.0 3.0 \n", + "3 0.434 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 1.0 20.0 3.0 \n", + "4 0.506 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 1.0 20.0 3.0 \n", + "\n", + "[5 rows x 22 columns]" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idsongalbumartistacousticnessdanceabilityduration_msenergyinstrumentalnesskey...modespeechinesstempotime_signaturevalencealbum_iddaterankalbum_lengthweeks_top
2419996yG67QYhkj9WVIt7WgyHE1SilhouetteSilhouette0.9350.731223507.00.1670.0000006.0...0.00.029794.9324.00.27461vU2oVw8d5QkTNhW8PYla2018-02-09NaNNaNNaN
3343000ZUFk0afed2FkqmuJ5zGxRShootin' Creek (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9530.712204307.00.5140.0042505.0...1.00.0391133.5584.00.9230jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343015Xhj8u4dAeZYaZwXQJsQ4EBaltimore Fire (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9010.512191800.00.4390.0000017.0...1.00.0311115.8614.00.9350jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343022bKkPsqxC7o4V2lcwbI18ULeaving Home (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9880.420189253.00.4310.0003140.0...1.00.0406205.5553.00.9010jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343031hPEhm8zQk4hAyYeD1pHUcThere'll Come a Time (with The North Carolina ...Roots Of Country Music (1965)0.9610.445209027.00.3320.0000005.0...1.00.0336182.9933.00.8310jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343043UG5yKkePf93z9RSBsbPE5White House Blues (with The North Carolina Ram...Roots Of Country Music (1965)0.9190.546208813.00.4090.0000067.0...1.00.0477121.4444.00.8150jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343053UCBtdESy842iTU1uHEogXThe Highwayman (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9300.523198253.00.3090.0000000.0...1.00.034796.0584.00.7760jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343067z33ai31tZQ9vEKQQwww5DHungry Hash House (with The North Carolina Ram...Roots Of Country Music (1965)0.9220.532203227.00.3970.0000017.0...1.00.0458116.4924.00.9020jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343073hntqfPOhpYrmNg33aqonbThe Letter that Never Came (with The North Car...Roots Of Country Music (1965)0.9650.533168800.00.4120.00000010.0...1.00.0462106.3164.00.7810jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343086rxrbo1o1QuL2SKV6M7NA1Take a Drink on Me (with The North Carolina Ra...Roots Of Country Music (1965)0.9240.588194680.00.5360.0000312.0...1.00.0333125.6724.00.9740jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343093cUQJtLu2yq4FBaoe36oklHusband and Wife Were Angry One Night (with Th...Roots Of Country Music (1965)0.9630.710170813.00.3220.0000007.0...1.00.028897.0444.00.9330jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343102k7AzvedyJktnDflUVcKYBRamblin' Blues (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9270.681185933.00.4480.0000490.0...1.00.0366121.7304.00.9210jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343113xongt156K2h28zHBCGkjVTook My Gal A-Walkin' (with The North Carolina...Roots Of Country Music (1965)0.9520.554168147.00.3120.0000072.0...1.00.0315104.9234.00.9280jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343127gHZDMFYE5Obq96MJRE5UDOld and Only in the Way (with The North Caroli...Roots Of Country Music (1965)0.9690.499207880.00.2900.0000005.0...1.00.0473175.9444.00.6490jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343130kK3Qs81peDYr3XPIYPctPDon't Let Your Deal Go Down Blues (with The No...Roots Of Country Music (1965)0.9900.594172027.00.4130.0477002.0...1.00.0367137.3384.00.9050jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343141W05sAuCWcViCCz6ib3KZyBill Mason (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9100.751180267.00.4780.0000227.0...1.00.0775119.0504.00.9560jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343153PzmwPmvLlYIyw5yiTjmKUA Kiss Waltz (with The North Carolina Ramblers...Roots Of Country Music (1965)0.8750.407188373.00.3550.8090005.0...1.00.0347165.1453.00.9380jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343164hFhhvQBSi53aR2EQ2yXjIFlop Eared Mule (with Chris Howland & The High...Roots Of Country Music (1965)0.9740.638178387.00.7500.5340002.0...1.00.0399131.7644.00.8850jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343170uw1DoIdB75eilYSFPpEDpA Trip to New York, Pt. 1 (with Allegheny High...Roots Of Country Music (1965)0.9250.573185413.00.4160.0000037.0...1.00.3810133.9304.00.8500jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343186F6KTnlicSqAlNwW8MgMjKSweet Sixteen (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9170.644174053.00.4030.0000007.0...1.00.0450114.8014.00.8920jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343193Dj4DXFrIf1GZMXyL01C3JWrite a Letter to My Mother (with The North Ca...Roots Of Country Music (1965)0.8750.696181773.00.4070.0001418.0...1.00.026797.0324.00.9290jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343205O1FjxnI4fGAdJWRv9Uwy9If the River Was Whiskey (with The North Carol...Roots Of Country Music (1965)0.9230.528189573.00.3540.0000073.0...1.00.049999.1704.00.7430jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343212qZjqI8LU7efzauVIonjHlMother's Last Farewell Kiss (with The North Ca...Roots Of Country Music (1965)0.9660.486183853.00.2260.0000005.0...1.00.0351173.1934.00.5770jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343222s1fbQyQ5hPjsbbGZkMC2RMilwaukee Blues (with The North Carolina Rambl...Roots Of Country Music (1965)0.9780.738197267.00.4110.0000291.0...1.00.0390106.9084.00.9190jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343237sqb1bBC5EKowm4oqoKkmoWhere the Whippoorwill Is Whispering Good-Nigh...Roots Of Country Music (1965)0.9740.667189733.00.2390.0000008.0...1.00.033590.3504.00.7210jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343245MgjNSxvgAGWyQF7C2jKaFThe Girl I Left in Sunny Tennessee (with The N...Roots Of Country Music (1965)0.9910.709201840.00.4230.0423002.0...1.00.0326128.8474.00.8850jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343255XrKPffmwEP65ZiCTeOeX4Sunny Tennesee (with Floyd County Ramblers) - ...Roots Of Country Music (1965)0.9440.491196267.00.3910.0000000.0...1.00.0334114.4474.00.6240jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343262woAfvj29PxBBWvhJ6QiQHBulldog Down in Sunny Tennessee (with Dock Walsh)Roots Of Country Music (1965)0.9640.648160053.00.3970.0000085.0...1.00.0402167.3104.00.9220jckzrST0anXNTY13KMbSw2005NaNNaNNaN
33432743zp4lGaVAbD86JsU7IuNjMoving Day (with Arthur Collins)Roots Of Country Music (1965)0.9940.658193360.00.1940.0000005.0...1.00.084975.2874.00.6980jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343287foZTkS7CxXV1W28GoB5O1It's Movin' Day (with The North Carolina Rambl...Roots Of Country Music (1965)0.8710.540207107.00.3490.0000136.0...1.00.033995.8014.00.8420jckzrST0anXNTY13KMbSw2005NaNNaNNaN
33432922j6YCqpxVHXF3EmsYRMBkHome Sweet, Home (with Frank Jenkins)Roots Of Country Music (1965)0.9740.481153160.00.5370.8810002.0...1.00.0414129.5724.00.9740jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343306pH9yEW5e4Lnd7MIY6wFN3I'm the Man that Rode the Mule 'Round the Worl...Roots Of Country Music (1965)0.9900.668183293.00.3920.4190002.0...1.00.0476139.6314.00.9230jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343314eIEru6KjlZeio3zHj7obaMan that Rode the Mule Around the World (with ...Roots Of Country Music (1965)0.9460.692175547.00.3410.0000328.0...1.00.0436123.5764.00.7970jckzrST0anXNTY13KMbSw2005NaNNaNNaN
33433266hnNRU8IhBnxJZS7I3xXGLynchburg Town (with Chris Howland & The Highl...Roots Of Country Music (1965)0.9700.709180667.00.6520.6510000.0...1.00.0313131.1214.00.7730jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343337v4VLqGkxpAmFO7FIUmj6rGoing Down to Lynchburg Town / Don't Let Your ...Roots Of Country Music (1965)0.9930.582188600.00.5650.8820008.0...1.00.0350145.4124.00.9360jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343343z7ZQf4bZgCFLjPjXmJFeZSome One (with Branch & Coleman)Roots Of Country Music (1965)0.9830.462183360.00.2770.00000010.0...1.00.0323178.3213.00.6340jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343352kUpX7DIdcvjbPsEFYK5GhMonkey on a String (with Cal Stewart)Roots Of Country Music (1965)0.9830.590151040.00.3660.00000011.0...1.00.255083.0113.00.9350jckzrST0anXNTY13KMbSw2005NaNNaNNaN
33433633s2KcXL1G6pQkjZsi01YcMonkey on a String (with The North Carolina Ra...Roots Of Country Music (1965)0.9890.589187253.00.4540.00722010.0...1.00.0391102.9264.00.8870jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343375dgeBwWjLx357PEFJtUQKXCan I Sleep in Your Barn Tonight Mister (with ...Roots Of Country Music (1965)0.9930.675191693.00.3600.0025309.0...1.00.0406132.8184.00.8650jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343380gN9Hm4DeszZez2XIXyyEvMay I Sleep in Your Barn Tonight, Mister (with...Roots Of Country Music (1965)0.9270.649171600.00.5220.0060006.0...1.00.0599126.3004.00.5670jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343396BqRH6HC4Qqn47dJz0iYAIMarried Life Blues (with Byron Parker)Roots Of Country Music (1965)0.9690.567160987.00.3690.0000032.0...1.00.0323113.0874.00.7400jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343400WPljjnATI6qeuBzvRjmtVThe Infanta March (with Fred van Eps)Roots Of Country Music (1965)0.9680.551260560.00.6310.9500000.0...1.00.0571123.8164.00.9630jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343411WnKHpJccBviOIVY8ttV4HSunset March (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9790.637159760.00.4500.9410000.0...1.00.0652114.5024.00.6480jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343426mkkTKSp6UvaeZSsABnXvOI'll Roll in My Sweet Baby's Arms (with Carter...Roots Of Country Music (1965)0.9650.657178080.00.4380.00000010.0...0.00.0846144.2144.00.8130jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343431WsnmtfBvMtwW38EfzR62gGoodbye Eliza Jane (with Peerless Quartet)Roots Of Country Music (1965)0.9950.65597107.00.4580.0000004.0...0.00.8840114.8511.00.4050jckzrST0anXNTY13KMbSw2005NaNNaNNaN
33434467BWokCtjylxW4hsq2EcysGood-Bye Sweet Liza Jane (with The North Carol...Roots Of Country Music (1965)0.9470.657185147.00.3500.0000007.0...1.00.0387105.3394.00.8980jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343454VKveoCmYHMm74GFORF9gmGoodbye Booze (with The North Carolina Rambler...Roots Of Country Music (1965)0.9870.399195240.00.3050.0053405.0...1.00.0338182.0234.00.8480jckzrST0anXNTY13KMbSw2005NaNNaNNaN
33434648l5jPrKfDdVLLgL89kQxiGoodbye Booze (with Fate Norris & Gid Tanner)Roots Of Country Music (1965)0.9880.608170333.00.2860.0000038.0...1.00.0406120.2454.00.6500jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343471L9FE9o1C480C0Argv1K3uYou Ain't Talking to Me (with Eddie Morton)Roots Of Country Music (1965)0.9960.533174920.00.2770.0016404.0...0.00.196085.1844.00.7650jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343483ioWkuIRTynwbAgfPP1eVLYou Ain't Talkin' to Me (with The North Caroli...Roots Of Country Music (1965)0.9310.628177480.00.4030.0000012.0...1.00.0429122.5154.00.8670jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343492J4Mu7AEozVHgoMzOy214LIf I Lose, I Don't Care (with The North Caroli...Roots Of Country Music (1965)0.9380.558187600.00.3660.0000092.0...1.00.0497124.8024.00.8730jckzrST0anXNTY13KMbSw2005NaNNaNNaN
\n", + "

51 rows × 22 columns

\n", + "
" + ], + "text/plain": [ + " id \\\n", + "241999 6yG67QYhkj9WVIt7WgyHE1 \n", + "334300 0ZUFk0afed2FkqmuJ5zGxR \n", + "334301 5Xhj8u4dAeZYaZwXQJsQ4E \n", + "334302 2bKkPsqxC7o4V2lcwbI18U \n", + "334303 1hPEhm8zQk4hAyYeD1pHUc \n", + "334304 3UG5yKkePf93z9RSBsbPE5 \n", + "334305 3UCBtdESy842iTU1uHEogX \n", + "334306 7z33ai31tZQ9vEKQQwww5D \n", + "334307 3hntqfPOhpYrmNg33aqonb \n", + "334308 6rxrbo1o1QuL2SKV6M7NA1 \n", + "334309 3cUQJtLu2yq4FBaoe36okl \n", + "334310 2k7AzvedyJktnDflUVcKYB \n", + "334311 3xongt156K2h28zHBCGkjV \n", + "334312 7gHZDMFYE5Obq96MJRE5UD \n", + "334313 0kK3Qs81peDYr3XPIYPctP \n", + "334314 1W05sAuCWcViCCz6ib3KZy \n", + "334315 3PzmwPmvLlYIyw5yiTjmKU \n", + "334316 4hFhhvQBSi53aR2EQ2yXjI \n", + "334317 0uw1DoIdB75eilYSFPpEDp \n", + "334318 6F6KTnlicSqAlNwW8MgMjK \n", + "334319 3Dj4DXFrIf1GZMXyL01C3J \n", + "334320 5O1FjxnI4fGAdJWRv9Uwy9 \n", + "334321 2qZjqI8LU7efzauVIonjHl \n", + "334322 2s1fbQyQ5hPjsbbGZkMC2R \n", + "334323 7sqb1bBC5EKowm4oqoKkmo \n", + "334324 5MgjNSxvgAGWyQF7C2jKaF \n", + "334325 5XrKPffmwEP65ZiCTeOeX4 \n", + "334326 2woAfvj29PxBBWvhJ6QiQH \n", + "334327 43zp4lGaVAbD86JsU7IuNj \n", + "334328 7foZTkS7CxXV1W28GoB5O1 \n", + "334329 22j6YCqpxVHXF3EmsYRMBk \n", + "334330 6pH9yEW5e4Lnd7MIY6wFN3 \n", + "334331 4eIEru6KjlZeio3zHj7oba \n", + "334332 66hnNRU8IhBnxJZS7I3xXG \n", + "334333 7v4VLqGkxpAmFO7FIUmj6r \n", + "334334 3z7ZQf4bZgCFLjPjXmJFeZ \n", + "334335 2kUpX7DIdcvjbPsEFYK5Gh \n", + "334336 33s2KcXL1G6pQkjZsi01Yc \n", + "334337 5dgeBwWjLx357PEFJtUQKX \n", + "334338 0gN9Hm4DeszZez2XIXyyEv \n", + "334339 6BqRH6HC4Qqn47dJz0iYAI \n", + "334340 0WPljjnATI6qeuBzvRjmtV \n", + "334341 1WnKHpJccBviOIVY8ttV4H \n", + "334342 6mkkTKSp6UvaeZSsABnXvO \n", + "334343 1WsnmtfBvMtwW38EfzR62g \n", + "334344 67BWokCtjylxW4hsq2Ecys \n", + "334345 4VKveoCmYHMm74GFORF9gm \n", + "334346 48l5jPrKfDdVLLgL89kQxi \n", + "334347 1L9FE9o1C480C0Argv1K3u \n", + "334348 3ioWkuIRTynwbAgfPP1eVL \n", + "334349 2J4Mu7AEozVHgoMzOy214L \n", + "\n", + " song 
\\\n", + "241999 Silhouette \n", + "334300 Shootin' Creek (with The North Carolina Ramblers) \n", + "334301 Baltimore Fire (with The North Carolina Ramblers) \n", + "334302 Leaving Home (with The North Carolina Ramblers) \n", + "334303 There'll Come a Time (with The North Carolina ... \n", + "334304 White House Blues (with The North Carolina Ram... \n", + "334305 The Highwayman (with The North Carolina Ramblers) \n", + "334306 Hungry Hash House (with The North Carolina Ram... \n", + "334307 The Letter that Never Came (with The North Car... \n", + "334308 Take a Drink on Me (with The North Carolina Ra... \n", + "334309 Husband and Wife Were Angry One Night (with Th... \n", + "334310 Ramblin' Blues (with The North Carolina Ramblers) \n", + "334311 Took My Gal A-Walkin' (with The North Carolina... \n", + "334312 Old and Only in the Way (with The North Caroli... \n", + "334313 Don't Let Your Deal Go Down Blues (with The No... \n", + "334314 Bill Mason (with The North Carolina Ramblers) \n", + "334315 A Kiss Waltz (with The North Carolina Ramblers... \n", + "334316 Flop Eared Mule (with Chris Howland & The High... \n", + "334317 A Trip to New York, Pt. 1 (with Allegheny High... \n", + "334318 Sweet Sixteen (with The North Carolina Ramblers) \n", + "334319 Write a Letter to My Mother (with The North Ca... \n", + "334320 If the River Was Whiskey (with The North Carol... \n", + "334321 Mother's Last Farewell Kiss (with The North Ca... \n", + "334322 Milwaukee Blues (with The North Carolina Rambl... \n", + "334323 Where the Whippoorwill Is Whispering Good-Nigh... \n", + "334324 The Girl I Left in Sunny Tennessee (with The N... \n", + "334325 Sunny Tennesee (with Floyd County Ramblers) - ... \n", + "334326 Bulldog Down in Sunny Tennessee (with Dock Walsh) \n", + "334327 Moving Day (with Arthur Collins) \n", + "334328 It's Movin' Day (with The North Carolina Rambl... 
\n", + "334329 Home Sweet, Home (with Frank Jenkins) \n", + "334330 I'm the Man that Rode the Mule 'Round the Worl... \n", + "334331 Man that Rode the Mule Around the World (with ... \n", + "334332 Lynchburg Town (with Chris Howland & The Highl... \n", + "334333 Going Down to Lynchburg Town / Don't Let Your ... \n", + "334334 Some One (with Branch & Coleman) \n", + "334335 Monkey on a String (with Cal Stewart) \n", + "334336 Monkey on a String (with The North Carolina Ra... \n", + "334337 Can I Sleep in Your Barn Tonight Mister (with ... \n", + "334338 May I Sleep in Your Barn Tonight, Mister (with... \n", + "334339 Married Life Blues (with Byron Parker) \n", + "334340 The Infanta March (with Fred van Eps) \n", + "334341 Sunset March (with The North Carolina Ramblers) \n", + "334342 I'll Roll in My Sweet Baby's Arms (with Carter... \n", + "334343 Goodbye Eliza Jane (with Peerless Quartet) \n", + "334344 Good-Bye Sweet Liza Jane (with The North Carol... \n", + "334345 Goodbye Booze (with The North Carolina Rambler... \n", + "334346 Goodbye Booze (with Fate Norris & Gid Tanner) \n", + "334347 You Ain't Talking to Me (with Eddie Morton) \n", + "334348 You Ain't Talkin' to Me (with The North Caroli... \n", + "334349 If I Lose, I Don't Care (with The North Caroli... 
\n", + "\n", + " album artist acousticness danceability \\\n", + "241999 Silhouette 0.935 0.731 \n", + "334300 Roots Of Country Music (1965) 0.953 0.712 \n", + "334301 Roots Of Country Music (1965) 0.901 0.512 \n", + "334302 Roots Of Country Music (1965) 0.988 0.420 \n", + "334303 Roots Of Country Music (1965) 0.961 0.445 \n", + "334304 Roots Of Country Music (1965) 0.919 0.546 \n", + "334305 Roots Of Country Music (1965) 0.930 0.523 \n", + "334306 Roots Of Country Music (1965) 0.922 0.532 \n", + "334307 Roots Of Country Music (1965) 0.965 0.533 \n", + "334308 Roots Of Country Music (1965) 0.924 0.588 \n", + "334309 Roots Of Country Music (1965) 0.963 0.710 \n", + "334310 Roots Of Country Music (1965) 0.927 0.681 \n", + "334311 Roots Of Country Music (1965) 0.952 0.554 \n", + "334312 Roots Of Country Music (1965) 0.969 0.499 \n", + "334313 Roots Of Country Music (1965) 0.990 0.594 \n", + "334314 Roots Of Country Music (1965) 0.910 0.751 \n", + "334315 Roots Of Country Music (1965) 0.875 0.407 \n", + "334316 Roots Of Country Music (1965) 0.974 0.638 \n", + "334317 Roots Of Country Music (1965) 0.925 0.573 \n", + "334318 Roots Of Country Music (1965) 0.917 0.644 \n", + "334319 Roots Of Country Music (1965) 0.875 0.696 \n", + "334320 Roots Of Country Music (1965) 0.923 0.528 \n", + "334321 Roots Of Country Music (1965) 0.966 0.486 \n", + "334322 Roots Of Country Music (1965) 0.978 0.738 \n", + "334323 Roots Of Country Music (1965) 0.974 0.667 \n", + "334324 Roots Of Country Music (1965) 0.991 0.709 \n", + "334325 Roots Of Country Music (1965) 0.944 0.491 \n", + "334326 Roots Of Country Music (1965) 0.964 0.648 \n", + "334327 Roots Of Country Music (1965) 0.994 0.658 \n", + "334328 Roots Of Country Music (1965) 0.871 0.540 \n", + "334329 Roots Of Country Music (1965) 0.974 0.481 \n", + "334330 Roots Of Country Music (1965) 0.990 0.668 \n", + "334331 Roots Of Country Music (1965) 0.946 0.692 \n", + "334332 Roots Of Country Music (1965) 0.970 0.709 \n", + "334333 Roots 
Of Country Music (1965) 0.993 0.582 \n", + "334334 Roots Of Country Music (1965) 0.983 0.462 \n", + "334335 Roots Of Country Music (1965) 0.983 0.590 \n", + "334336 Roots Of Country Music (1965) 0.989 0.589 \n", + "334337 Roots Of Country Music (1965) 0.993 0.675 \n", + "334338 Roots Of Country Music (1965) 0.927 0.649 \n", + "334339 Roots Of Country Music (1965) 0.969 0.567 \n", + "334340 Roots Of Country Music (1965) 0.968 0.551 \n", + "334341 Roots Of Country Music (1965) 0.979 0.637 \n", + "334342 Roots Of Country Music (1965) 0.965 0.657 \n", + "334343 Roots Of Country Music (1965) 0.995 0.655 \n", + "334344 Roots Of Country Music (1965) 0.947 0.657 \n", + "334345 Roots Of Country Music (1965) 0.987 0.399 \n", + "334346 Roots Of Country Music (1965) 0.988 0.608 \n", + "334347 Roots Of Country Music (1965) 0.996 0.533 \n", + "334348 Roots Of Country Music (1965) 0.931 0.628 \n", + "334349 Roots Of Country Music (1965) 0.938 0.558 \n", + "\n", + " duration_ms energy instrumentalness key ... mode speechiness \\\n", + "241999 223507.0 0.167 0.000000 6.0 ... 0.0 0.0297 \n", + "334300 204307.0 0.514 0.004250 5.0 ... 1.0 0.0391 \n", + "334301 191800.0 0.439 0.000001 7.0 ... 1.0 0.0311 \n", + "334302 189253.0 0.431 0.000314 0.0 ... 1.0 0.0406 \n", + "334303 209027.0 0.332 0.000000 5.0 ... 1.0 0.0336 \n", + "334304 208813.0 0.409 0.000006 7.0 ... 1.0 0.0477 \n", + "334305 198253.0 0.309 0.000000 0.0 ... 1.0 0.0347 \n", + "334306 203227.0 0.397 0.000001 7.0 ... 1.0 0.0458 \n", + "334307 168800.0 0.412 0.000000 10.0 ... 1.0 0.0462 \n", + "334308 194680.0 0.536 0.000031 2.0 ... 1.0 0.0333 \n", + "334309 170813.0 0.322 0.000000 7.0 ... 1.0 0.0288 \n", + "334310 185933.0 0.448 0.000049 0.0 ... 1.0 0.0366 \n", + "334311 168147.0 0.312 0.000007 2.0 ... 1.0 0.0315 \n", + "334312 207880.0 0.290 0.000000 5.0 ... 1.0 0.0473 \n", + "334313 172027.0 0.413 0.047700 2.0 ... 1.0 0.0367 \n", + "334314 180267.0 0.478 0.000022 7.0 ... 
1.0 0.0775 \n", + "334315 188373.0 0.355 0.809000 5.0 ... 1.0 0.0347 \n", + "334316 178387.0 0.750 0.534000 2.0 ... 1.0 0.0399 \n", + "334317 185413.0 0.416 0.000003 7.0 ... 1.0 0.3810 \n", + "334318 174053.0 0.403 0.000000 7.0 ... 1.0 0.0450 \n", + "334319 181773.0 0.407 0.000141 8.0 ... 1.0 0.0267 \n", + "334320 189573.0 0.354 0.000007 3.0 ... 1.0 0.0499 \n", + "334321 183853.0 0.226 0.000000 5.0 ... 1.0 0.0351 \n", + "334322 197267.0 0.411 0.000029 1.0 ... 1.0 0.0390 \n", + "334323 189733.0 0.239 0.000000 8.0 ... 1.0 0.0335 \n", + "334324 201840.0 0.423 0.042300 2.0 ... 1.0 0.0326 \n", + "334325 196267.0 0.391 0.000000 0.0 ... 1.0 0.0334 \n", + "334326 160053.0 0.397 0.000008 5.0 ... 1.0 0.0402 \n", + "334327 193360.0 0.194 0.000000 5.0 ... 1.0 0.0849 \n", + "334328 207107.0 0.349 0.000013 6.0 ... 1.0 0.0339 \n", + "334329 153160.0 0.537 0.881000 2.0 ... 1.0 0.0414 \n", + "334330 183293.0 0.392 0.419000 2.0 ... 1.0 0.0476 \n", + "334331 175547.0 0.341 0.000032 8.0 ... 1.0 0.0436 \n", + "334332 180667.0 0.652 0.651000 0.0 ... 1.0 0.0313 \n", + "334333 188600.0 0.565 0.882000 8.0 ... 1.0 0.0350 \n", + "334334 183360.0 0.277 0.000000 10.0 ... 1.0 0.0323 \n", + "334335 151040.0 0.366 0.000000 11.0 ... 1.0 0.2550 \n", + "334336 187253.0 0.454 0.007220 10.0 ... 1.0 0.0391 \n", + "334337 191693.0 0.360 0.002530 9.0 ... 1.0 0.0406 \n", + "334338 171600.0 0.522 0.006000 6.0 ... 1.0 0.0599 \n", + "334339 160987.0 0.369 0.000003 2.0 ... 1.0 0.0323 \n", + "334340 260560.0 0.631 0.950000 0.0 ... 1.0 0.0571 \n", + "334341 159760.0 0.450 0.941000 0.0 ... 1.0 0.0652 \n", + "334342 178080.0 0.438 0.000000 10.0 ... 0.0 0.0846 \n", + "334343 97107.0 0.458 0.000000 4.0 ... 0.0 0.8840 \n", + "334344 185147.0 0.350 0.000000 7.0 ... 1.0 0.0387 \n", + "334345 195240.0 0.305 0.005340 5.0 ... 1.0 0.0338 \n", + "334346 170333.0 0.286 0.000003 8.0 ... 1.0 0.0406 \n", + "334347 174920.0 0.277 0.001640 4.0 ... 0.0 0.1960 \n", + "334348 177480.0 0.403 0.000001 2.0 ... 
1.0 0.0429 \n", + "334349 187600.0 0.366 0.000009 2.0 ... 1.0 0.0497 \n", + "\n", + " tempo time_signature valence album_id date \\\n", + "241999 94.932 4.0 0.274 61vU2oVw8d5QkTNhW8PYla 2018-02-09 \n", + "334300 133.558 4.0 0.923 0jckzrST0anXNTY13KMbSw 2005 \n", + "334301 115.861 4.0 0.935 0jckzrST0anXNTY13KMbSw 2005 \n", + "334302 205.555 3.0 0.901 0jckzrST0anXNTY13KMbSw 2005 \n", + "334303 182.993 3.0 0.831 0jckzrST0anXNTY13KMbSw 2005 \n", + "334304 121.444 4.0 0.815 0jckzrST0anXNTY13KMbSw 2005 \n", + "334305 96.058 4.0 0.776 0jckzrST0anXNTY13KMbSw 2005 \n", + "334306 116.492 4.0 0.902 0jckzrST0anXNTY13KMbSw 2005 \n", + "334307 106.316 4.0 0.781 0jckzrST0anXNTY13KMbSw 2005 \n", + "334308 125.672 4.0 0.974 0jckzrST0anXNTY13KMbSw 2005 \n", + "334309 97.044 4.0 0.933 0jckzrST0anXNTY13KMbSw 2005 \n", + "334310 121.730 4.0 0.921 0jckzrST0anXNTY13KMbSw 2005 \n", + "334311 104.923 4.0 0.928 0jckzrST0anXNTY13KMbSw 2005 \n", + "334312 175.944 4.0 0.649 0jckzrST0anXNTY13KMbSw 2005 \n", + "334313 137.338 4.0 0.905 0jckzrST0anXNTY13KMbSw 2005 \n", + "334314 119.050 4.0 0.956 0jckzrST0anXNTY13KMbSw 2005 \n", + "334315 165.145 3.0 0.938 0jckzrST0anXNTY13KMbSw 2005 \n", + "334316 131.764 4.0 0.885 0jckzrST0anXNTY13KMbSw 2005 \n", + "334317 133.930 4.0 0.850 0jckzrST0anXNTY13KMbSw 2005 \n", + "334318 114.801 4.0 0.892 0jckzrST0anXNTY13KMbSw 2005 \n", + "334319 97.032 4.0 0.929 0jckzrST0anXNTY13KMbSw 2005 \n", + "334320 99.170 4.0 0.743 0jckzrST0anXNTY13KMbSw 2005 \n", + "334321 173.193 4.0 0.577 0jckzrST0anXNTY13KMbSw 2005 \n", + "334322 106.908 4.0 0.919 0jckzrST0anXNTY13KMbSw 2005 \n", + "334323 90.350 4.0 0.721 0jckzrST0anXNTY13KMbSw 2005 \n", + "334324 128.847 4.0 0.885 0jckzrST0anXNTY13KMbSw 2005 \n", + "334325 114.447 4.0 0.624 0jckzrST0anXNTY13KMbSw 2005 \n", + "334326 167.310 4.0 0.922 0jckzrST0anXNTY13KMbSw 2005 \n", + "334327 75.287 4.0 0.698 0jckzrST0anXNTY13KMbSw 2005 \n", + "334328 95.801 4.0 0.842 0jckzrST0anXNTY13KMbSw 2005 \n", + "334329 129.572 4.0 0.974 
0jckzrST0anXNTY13KMbSw 2005 \n", + "334330 139.631 4.0 0.923 0jckzrST0anXNTY13KMbSw 2005 \n", + "334331 123.576 4.0 0.797 0jckzrST0anXNTY13KMbSw 2005 \n", + "334332 131.121 4.0 0.773 0jckzrST0anXNTY13KMbSw 2005 \n", + "334333 145.412 4.0 0.936 0jckzrST0anXNTY13KMbSw 2005 \n", + "334334 178.321 3.0 0.634 0jckzrST0anXNTY13KMbSw 2005 \n", + "334335 83.011 3.0 0.935 0jckzrST0anXNTY13KMbSw 2005 \n", + "334336 102.926 4.0 0.887 0jckzrST0anXNTY13KMbSw 2005 \n", + "334337 132.818 4.0 0.865 0jckzrST0anXNTY13KMbSw 2005 \n", + "334338 126.300 4.0 0.567 0jckzrST0anXNTY13KMbSw 2005 \n", + "334339 113.087 4.0 0.740 0jckzrST0anXNTY13KMbSw 2005 \n", + "334340 123.816 4.0 0.963 0jckzrST0anXNTY13KMbSw 2005 \n", + "334341 114.502 4.0 0.648 0jckzrST0anXNTY13KMbSw 2005 \n", + "334342 144.214 4.0 0.813 0jckzrST0anXNTY13KMbSw 2005 \n", + "334343 114.851 1.0 0.405 0jckzrST0anXNTY13KMbSw 2005 \n", + "334344 105.339 4.0 0.898 0jckzrST0anXNTY13KMbSw 2005 \n", + "334345 182.023 4.0 0.848 0jckzrST0anXNTY13KMbSw 2005 \n", + "334346 120.245 4.0 0.650 0jckzrST0anXNTY13KMbSw 2005 \n", + "334347 85.184 4.0 0.765 0jckzrST0anXNTY13KMbSw 2005 \n", + "334348 122.515 4.0 0.867 0jckzrST0anXNTY13KMbSw 2005 \n", + "334349 124.802 4.0 0.873 0jckzrST0anXNTY13KMbSw 2005 \n", + "\n", + " rank album_length weeks_top \n", + "241999 NaN NaN NaN \n", + "334300 NaN NaN NaN \n", + "334301 NaN NaN NaN \n", + "334302 NaN NaN NaN \n", + "334303 NaN NaN NaN \n", + "334304 NaN NaN NaN \n", + "334305 NaN NaN NaN \n", + "334306 NaN NaN NaN \n", + "334307 NaN NaN NaN \n", + "334308 NaN NaN NaN \n", + "334309 NaN NaN NaN \n", + "334310 NaN NaN NaN \n", + "334311 NaN NaN NaN \n", + "334312 NaN NaN NaN \n", + "334313 NaN NaN NaN \n", + "334314 NaN NaN NaN \n", + "334315 NaN NaN NaN \n", + "334316 NaN NaN NaN \n", + "334317 NaN NaN NaN \n", + "334318 NaN NaN NaN \n", + "334319 NaN NaN NaN \n", + "334320 NaN NaN NaN \n", + "334321 NaN NaN NaN \n", + "334322 NaN NaN NaN \n", + "334323 NaN NaN NaN \n", + "334324 NaN NaN NaN \n", + 
"334325 NaN NaN NaN \n", + "334326 NaN NaN NaN \n", + "334327 NaN NaN NaN \n", + "334328 NaN NaN NaN \n", + "334329 NaN NaN NaN \n", + "334330 NaN NaN NaN \n", + "334331 NaN NaN NaN \n", + "334332 NaN NaN NaN \n", + "334333 NaN NaN NaN \n", + "334334 NaN NaN NaN \n", + "334335 NaN NaN NaN \n", + "334336 NaN NaN NaN \n", + "334337 NaN NaN NaN \n", + "334338 NaN NaN NaN \n", + "334339 NaN NaN NaN \n", + "334340 NaN NaN NaN \n", + "334341 NaN NaN NaN \n", + "334342 NaN NaN NaN \n", + "334343 NaN NaN NaN \n", + "334344 NaN NaN NaN \n", + "334345 NaN NaN NaN \n", + "334346 NaN NaN NaN \n", + "334347 NaN NaN NaN \n", + "334348 NaN NaN NaN \n", + "334349 NaN NaN NaN \n", + "\n", + "[51 rows x 22 columns]" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking[df_ranking['rank'].isnull()]" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "df_ranking.dropna(subset=[\"rank\"],inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id 0\n", + "song 0\n", + "album 0\n", + "artist 0\n", + "acousticness 5\n", + "danceability 5\n", + "duration_ms 5\n", + "energy 5\n", + "instrumentalness 5\n", + "key 5\n", + "liveness 5\n", + "loudness 5\n", + "mode 5\n", + "speechiness 5\n", + "tempo 5\n", + "time_signature 5\n", + "valence 5\n", + "album_id 0\n", + "date 0\n", + "rank 0\n", + "album_length 0\n", + "weeks_top 0\n", + "dtype: int64" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "df_ranking.dropna(inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id object\n", + "song 
object\n", + "album object\n", + "artist object\n", + "acousticness float64\n", + "danceability float64\n", + "duration_ms float64\n", + "energy float64\n", + "instrumentalness float64\n", + "key float64\n", + "liveness float64\n", + "loudness float64\n", + "mode float64\n", + "speechiness float64\n", + "tempo float64\n", + "time_signature float64\n", + "valence float64\n", + "album_id object\n", + "date datetime64[ns]\n", + "rank float64\n", + "album_length float64\n", + "weeks_top float64\n", + "dtype: object" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking[\"date\"] = pd.to_datetime(df_ranking[\"date\"])\n", + "df_ranking.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencerankalbum_lengthweeks_top
acousticness1.000000-0.182143-0.107886-0.7103060.122013-0.027495-0.009117-0.5597770.0772210.015607-0.181678-0.183628-0.2222750.0409180.1125580.019238
danceability-0.1821431.000000-0.0629800.111136-0.1951270.022850-0.1493990.139794-0.0543020.153791-0.1000580.2039100.545940-0.024367-0.063765-0.021480
duration_ms-0.107886-0.0629801.0000000.0593780.0538160.0037080.0734630.032735-0.051887-0.0685700.0138740.056457-0.126540-0.005704-0.040620-0.001930
energy-0.7103060.1111360.0593781.000000-0.1272210.0327370.1745210.762424-0.0672450.1154650.2196660.1906270.313298-0.030172-0.079614-0.021812
instrumentalness0.122013-0.1951270.053816-0.1272211.000000-0.011255-0.052372-0.282310-0.053474-0.104155-0.027658-0.045885-0.1745940.0640750.093708-0.003262
key-0.0274950.0228500.0037080.032737-0.0112551.000000-0.0015570.020048-0.1577060.0279930.0021520.0067780.0253400.001331-0.006069-0.002369
liveness-0.009117-0.1493990.0734630.174521-0.052372-0.0015571.0000000.0625700.0117190.2356810.002506-0.012652-0.034016-0.0070860.058215-0.006417
loudness-0.5597770.1397940.0327350.762424-0.2823100.0200480.0625701.000000-0.0180380.0075240.1782240.1618840.192464-0.049147-0.116179-0.022634
mode0.077221-0.054302-0.051887-0.067245-0.053474-0.1577060.011719-0.0180381.000000-0.0763680.011123-0.0129780.0093700.0042670.0126620.015574
speechiness0.0156070.153791-0.0685700.115465-0.1041550.0279930.2356810.007524-0.0763681.000000-0.030372-0.0101850.029355-0.0204770.014656-0.013571
tempo-0.181678-0.1000580.0138740.219666-0.0276580.0021520.0025060.1782240.011123-0.0303721.0000000.0598490.112866-0.003569-0.0332320.000926
time_signature-0.1836280.2039100.0564570.190627-0.0458850.006778-0.0126520.161884-0.012978-0.0101850.0598491.0000000.162042-0.011134-0.041868-0.004096
valence-0.2222750.545940-0.1265400.313298-0.1745940.025340-0.0340160.1924640.0093700.0293550.1128660.1620421.0000000.008667-0.0234290.006013
rank0.040918-0.024367-0.005704-0.0301720.0640750.001331-0.007086-0.0491470.004267-0.020477-0.003569-0.0111340.0086671.0000000.010702-0.093513
album_length0.112558-0.063765-0.040620-0.0796140.093708-0.0060690.058215-0.1161790.0126620.014656-0.033232-0.041868-0.0234290.0107021.0000000.071388
weeks_top0.019238-0.021480-0.001930-0.021812-0.003262-0.002369-0.006417-0.0226340.015574-0.0135710.000926-0.0040960.006013-0.0935130.0713881.000000
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy \\\n", + "acousticness 1.000000 -0.182143 -0.107886 -0.710306 \n", + "danceability -0.182143 1.000000 -0.062980 0.111136 \n", + "duration_ms -0.107886 -0.062980 1.000000 0.059378 \n", + "energy -0.710306 0.111136 0.059378 1.000000 \n", + "instrumentalness 0.122013 -0.195127 0.053816 -0.127221 \n", + "key -0.027495 0.022850 0.003708 0.032737 \n", + "liveness -0.009117 -0.149399 0.073463 0.174521 \n", + "loudness -0.559777 0.139794 0.032735 0.762424 \n", + "mode 0.077221 -0.054302 -0.051887 -0.067245 \n", + "speechiness 0.015607 0.153791 -0.068570 0.115465 \n", + "tempo -0.181678 -0.100058 0.013874 0.219666 \n", + "time_signature -0.183628 0.203910 0.056457 0.190627 \n", + "valence -0.222275 0.545940 -0.126540 0.313298 \n", + "rank 0.040918 -0.024367 -0.005704 -0.030172 \n", + "album_length 0.112558 -0.063765 -0.040620 -0.079614 \n", + "weeks_top 0.019238 -0.021480 -0.001930 -0.021812 \n", + "\n", + " instrumentalness key liveness loudness mode \\\n", + "acousticness 0.122013 -0.027495 -0.009117 -0.559777 0.077221 \n", + "danceability -0.195127 0.022850 -0.149399 0.139794 -0.054302 \n", + "duration_ms 0.053816 0.003708 0.073463 0.032735 -0.051887 \n", + "energy -0.127221 0.032737 0.174521 0.762424 -0.067245 \n", + "instrumentalness 1.000000 -0.011255 -0.052372 -0.282310 -0.053474 \n", + "key -0.011255 1.000000 -0.001557 0.020048 -0.157706 \n", + "liveness -0.052372 -0.001557 1.000000 0.062570 0.011719 \n", + "loudness -0.282310 0.020048 0.062570 1.000000 -0.018038 \n", + "mode -0.053474 -0.157706 0.011719 -0.018038 1.000000 \n", + "speechiness -0.104155 0.027993 0.235681 0.007524 -0.076368 \n", + "tempo -0.027658 0.002152 0.002506 0.178224 0.011123 \n", + "time_signature -0.045885 0.006778 -0.012652 0.161884 -0.012978 \n", + "valence -0.174594 0.025340 -0.034016 0.192464 0.009370 \n", + "rank 0.064075 0.001331 -0.007086 -0.049147 0.004267 \n", + "album_length 0.093708 -0.006069 0.058215 
-0.116179 0.012662 \n", + "weeks_top -0.003262 -0.002369 -0.006417 -0.022634 0.015574 \n", + "\n", + " speechiness tempo time_signature valence rank \\\n", + "acousticness 0.015607 -0.181678 -0.183628 -0.222275 0.040918 \n", + "danceability 0.153791 -0.100058 0.203910 0.545940 -0.024367 \n", + "duration_ms -0.068570 0.013874 0.056457 -0.126540 -0.005704 \n", + "energy 0.115465 0.219666 0.190627 0.313298 -0.030172 \n", + "instrumentalness -0.104155 -0.027658 -0.045885 -0.174594 0.064075 \n", + "key 0.027993 0.002152 0.006778 0.025340 0.001331 \n", + "liveness 0.235681 0.002506 -0.012652 -0.034016 -0.007086 \n", + "loudness 0.007524 0.178224 0.161884 0.192464 -0.049147 \n", + "mode -0.076368 0.011123 -0.012978 0.009370 0.004267 \n", + "speechiness 1.000000 -0.030372 -0.010185 0.029355 -0.020477 \n", + "tempo -0.030372 1.000000 0.059849 0.112866 -0.003569 \n", + "time_signature -0.010185 0.059849 1.000000 0.162042 -0.011134 \n", + "valence 0.029355 0.112866 0.162042 1.000000 0.008667 \n", + "rank -0.020477 -0.003569 -0.011134 0.008667 1.000000 \n", + "album_length 0.014656 -0.033232 -0.041868 -0.023429 0.010702 \n", + "weeks_top -0.013571 0.000926 -0.004096 0.006013 -0.093513 \n", + "\n", + " album_length weeks_top \n", + "acousticness 0.112558 0.019238 \n", + "danceability -0.063765 -0.021480 \n", + "duration_ms -0.040620 -0.001930 \n", + "energy -0.079614 -0.021812 \n", + "instrumentalness 0.093708 -0.003262 \n", + "key -0.006069 -0.002369 \n", + "liveness 0.058215 -0.006417 \n", + "loudness -0.116179 -0.022634 \n", + "mode 0.012662 0.015574 \n", + "speechiness 0.014656 -0.013571 \n", + "tempo -0.033232 0.000926 \n", + "time_signature -0.041868 -0.004096 \n", + "valence -0.023429 0.006013 \n", + "rank 0.010702 -0.093513 \n", + "album_length 1.000000 0.071388 \n", + "weeks_top 0.071388 1.000000 " + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking[df_ranking['rank']>=40].corr()" + ] + }, + { + 
"cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "#sns.pairplot(df_ranking)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import linear_model\n", + "from sklearn.preprocessing import PolynomialFeatures, StandardScaler\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['id', 'song', 'album', 'artist', 'acousticness', 'danceability',\n", + " 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness',\n", + " 'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence',\n", + " 'album_id', 'date', 'rank', 'album_length', 'weeks_top'],\n", + " dtype='object')" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencealbum_lengthweeks_top
00.05550.754142301.00.6630.0000006.00.101-6.3110.00.42790.1954.00.20720.03.0
10.29200.860152829.00.4180.0000007.00.106-9.0610.00.158126.0234.00.37420.03.0
20.15300.718215305.00.4540.0000468.00.116-9.0121.00.12789.4834.00.19620.03.0
30.01530.581189487.00.6620.0000009.00.111-5.2391.00.30393.0234.00.43420.03.0
40.02350.736192779.00.6220.0000006.00.151-4.6530.00.133191.9714.00.50620.03.0
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy instrumentalness key \\\n", + "0 0.0555 0.754 142301.0 0.663 0.000000 6.0 \n", + "1 0.2920 0.860 152829.0 0.418 0.000000 7.0 \n", + "2 0.1530 0.718 215305.0 0.454 0.000046 8.0 \n", + "3 0.0153 0.581 189487.0 0.662 0.000000 9.0 \n", + "4 0.0235 0.736 192779.0 0.622 0.000000 6.0 \n", + "\n", + " liveness loudness mode speechiness tempo time_signature valence \\\n", + "0 0.101 -6.311 0.0 0.427 90.195 4.0 0.207 \n", + "1 0.106 -9.061 0.0 0.158 126.023 4.0 0.374 \n", + "2 0.116 -9.012 1.0 0.127 89.483 4.0 0.196 \n", + "3 0.111 -5.239 1.0 0.303 93.023 4.0 0.434 \n", + "4 0.151 -4.653 0.0 0.133 191.971 4.0 0.506 \n", + "\n", + " album_length weeks_top \n", + "0 20.0 3.0 \n", + "1 20.0 3.0 \n", + "2 20.0 3.0 \n", + "3 20.0 3.0 \n", + "4 20.0 3.0 " + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = df_ranking.drop(columns=[\"song\",\"album\",\"artist\",\"id\",\"rank\",\"album_id\",\"date\"])\n", + "y = df_ranking[\"rank\"]\n", + "x.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nclf = linear_model.SGDRegressor()\\nclf.fit(x_train, y_train)\\n\\n# clf is a trained model\\n\\ny_predicted = clf.predict(x_test)\\n\\n'" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\"\"\"\n", + "clf = linear_model.SGDRegressor()\n", + "clf.fit(x_train, y_train)\n", + "\n", + "# clf is a trained model\n", + "\n", + "y_predicted = clf.predict(x_test)\n", + "\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + 
"#clf.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "# Create linear regression\n", + "regr = linear_model.LinearRegression()\n", + "\n", + "# Fit the linear regression\n", + "model = regr.fit(x_train, y_train)\n", + "# model = regr.fit(X_inter, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.06136701672338518" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.06176784039100125" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(x_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "knnr = KNeighborsRegressor(n_neighbors = 3)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "modelkn = knnr.fit(x_train, y_train) #fit the model\n", + "y_pred = knnr.predict(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.031727221643046866" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "modelkn.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.tree import DecisionTreeRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + 
"outputs": [], + "source": [ + "tree = DecisionTreeRegressor(random_state = 29)\n", + "model_tree = tree.fit(x_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.10875448097643847" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_tree.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencerankalbum_lengthweeks_top
acousticness1.000000-0.196245-0.120949-0.7062070.118138-0.023548-0.006694-0.5648740.0806220.008277-0.175597-0.186598-0.2191840.0785270.101783-0.001565
danceability-0.1962451.000000-0.0599580.110363-0.1909940.022932-0.1526060.147712-0.0702430.174174-0.1104150.2049450.528247-0.063117-0.051154-0.012421
duration_ms-0.120949-0.0599581.0000000.0679770.0456240.0047580.0676540.045632-0.048022-0.0792620.0187410.060452-0.123794-0.007234-0.0439100.009410
energy-0.7062070.1103630.0679771.000000-0.1194900.0302060.1689780.760206-0.0681250.1018270.2138480.1922080.312593-0.054419-0.069048-0.004840
instrumentalness0.118138-0.1909940.045624-0.1194901.000000-0.007926-0.034715-0.275935-0.040215-0.106122-0.022825-0.046484-0.1579120.1024550.094062-0.003196
key-0.0235480.0229320.0047580.030206-0.0079261.000000-0.0005650.016887-0.1622580.031916-0.0024990.0063370.026532-0.000747-0.003424-0.002516
liveness-0.006694-0.1526060.0676540.168978-0.034715-0.0005651.0000000.0443880.0107720.2214600.002441-0.014402-0.0267700.0118810.0688290.009502
loudness-0.5648740.1477120.0456320.760206-0.2759350.0168870.0443881.000000-0.029998-0.0011240.1709740.1691110.179312-0.101858-0.102410-0.019761
mode0.080622-0.070243-0.048022-0.068125-0.040215-0.1622580.010772-0.0299981.000000-0.0852740.014724-0.015830-0.0004690.0242170.0076100.012935
speechiness0.0082770.174174-0.0792620.101827-0.1061220.0319160.221460-0.001124-0.0852741.000000-0.031299-0.0076320.047409-0.0448720.028152-0.019129
tempo-0.175597-0.1104150.0187410.213848-0.022825-0.0024990.0024410.1709740.014724-0.0312991.0000000.0577680.099147-0.003032-0.0324500.000883
time_signature-0.1865980.2049450.0604520.192208-0.0464840.006337-0.0144020.169111-0.015830-0.0076320.0577681.0000000.159691-0.025005-0.0349460.003075
valence-0.2191840.528247-0.1237940.312593-0.1579120.026532-0.0267700.179312-0.0004690.0474090.0991470.1596911.0000000.014298-0.0171170.018637
rank0.078527-0.063117-0.007234-0.0544190.102455-0.0007470.011881-0.1018580.024217-0.044872-0.003032-0.0250050.0142981.0000000.023332-0.184133
album_length0.101783-0.051154-0.043910-0.0690480.094062-0.0034240.068829-0.1024100.0076100.028152-0.032450-0.034946-0.0171170.0233321.0000000.092760
weeks_top-0.001565-0.0124210.009410-0.004840-0.003196-0.0025160.009502-0.0197610.012935-0.0191290.0008830.0030750.018637-0.1841330.0927601.000000
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy \\\n", + "acousticness 1.000000 -0.196245 -0.120949 -0.706207 \n", + "danceability -0.196245 1.000000 -0.059958 0.110363 \n", + "duration_ms -0.120949 -0.059958 1.000000 0.067977 \n", + "energy -0.706207 0.110363 0.067977 1.000000 \n", + "instrumentalness 0.118138 -0.190994 0.045624 -0.119490 \n", + "key -0.023548 0.022932 0.004758 0.030206 \n", + "liveness -0.006694 -0.152606 0.067654 0.168978 \n", + "loudness -0.564874 0.147712 0.045632 0.760206 \n", + "mode 0.080622 -0.070243 -0.048022 -0.068125 \n", + "speechiness 0.008277 0.174174 -0.079262 0.101827 \n", + "tempo -0.175597 -0.110415 0.018741 0.213848 \n", + "time_signature -0.186598 0.204945 0.060452 0.192208 \n", + "valence -0.219184 0.528247 -0.123794 0.312593 \n", + "rank 0.078527 -0.063117 -0.007234 -0.054419 \n", + "album_length 0.101783 -0.051154 -0.043910 -0.069048 \n", + "weeks_top -0.001565 -0.012421 0.009410 -0.004840 \n", + "\n", + " instrumentalness key liveness loudness mode \\\n", + "acousticness 0.118138 -0.023548 -0.006694 -0.564874 0.080622 \n", + "danceability -0.190994 0.022932 -0.152606 0.147712 -0.070243 \n", + "duration_ms 0.045624 0.004758 0.067654 0.045632 -0.048022 \n", + "energy -0.119490 0.030206 0.168978 0.760206 -0.068125 \n", + "instrumentalness 1.000000 -0.007926 -0.034715 -0.275935 -0.040215 \n", + "key -0.007926 1.000000 -0.000565 0.016887 -0.162258 \n", + "liveness -0.034715 -0.000565 1.000000 0.044388 0.010772 \n", + "loudness -0.275935 0.016887 0.044388 1.000000 -0.029998 \n", + "mode -0.040215 -0.162258 0.010772 -0.029998 1.000000 \n", + "speechiness -0.106122 0.031916 0.221460 -0.001124 -0.085274 \n", + "tempo -0.022825 -0.002499 0.002441 0.170974 0.014724 \n", + "time_signature -0.046484 0.006337 -0.014402 0.169111 -0.015830 \n", + "valence -0.157912 0.026532 -0.026770 0.179312 -0.000469 \n", + "rank 0.102455 -0.000747 0.011881 -0.101858 0.024217 \n", + "album_length 0.094062 -0.003424 0.068829 
-0.102410 0.007610 \n", + "weeks_top -0.003196 -0.002516 0.009502 -0.019761 0.012935 \n", + "\n", + " speechiness tempo time_signature valence rank \\\n", + "acousticness 0.008277 -0.175597 -0.186598 -0.219184 0.078527 \n", + "danceability 0.174174 -0.110415 0.204945 0.528247 -0.063117 \n", + "duration_ms -0.079262 0.018741 0.060452 -0.123794 -0.007234 \n", + "energy 0.101827 0.213848 0.192208 0.312593 -0.054419 \n", + "instrumentalness -0.106122 -0.022825 -0.046484 -0.157912 0.102455 \n", + "key 0.031916 -0.002499 0.006337 0.026532 -0.000747 \n", + "liveness 0.221460 0.002441 -0.014402 -0.026770 0.011881 \n", + "loudness -0.001124 0.170974 0.169111 0.179312 -0.101858 \n", + "mode -0.085274 0.014724 -0.015830 -0.000469 0.024217 \n", + "speechiness 1.000000 -0.031299 -0.007632 0.047409 -0.044872 \n", + "tempo -0.031299 1.000000 0.057768 0.099147 -0.003032 \n", + "time_signature -0.007632 0.057768 1.000000 0.159691 -0.025005 \n", + "valence 0.047409 0.099147 0.159691 1.000000 0.014298 \n", + "rank -0.044872 -0.003032 -0.025005 0.014298 1.000000 \n", + "album_length 0.028152 -0.032450 -0.034946 -0.017117 0.023332 \n", + "weeks_top -0.019129 0.000883 0.003075 0.018637 -0.184133 \n", + "\n", + " album_length weeks_top \n", + "acousticness 0.101783 -0.001565 \n", + "danceability -0.051154 -0.012421 \n", + "duration_ms -0.043910 0.009410 \n", + "energy -0.069048 -0.004840 \n", + "instrumentalness 0.094062 -0.003196 \n", + "key -0.003424 -0.002516 \n", + "liveness 0.068829 0.009502 \n", + "loudness -0.102410 -0.019761 \n", + "mode 0.007610 0.012935 \n", + "speechiness 0.028152 -0.019129 \n", + "tempo -0.032450 0.000883 \n", + "time_signature -0.034946 0.003075 \n", + "valence -0.017117 0.018637 \n", + "rank 0.023332 -0.184133 \n", + "album_length 1.000000 0.092760 \n", + "weeks_top 0.092760 1.000000 " + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking.corr()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.10788110644999849" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "\n", + "\n", + "poly_model = make_pipeline (PolynomialFeatures(2), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + "poly_model.score(x_test, y_test)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.15080483992299143" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "poly_model = make_pipeline (PolynomialFeatures(3), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + "poly_model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "poly 4_model.score(x_test, y_test) 0.10449768259651138\n" + ] + } + ], + "source": [ + "\"\"\"poly_model = make_pipeline (PolynomialFeatures(4), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + "poly_model.score(x_test, y_test)\"\"\"\n", + "print(\"poly 4_model.score(x_test, y_test)\",0.10449768259651138)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.decomposition import PCA, FastICA" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "pca = PCA(n_components=0.98, whiten=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "x_pca = pca.fit_transform(x_train)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original number of features: (271839, 15)\n", + "Reduced number of features: (271839, 1)\n" + ] + } + ], + "source": [ + "print(\"Original number of features:\", x_train.shape)\n", + "print(\"Reduced number of features:\", x_pca.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5.679213538612037e-17" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_pca.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9999981606742969" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_pca.std()" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['id', 'song', 'album', 'artist', 'acousticness', 'danceability',\n", + " 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness',\n", + " 'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence',\n", + " 'album_id', 'date'],\n", + " dtype='object')" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_acoustic_features.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "acoustic_features = df_acoustic_features.groupby(by=[\"artist\",'album'],as_index=False).agg({'acousticness':\"mean\", 'danceability':\"mean\",\n", + " 'duration_ms':\"mean\", 'energy':\"mean\", 'instrumentalness':\"mean\", 'key':\"mean\", 'liveness':\"mean\",\n", + " 'loudness':\"mean\", 'mode':\"mean\", 'speechiness':\"mean\", 'tempo':\"mean\", 'time_signature':\"mean\", 
'valence':\"mean\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumacousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalence
0Roots Of Country Music (1965)0.9551200.5914183874.120.403120.1236934.80.235784-10.556440.940.07342125.944583.840.82632
1Silhouette0.9350000.7310223507.000.167000.0000006.00.124000-5.990000.000.0297094.932004.000.27400
2!!! (Chk Chk Chk)Myth Takes0.0919730.6465290017.300.831800.2548665.40.233740-6.939000.500.06929116.136204.000.56120
\n", + "
" + ], + "text/plain": [ + " artist album acousticness \\\n", + "0 Roots Of Country Music (1965) 0.955120 \n", + "1 Silhouette 0.935000 \n", + "2 !!! (Chk Chk Chk) Myth Takes 0.091973 \n", + "\n", + " danceability duration_ms energy instrumentalness key liveness \\\n", + "0 0.5914 183874.12 0.40312 0.123693 4.8 0.235784 \n", + "1 0.7310 223507.00 0.16700 0.000000 6.0 0.124000 \n", + "2 0.6465 290017.30 0.83180 0.254866 5.4 0.233740 \n", + "\n", + " loudness mode speechiness tempo time_signature valence \n", + "0 -10.55644 0.94 0.07342 125.94458 3.84 0.82632 \n", + "1 -5.99000 0.00 0.02970 94.93200 4.00 0.27400 \n", + "2 -6.93900 0.50 0.06929 116.13620 4.00 0.56120 " + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "acoustic_features.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "albums.drop(columns=[\"album_length\",\"track_length\" ], inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [], + "source": [ + "album_rankings = acoustic_features.merge(albums, how=\"left\", on=[\"artist\",\"album\"] )" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "album_rankings.dropna(how=\"any\", inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalenceweeks_top
20.0919730.646500290017.3000000.8318002.548659e-015.4000000.233740-6.9390000.5000000.069290116.1362004.0000000.5612001.0
30.1035430.617333224445.7500000.7794171.455423e-036.7500000.186483-8.7333330.9166670.055342126.1877504.0000000.73425026.0
40.1803320.635833230081.0833330.5824171.541667e-075.1666670.143817-11.0303330.8333330.053600138.0076674.0000000.6762506.0
50.1026700.602583211534.5000000.6899178.552500e-065.8333330.178708-8.4510830.7500000.055700128.0610003.9166670.67708356.0
60.2059180.716818205242.4545450.6499093.612727e-065.3636360.122418-9.8486360.7272730.064791142.5630914.0000000.77063626.0
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy instrumentalness \\\n", + "2 0.091973 0.646500 290017.300000 0.831800 2.548659e-01 \n", + "3 0.103543 0.617333 224445.750000 0.779417 1.455423e-03 \n", + "4 0.180332 0.635833 230081.083333 0.582417 1.541667e-07 \n", + "5 0.102670 0.602583 211534.500000 0.689917 8.552500e-06 \n", + "6 0.205918 0.716818 205242.454545 0.649909 3.612727e-06 \n", + "\n", + " key liveness loudness mode speechiness tempo \\\n", + "2 5.400000 0.233740 -6.939000 0.500000 0.069290 116.136200 \n", + "3 6.750000 0.186483 -8.733333 0.916667 0.055342 126.187750 \n", + "4 5.166667 0.143817 -11.030333 0.833333 0.053600 138.007667 \n", + "5 5.833333 0.178708 -8.451083 0.750000 0.055700 128.061000 \n", + "6 5.363636 0.122418 -9.848636 0.727273 0.064791 142.563091 \n", + "\n", + " time_signature valence weeks_top \n", + "2 4.000000 0.561200 1.0 \n", + "3 4.000000 0.734250 26.0 \n", + "4 4.000000 0.676250 6.0 \n", + "5 3.916667 0.677083 56.0 \n", + "6 4.000000 0.770636 26.0 " + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = album_rankings.drop(columns=[\"album\",\"artist\",\"rank\",\"date_start\",\"date_end\",\"days_top\"])\n", + "y = album_rankings[\"rank\"]\n", + "x.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.1082450723739965" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "\n", + "\n", + "poly_model = make_pipeline (PolynomialFeatures(2), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + 
"poly_model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idsongalbumartistacousticnessdanceabilityduration_msenergyinstrumentalnesskey...modespeechinesstempotime_signaturevalencealbum_iddaterankalbum_lengthweeks_top
00Veyvc3n9AcLSoK3r1dA12Voices In My HeadHoodie SZNA Boogie Wit da Hoodie0.05550.754142301.00.6630.06.0...0.00.42790.1954.00.2073r5hf3Cj3EMh1C2saQ8jyt2018-12-211.020.03.0
177JzXZonNumWsuXKy9vr3UBeastyHoodie SZNA Boogie Wit da Hoodie0.29200.860152829.00.4180.07.0...0.00.158126.0234.00.3743r5hf3Cj3EMh1C2saQ8jyt2018-12-211.020.03.0
\n", + "

2 rows × 22 columns

\n", + "
" + ], + "text/plain": [ + " id song album \\\n", + "0 0Veyvc3n9AcLSoK3r1dA12 Voices In My Head Hoodie SZN \n", + "1 77JzXZonNumWsuXKy9vr3U Beasty Hoodie SZN \n", + "\n", + " artist acousticness danceability duration_ms energy \\\n", + "0 A Boogie Wit da Hoodie 0.0555 0.754 142301.0 0.663 \n", + "1 A Boogie Wit da Hoodie 0.2920 0.860 152829.0 0.418 \n", + "\n", + " instrumentalness key ... mode speechiness tempo time_signature \\\n", + "0 0.0 6.0 ... 0.0 0.427 90.195 4.0 \n", + "1 0.0 7.0 ... 0.0 0.158 126.023 4.0 \n", + "\n", + " valence album_id date rank album_length weeks_top \n", + "0 0.207 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 1.0 20.0 3.0 \n", + "1 0.374 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 1.0 20.0 3.0 \n", + "\n", + "[2 rows x 22 columns]" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top_40 = df_ranking[df_ranking[\"rank\"]<=40]\n", + "top_40.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "131333" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = top_40.drop(columns=[\"song\",\"album\",\"artist\",\"id\",\"rank\",\"album_id\",\"date\"])\n", + "y = top_40[\"rank\"]\n", + "x.energy.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.1006890820016928" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "\n", + "\n", + "poly_model = make_pipeline (PolynomialFeatures(3), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, 
y_train)\n", + "\n", + "poly_model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [], + "source": [ + "recent_ranking = df_ranking[df_ranking.date > '2004-12-31']\n", + "old_ranking = df_ranking[df_ranking.date <= '2004-12-31']" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "datetime.datetime(1991, 1, 12, 0, 0)" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import datetime\n", + "df_albums.date.max()\n", + "a = datetime(1963, 1, 5)\n", + "b = datetime(2019, 1 ,19)\n", + "a + (b - a)/2" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9993704141733303" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "old_ranking.id.count()/recent_ranking.id.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencealbum_lengthweeks_top
00.05550.754142301.00.6630.06.00.101-6.3110.00.42790.1954.00.20720.03.0
10.29200.860152829.00.4180.07.00.106-9.0610.00.158126.0234.00.37420.03.0
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy instrumentalness key \\\n", + "0 0.0555 0.754 142301.0 0.663 0.0 6.0 \n", + "1 0.2920 0.860 152829.0 0.418 0.0 7.0 \n", + "\n", + " liveness loudness mode speechiness tempo time_signature valence \\\n", + "0 0.101 -6.311 0.0 0.427 90.195 4.0 0.207 \n", + "1 0.106 -9.061 0.0 0.158 126.023 4.0 0.374 \n", + "\n", + " album_length weeks_top \n", + "0 20.0 3.0 \n", + "1 20.0 3.0 " + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = recent_ranking.drop(columns=[\"song\",\"album\",\"artist\",\"id\",\"rank\",\"album_id\",\"date\"])\n", + "y = recent_ranking[\"rank\"]\n", + "x.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.12799465635727159" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "\n", + "\n", + "poly_model = make_pipeline (PolynomialFeatures(3), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + "poly_model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [], + "source": [ + "x = old_ranking.drop(columns=[\"song\",\"album\",\"artist\",\"id\",\"rank\",\"album_id\",\"date\"])\n", + "y = old_ranking[\"rank\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.12799465635727159" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import 
make_pipeline\n", + "\n", + "\n", + "poly_model = make_pipeline (PolynomialFeatures(3), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + "poly_model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.6128328806147163" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "flash_hits = df_ranking[df_ranking.weeks_top <= 13]\n", + "flash_hits.energy.count()/df_ranking.energy.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "x = flash_hits.drop(columns=[\"song\",\"album\",\"artist\",\"id\",\"rank\",\"album_id\",\"date\"])\n", + "y = flash_hits[\"rank\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.24703788868644594" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "\n", + "poly_model = make_pipeline (PolynomialFeatures(2), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + "poly_model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [], + "source": [ + "test_ranking = df_ranking[df_ranking.date > '1961-12-31']\n", + "#test_ranking = df_ranking[df_ranking.date > '2008-12-31']" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8850850061359804" + ] + }, + "execution_count": 101, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_ranking = test_ranking[test_ranking.weeks_top <= 52]\n", + "\n", + "test_ranking.energy.count()/df_ranking.energy.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [], + "source": [ + "x = test_ranking.drop(columns=[\"song\",\"album\",\"artist\",\"id\",\"rank\",\"album_id\",\"date\"])\n", + "y = test_ranking[\"rank\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.37654698821670396" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "\n", + "poly_model = make_pipeline (PolynomialFeatures(2), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + "poly_model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = poly_model.predict(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencerankalbum_lengthweeks_top
acousticness1.000000-0.193424-0.114031-0.7055590.120800-0.024165-0.006213-0.5651240.0802290.000599-0.175110-0.185099-0.2145120.0805850.1132210.026523
danceability-0.1934241.000000-0.0611570.106031-0.1925370.022180-0.1557260.144868-0.0689590.176857-0.1125430.2034490.529373-0.066055-0.0462640.083294
duration_ms-0.114031-0.0611571.0000000.0635900.0479740.0041260.0686720.039692-0.046986-0.0786290.0178480.058427-0.121044-0.006523-0.037612-0.003142
energy-0.7055590.1060310.0635901.000000-0.1227640.0303700.1694830.761897-0.0674790.1093290.2139650.1896630.305588-0.058473-0.080271-0.078506
instrumentalness0.120800-0.1925370.047974-0.1227641.000000-0.009171-0.040985-0.277801-0.040583-0.111850-0.023195-0.045723-0.1608710.1005940.094270-0.048149
key-0.0241650.0221800.0041260.030370-0.0091711.000000-0.0007340.017070-0.1653980.031355-0.0021990.0067280.025884-0.002136-0.0032520.004185
liveness-0.006213-0.1557260.0686720.169483-0.040985-0.0007341.0000000.0476590.0134820.2191410.002841-0.014975-0.0309200.0070930.080717-0.041585
loudness-0.5651240.1448680.0396920.761897-0.2778010.0170700.0476591.000000-0.0303750.0088340.1704330.1644820.177095-0.105550-0.102282-0.123773
mode0.080229-0.068959-0.046986-0.067479-0.040583-0.1653980.013482-0.0303751.000000-0.0838050.015805-0.0161390.0014470.0299960.0117020.004643
speechiness0.0005990.176857-0.0786290.109329-0.1118500.0313550.2191410.008834-0.0838051.000000-0.032465-0.0050640.046756-0.0581290.037621-0.032551
tempo-0.175110-0.1125430.0178480.213965-0.023195-0.0021990.0028410.1704330.015805-0.0324651.0000000.0567560.097036-0.003143-0.033271-0.017837
time_signature-0.1850990.2034490.0584270.189663-0.0457230.006728-0.0149750.164482-0.016139-0.0050640.0567561.0000000.157769-0.024061-0.0318220.016646
valence-0.2145120.529373-0.1210440.305588-0.1608710.025884-0.0309200.1770950.0014470.0467560.0970360.1577691.0000000.014829-0.0120780.101173
rank0.080585-0.066055-0.006523-0.0584730.100594-0.0021360.007093-0.1055500.029996-0.058129-0.003143-0.0240610.0148291.0000000.029841-0.503327
album_length0.113221-0.046264-0.037612-0.0802710.094270-0.0032520.080717-0.1022820.0117020.037621-0.033271-0.031822-0.0120780.0298411.000000-0.066330
weeks_top0.0265230.083294-0.003142-0.078506-0.0481490.004185-0.041585-0.1237730.004643-0.032551-0.0178370.0166460.101173-0.503327-0.0663301.000000
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy \\\n", + "acousticness 1.000000 -0.193424 -0.114031 -0.705559 \n", + "danceability -0.193424 1.000000 -0.061157 0.106031 \n", + "duration_ms -0.114031 -0.061157 1.000000 0.063590 \n", + "energy -0.705559 0.106031 0.063590 1.000000 \n", + "instrumentalness 0.120800 -0.192537 0.047974 -0.122764 \n", + "key -0.024165 0.022180 0.004126 0.030370 \n", + "liveness -0.006213 -0.155726 0.068672 0.169483 \n", + "loudness -0.565124 0.144868 0.039692 0.761897 \n", + "mode 0.080229 -0.068959 -0.046986 -0.067479 \n", + "speechiness 0.000599 0.176857 -0.078629 0.109329 \n", + "tempo -0.175110 -0.112543 0.017848 0.213965 \n", + "time_signature -0.185099 0.203449 0.058427 0.189663 \n", + "valence -0.214512 0.529373 -0.121044 0.305588 \n", + "rank 0.080585 -0.066055 -0.006523 -0.058473 \n", + "album_length 0.113221 -0.046264 -0.037612 -0.080271 \n", + "weeks_top 0.026523 0.083294 -0.003142 -0.078506 \n", + "\n", + " instrumentalness key liveness loudness mode \\\n", + "acousticness 0.120800 -0.024165 -0.006213 -0.565124 0.080229 \n", + "danceability -0.192537 0.022180 -0.155726 0.144868 -0.068959 \n", + "duration_ms 0.047974 0.004126 0.068672 0.039692 -0.046986 \n", + "energy -0.122764 0.030370 0.169483 0.761897 -0.067479 \n", + "instrumentalness 1.000000 -0.009171 -0.040985 -0.277801 -0.040583 \n", + "key -0.009171 1.000000 -0.000734 0.017070 -0.165398 \n", + "liveness -0.040985 -0.000734 1.000000 0.047659 0.013482 \n", + "loudness -0.277801 0.017070 0.047659 1.000000 -0.030375 \n", + "mode -0.040583 -0.165398 0.013482 -0.030375 1.000000 \n", + "speechiness -0.111850 0.031355 0.219141 0.008834 -0.083805 \n", + "tempo -0.023195 -0.002199 0.002841 0.170433 0.015805 \n", + "time_signature -0.045723 0.006728 -0.014975 0.164482 -0.016139 \n", + "valence -0.160871 0.025884 -0.030920 0.177095 0.001447 \n", + "rank 0.100594 -0.002136 0.007093 -0.105550 0.029996 \n", + "album_length 0.094270 -0.003252 0.080717 
-0.102282 0.011702 \n", + "weeks_top -0.048149 0.004185 -0.041585 -0.123773 0.004643 \n", + "\n", + " speechiness tempo time_signature valence rank \\\n", + "acousticness 0.000599 -0.175110 -0.185099 -0.214512 0.080585 \n", + "danceability 0.176857 -0.112543 0.203449 0.529373 -0.066055 \n", + "duration_ms -0.078629 0.017848 0.058427 -0.121044 -0.006523 \n", + "energy 0.109329 0.213965 0.189663 0.305588 -0.058473 \n", + "instrumentalness -0.111850 -0.023195 -0.045723 -0.160871 0.100594 \n", + "key 0.031355 -0.002199 0.006728 0.025884 -0.002136 \n", + "liveness 0.219141 0.002841 -0.014975 -0.030920 0.007093 \n", + "loudness 0.008834 0.170433 0.164482 0.177095 -0.105550 \n", + "mode -0.083805 0.015805 -0.016139 0.001447 0.029996 \n", + "speechiness 1.000000 -0.032465 -0.005064 0.046756 -0.058129 \n", + "tempo -0.032465 1.000000 0.056756 0.097036 -0.003143 \n", + "time_signature -0.005064 0.056756 1.000000 0.157769 -0.024061 \n", + "valence 0.046756 0.097036 0.157769 1.000000 0.014829 \n", + "rank -0.058129 -0.003143 -0.024061 0.014829 1.000000 \n", + "album_length 0.037621 -0.033271 -0.031822 -0.012078 0.029841 \n", + "weeks_top -0.032551 -0.017837 0.016646 0.101173 -0.503327 \n", + "\n", + " album_length weeks_top \n", + "acousticness 0.113221 0.026523 \n", + "danceability -0.046264 0.083294 \n", + "duration_ms -0.037612 -0.003142 \n", + "energy -0.080271 -0.078506 \n", + "instrumentalness 0.094270 -0.048149 \n", + "key -0.003252 0.004185 \n", + "liveness 0.080717 -0.041585 \n", + "loudness -0.102282 -0.123773 \n", + "mode 0.011702 0.004643 \n", + "speechiness 0.037621 -0.032551 \n", + "tempo -0.033271 -0.017837 \n", + "time_signature -0.031822 0.016646 \n", + "valence -0.012078 0.101173 \n", + "rank 0.029841 -0.503327 \n", + "album_length 1.000000 -0.066330 \n", + "weeks_top -0.066330 1.000000 " + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_ranking.corr()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.37654698821670396" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import r2_score\n", + "\n", + "r2_score(y_test, y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'clf = linear_model.SGDRegressor(max_iter=1000, tol=1e-3)\\nmodel_clf= clf.fit(x_train, y_train)\\nmodel_clf.score(x_test, y_test)'" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"clf = linear_model.SGDRegressor(max_iter=1000, tol=1e-3)\n", + "model_clf= clf.fit(x_train, y_train)\n", + "model_clf.score(x_test, y_test)\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [], + "source": [ + "mask_corr = test_ranking.corr() " + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencerankalbum_lengthweeks_top
acousticness1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
danceabilityNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
duration_msNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
energyNaNNaNNaN1.000000NaNNaNNaN0.761897NaNNaNNaNNaNNaNNaNNaNNaN
instrumentalnessNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
keyNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
livenessNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
loudnessNaNNaNNaN0.761897NaNNaNNaN1.000000NaNNaNNaNNaNNaNNaNNaNNaN
modeNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaN
speechinessNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaN
tempoNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaN
time_signatureNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaN
valenceNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaN
rankNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaN
album_lengthNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaN
weeks_topNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy \\\n", + "acousticness 1.0 NaN NaN NaN \n", + "danceability NaN 1.0 NaN NaN \n", + "duration_ms NaN NaN 1.0 NaN \n", + "energy NaN NaN NaN 1.000000 \n", + "instrumentalness NaN NaN NaN NaN \n", + "key NaN NaN NaN NaN \n", + "liveness NaN NaN NaN NaN \n", + "loudness NaN NaN NaN 0.761897 \n", + "mode NaN NaN NaN NaN \n", + "speechiness NaN NaN NaN NaN \n", + "tempo NaN NaN NaN NaN \n", + "time_signature NaN NaN NaN NaN \n", + "valence NaN NaN NaN NaN \n", + "rank NaN NaN NaN NaN \n", + "album_length NaN NaN NaN NaN \n", + "weeks_top NaN NaN NaN NaN \n", + "\n", + " instrumentalness key liveness loudness mode \\\n", + "acousticness NaN NaN NaN NaN NaN \n", + "danceability NaN NaN NaN NaN NaN \n", + "duration_ms NaN NaN NaN NaN NaN \n", + "energy NaN NaN NaN 0.761897 NaN \n", + "instrumentalness 1.0 NaN NaN NaN NaN \n", + "key NaN 1.0 NaN NaN NaN \n", + "liveness NaN NaN 1.0 NaN NaN \n", + "loudness NaN NaN NaN 1.000000 NaN \n", + "mode NaN NaN NaN NaN 1.0 \n", + "speechiness NaN NaN NaN NaN NaN \n", + "tempo NaN NaN NaN NaN NaN \n", + "time_signature NaN NaN NaN NaN NaN \n", + "valence NaN NaN NaN NaN NaN \n", + "rank NaN NaN NaN NaN NaN \n", + "album_length NaN NaN NaN NaN NaN \n", + "weeks_top NaN NaN NaN NaN NaN \n", + "\n", + " speechiness tempo time_signature valence rank \\\n", + "acousticness NaN NaN NaN NaN NaN \n", + "danceability NaN NaN NaN NaN NaN \n", + "duration_ms NaN NaN NaN NaN NaN \n", + "energy NaN NaN NaN NaN NaN \n", + "instrumentalness NaN NaN NaN NaN NaN \n", + "key NaN NaN NaN NaN NaN \n", + "liveness NaN NaN NaN NaN NaN \n", + "loudness NaN NaN NaN NaN NaN \n", + "mode NaN NaN NaN NaN NaN \n", + "speechiness 1.0 NaN NaN NaN NaN \n", + "tempo NaN 1.0 NaN NaN NaN \n", + "time_signature NaN NaN 1.0 NaN NaN \n", + "valence NaN NaN NaN 1.0 NaN \n", + "rank NaN NaN NaN NaN 1.0 \n", + "album_length NaN NaN NaN NaN NaN \n", + "weeks_top NaN NaN NaN NaN NaN \n", + "\n", + " 
album_length weeks_top \n", + "acousticness NaN NaN \n", + "danceability NaN NaN \n", + "duration_ms NaN NaN \n", + "energy NaN NaN \n", + "instrumentalness NaN NaN \n", + "key NaN NaN \n", + "liveness NaN NaN \n", + "loudness NaN NaN \n", + "mode NaN NaN \n", + "speechiness NaN NaN \n", + "tempo NaN NaN \n", + "time_signature NaN NaN \n", + "valence NaN NaN \n", + "rank NaN NaN \n", + "album_length 1.0 NaN \n", + "weeks_top NaN 1.0 " + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mask_corr[mask_corr>0.7]" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "test_ranking.loc[test_ranking['rank'] <= 10, 'rank'] = 1\n", + "test_ranking.loc[test_ranking['rank'] > 10, 'rank'] = 0" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [], + "source": [ + "x = test_ranking.drop(columns=[\"song\",\"album\",\"artist\",\"id\",\"rank\",\"album_id\",\"date\"])\n", + "y = test_ranking[\"rank\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"from sklearn.preprocessing import MinMaxScaler, StandardScaler # what's the difference??\\n\\nscaler = MinMaxScaler(feature_range=(-2,2))\\nds['thalach'] = scaler.fit_transform(ds.thalach.values.reshape(-1, 1))\\nds.describe()\"" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"from sklearn.preprocessing import MinMaxScaler, StandardScaler # what's the difference??\n", + "\n", + "scaler = MinMaxScaler(feature_range=(-2,2))\n", + "ds['thalach'] = scaler.fit_transform(ds.thalach.values.reshape(-1, 1))\n", + 
"ds.describe()\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "0.8750477963791126" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lr = LogisticRegression()\n", + "log_mod = lr.fit(x_train,y_train)\n", + "\n", + "y_pred = log_mod.predict(x_test)\n", + "log_mod.score(x_test,y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8562118668018819" + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "dtc = DecisionTreeClassifier()\n", + "dtc.fit(x_train, y_train)\n", + "\n", + "dtc.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'from sklearn.svm import SVC\\nsvm = SVC()\\nsvm.fit(x_train, y_train)\\n\\nacc = svm.score(x_test,y_test)'" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"from sklearn.svm import SVC\n", + "svm = SVC()\n", + "svm.fit(x_train, y_train)\n", + "\n", + "acc = svm.score(x_test,y_test)\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8760452860301574" + ] + }, + 
"execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "knn = KNeighborsClassifier(n_neighbors = 23) # n_neighbors means k\n", + "knn.fit(x_train, y_train)\n", + "y_pred = knn.predict(x_test)\n", + "\n", + "acc = knn.score(x_test, y_test)\n", + "acc" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from sklearn.model_selection import GridSearchCV" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'n_neighbors': 23}" + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "param_grid = {'n_neighbors': np.arange(1,30)}\n", + "knn = KNeighborsClassifier()\n", + "knn_cv = GridSearchCV(knn, param_grid, cv=5)\n", + "knn_cv.fit(x_train,y_train)\n", + "knn_cv.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8699938488138186" + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knn = KNeighborsClassifier(n_neighbors = 2) # n_neighbors means k\n", + "knn.fit(x_train, y_train)\n", + "y_pred = knn.predict(x_test)\n", + "\n", + "acc = knn.score(x_test, y_test)\n", + "acc" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import classification_report" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " class 0 0.88 0.99 0.93 52690\n", + " class 1 0.35 0.06 0.10 7461\n", + "\n", + " accuracy 0.87 60151\n", + " macro avg 0.61 0.52 0.51 60151\n", + "weighted 
avg 0.81 0.87 0.83 60151\n", + "\n" + ] + } + ], + "source": [ + "target_names = ['class 0', 'class 1']\n", + "print(classification_report(y_test, y_pred, target_names=target_names))" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Predicted Not StwDevPredicted StwDev
True Not StwDev4242310267
True StwDev48942567
\n", + "
" + ], + "text/plain": [ + " Predicted Not StwDev Predicted StwDev\n", + "True Not StwDev 42423 10267\n", + "True StwDev 4894 2567" + ] + }, + "execution_count": 141, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "\n", + "cm = pd.DataFrame(\n", + " confusion_matrix(y_test, y_pred),\n", + " columns=['Predicted Not StwDev', 'Predicted StwDev'],\n", + " index=['True Not StwDev', 'True StwDev'])\n", + "\n", + "cm" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "52316 374 7142 319\n" + ] + } + ], + "source": [ + "tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()\n", + "print(tn, fp, fn, tp)" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0 263582\n", + "1.0 37169\n", + "Name: rank, dtype: int64" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_ranking[\"rank\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.12358728649281299" + ] + }, + "execution_count": 127, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "37169/ (263582 + 37169)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0 29708\n", + "0.0 29708\n", + "Name: rank, dtype: int64" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# resample the data\n", + "\n", + "from sklearn.utils import resample\n", + "\n", + "# concatenate our training data back together\n", + "\n", + "x_concat = pd.concat([x_train, y_train], axis=1)\n", + "\n", + "# separate minority and majority classes\n", + 
"\n", + "non_hit = x_concat[x_concat['rank']==0]\n", + "\n", + "hit = x_concat[x_concat['rank']==1]\n", + "\n", + "y_train.value_counts()\n", + "\n", + "# downsample non occupied sample\n", + "\n", + "non_hit_downsampled = resample(non_hit,\n", + "\n", + " replace = False, # sample without replacement\n", + "\n", + " n_samples = len(hit), # match minority n\n", + "\n", + " random_state = 29) # reproducible results\n", + "\n", + "# combine minority and downsampled majority\n", + "\n", + "downsampled = pd.concat([non_hit_downsampled, hit])\n", + "\n", + "# checking counts\n", + "\n", + "downsampled[\"rank\"].value_counts()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencealbum_lengthweeks_toprank
2711890.37800.824180307.00.5170.00004310.00.0432-12.3961.00.0366124.0444.00.963012.018.00.0
2173060.86400.46661000.00.9630.00010210.00.7750-14.7530.00.8720102.6234.00.032215.03.00.0
1014930.04740.707279960.00.8380.0009215.00.0955-5.4800.00.0515117.9654.00.861013.02.00.0
595280.25100.435280376.00.7320.4420006.00.1450-10.0340.00.0275162.3664.00.751011.03.00.0
896370.67500.46996853.00.3170.0442002.00.3220-10.8631.00.0264159.8413.00.377024.01.00.0
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy instrumentalness \\\n", + "271189 0.3780 0.824 180307.0 0.517 0.000043 \n", + "217306 0.8640 0.466 61000.0 0.963 0.000102 \n", + "101493 0.0474 0.707 279960.0 0.838 0.000921 \n", + "59528 0.2510 0.435 280376.0 0.732 0.442000 \n", + "89637 0.6750 0.469 96853.0 0.317 0.044200 \n", + "\n", + " key liveness loudness mode speechiness tempo time_signature \\\n", + "271189 10.0 0.0432 -12.396 1.0 0.0366 124.044 4.0 \n", + "217306 10.0 0.7750 -14.753 0.0 0.8720 102.623 4.0 \n", + "101493 5.0 0.0955 -5.480 0.0 0.0515 117.965 4.0 \n", + "59528 6.0 0.1450 -10.034 0.0 0.0275 162.366 4.0 \n", + "89637 2.0 0.3220 -10.863 1.0 0.0264 159.841 3.0 \n", + "\n", + " valence album_length weeks_top rank \n", + "271189 0.9630 12.0 18.0 0.0 \n", + "217306 0.0322 15.0 3.0 0.0 \n", + "101493 0.8610 13.0 2.0 0.0 \n", + "59528 0.7510 11.0 3.0 0.0 \n", + "89637 0.3770 24.0 1.0 0.0 " + ] + }, + "execution_count": 129, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "downsampled.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [], + "source": [ + "x_traind = downsampled.drop(columns=[\"rank\"])\n", + "y_traind = downsampled[\"rank\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. 
Specify a solver to silence this warning.\n", + " FutureWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "0.8038436601220262" + ] + }, + "execution_count": 147, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lr = LogisticRegression()\n", + "log_mod = lr.fit(x_traind,y_traind)\n", + "y_pred = log_mod.predict(x_test)\n", + "log_mod.score(x_test,y_test)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Predicted Not StwDevPredicted StwDev
True Not StwDev429619729
True StwDev20705391
\n", + "
" + ], + "text/plain": [ + " Predicted Not StwDev Predicted StwDev\n", + "True Not StwDev 42961 9729\n", + "True StwDev 2070 5391" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cm = pd.DataFrame(\n", + " confusion_matrix(y_test, y_pred),\n", + " columns=['Predicted Not StwDev', 'Predicted StwDev'],\n", + " index=['True Not StwDev', 'True StwDev'])\n", + "\n", + "cm" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "42961 9729 2070 5391\n" + ] + } + ], + "source": [ + "tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()\n", + "print(tn, fp, fn, tp)" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " class 0 0.95 0.82 0.88 52690\n", + " class 1 0.36 0.72 0.48 7461\n", + "\n", + " accuracy 0.80 60151\n", + " macro avg 0.66 0.77 0.68 60151\n", + "weighted avg 0.88 0.80 0.83 60151\n", + "\n" + ] + } + ], + "source": [ + "print(classification_report(y_test, y_pred, target_names=target_names))" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.750494588618643" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dtc = DecisionTreeClassifier()\n", + "dtc.fit(x_traind, y_traind)\n", + "\n", + "dtc.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7479509900084786" + ] + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knn = KNeighborsClassifier(n_neighbors = 2) # n_neighbors means k\n", + "knn.fit(x_traind, y_traind)\n", + "y_pred = 
knn.predict(x_test)\n", + "\n", + "acc = knn.score(x_test, y_test)\n", + "acc" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "not enough values to unpack (expected 4, got 2)", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mconfusion_matrix\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mValueError\u001b[0m: not enough values to unpack (expected 4, got 2)" + ] + } + ], + "source": [ + "tn, fp, fn, tp = confusion_matrix(y_test, y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8153539571076106" + ] + }, + "execution_count": 156, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "42961/(42961+9729)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From f8fef4de601fe08c23a2ca8397774afba1ce1ab8 Mon Sep 17 00:00:00 2001 From: naivm <36080596+naivm@users.noreply.github.com> Date: Wed, 2 Oct 2019 12:18:52 +0100 Subject: [PATCH 2/2] Update Project_README.md --- your-code/Project_README.md | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/your-code/Project_README.md b/your-code/Project_README.md index 7174412..71616da 100644 --- a/your-code/Project_README.md +++ b/your-code/Project_README.md @@ -16,31 +16,27 @@ ## Project Description -This Prject goal is to practice what we have learned in Business Intelligence and Tableau. By making chosing Dataset(s) and making a Story through the visualizations in Tableau. +The goal of this project is to practice what we have learned in Machine Learning by choosing a dataset and building a model with the sklearn library. ## Hypotheses / Questions -How are the two shop ecosystems different? -Who spends more money? And on what? -Who is most engarged? +How accurately can we predict whether a song will be in the top 200? +Or in the top 10? ## Dataset -Data sets removed from Kaggle +Data set taken from the Spotify API + +[Dataset](https://components.one/datasets/billboard-200/) -[Dataset Apple](https://www.kaggle.com/ramamet4/app-store-apple-data-set-10k-apps#AppleStore.csv) -[Dataset Google](https://www.kaggle.com/gauthamp10/google-playstore-apps#Google-Playstore-Full.csv) ## Workflow -Data collection --> Data cleaning --> Data visualization(Exploration phase) --> Create Dashboard --> Present Story +Data collection --> Data cleaning --> Model Testing --> Model Comparisons -We decided to compare two data sets from different app stores. First we had to clean unwanted columns from both data sets. Then we cheked for duplicates in terms of App Names. Then cheked for errors in the columns like non numeric ratings and non text categories. 
The Category/Genre columns had different values for each store, so we made the adjustments to make them equal. After all that cleaning it was time to concatenate. But before concat we reanme the remaining columns to the same names and made a column to have an identifier for each store. A column full of "A"s to mark the apple store and one of "G"s for the google one. -Finally we concatenated and exported it as a excel to be read by Tableau. //n -Inside Tableau we tried to make every graph that could expose the differences between both stores. Manly relating things like Price, Categories and Ratings @@ -52,6 +48,5 @@ Trello ## Links Include the links to your repository, slides and trello. Feel free to include any other links associated to your project. -[Repository](https://github.com/naivm/Project-Week-6-Tableau) -[Tableau Public](https://public.tableau.com/profile/miguel.vian#!/vizhome/Project6-StorevsPlay/Dashboard1) -[Trello](https://trello.com/invite/b/ywUwW4In/4c9c5895f202192bd3cf0c2896f4817b/project-5) +[Repository](https://github.com/naivm/supervised-learning-project) +[Trello](https://trello.com/invite/b/Pxgi4jVk/4b6e916923c88d4310b78989f3650721/project-6)