diff --git a/your-code/Project_README.md b/your-code/Project_README.md new file mode 100644 index 0000000..71616da --- /dev/null +++ b/your-code/Project_README.md @@ -0,0 +1,52 @@ +Ironhack Logo + +# App Store vs Google Play? +*Miguel Vian and Alona Sorochynska* + +*Data Squad 21 Lisbon 20/09/19* + +## Content +- [Project Description](#project-description) +- [Hypotheses / Questions](#hypotheses--questions) +- [Dataset](#dataset) +- [Workflow](#workflow) +- [Organization](#organization) +- [Links](#links) + + + +## Project Description +The goal of this project is to practice what we have learned in Machine Learning by choosing a dataset and building a model with the sklearn library. + + + +## Hypotheses / Questions +How accurately can we predict a song to be in the top 200? +Or the top 10? + + + +## Dataset +Data sets retrieved from the Spotify API + +[Dataset](https://components.one/datasets/billboard-200/) + + + + +## Workflow +Data collection --> Data cleaning --> Model Testing --> Model Comparisons + + + + +## Organization +Trello + + + +## Links +Include the links to your repository, slides and Trello. Feel free to include any other links associated to your project. 
+ +[Repository](https://github.com/naivm/supervised-learning-project) +[Trello](https://trello.com/invite/b/Pxgi4jVk/4b6e916923c88d4310b78989f3650721/project-6) diff --git a/your-code/spotify_project.ipynb b/your-code/spotify_project.ipynb new file mode 100644 index 0000000..7bd4019 --- /dev/null +++ b/your-code/spotify_project.ipynb @@ -0,0 +1,8169 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sqlite3\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Create your connection.\n", + "cnx = sqlite3.connect('billboard-200.db')\n", + "\n", + "df_albums = pd.read_sql_query(\"SELECT * FROM albums\", cnx)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "df_acoustic_features = pd.read_sql_query(\"SELECT * FROM acoustic_features\", cnx)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idsongalbumartistacousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencealbum_iddate
00Veyvc3n9AcLSoK3r1dA12Voices In My HeadHoodie SZNA Boogie Wit da Hoodie0.05550.754142301.00.6630.0000006.00.101-6.3110.00.42790.1954.00.2073r5hf3Cj3EMh1C2saQ8jyt2018-12-21
177JzXZonNumWsuXKy9vr3UBeastyHoodie SZNA Boogie Wit da Hoodie0.29200.860152829.00.4180.0000007.00.106-9.0610.00.158126.0234.00.3743r5hf3Cj3EMh1C2saQ8jyt2018-12-21
218yllZD0TdF7ykcREib8Z1I Did ItHoodie SZNA Boogie Wit da Hoodie0.15300.718215305.00.4540.0000468.00.116-9.0121.00.12789.4834.00.1963r5hf3Cj3EMh1C2saQ8jyt2018-12-21
31wJRveJZLSb1rjhnUHQiv6Swervin (feat. 6ix9ine)Hoodie SZNA Boogie Wit da Hoodie0.01530.581189487.00.6620.0000009.00.111-5.2391.00.30393.0234.00.4343r5hf3Cj3EMh1C2saQ8jyt2018-12-21
40jAfdqv18goRTUxm3ilRjbStartender (feat. Offset and Tyga)Hoodie SZNA Boogie Wit da Hoodie0.02350.736192779.00.6220.0000006.00.151-4.6530.00.133191.9714.00.5063r5hf3Cj3EMh1C2saQ8jyt2018-12-21
\n", + "
" + ], + "text/plain": [ + " id song album \\\n", + "0 0Veyvc3n9AcLSoK3r1dA12 Voices In My Head Hoodie SZN \n", + "1 77JzXZonNumWsuXKy9vr3U Beasty Hoodie SZN \n", + "2 18yllZD0TdF7ykcREib8Z1 I Did It Hoodie SZN \n", + "3 1wJRveJZLSb1rjhnUHQiv6 Swervin (feat. 6ix9ine) Hoodie SZN \n", + "4 0jAfdqv18goRTUxm3ilRjb Startender (feat. Offset and Tyga) Hoodie SZN \n", + "\n", + " artist acousticness danceability duration_ms energy \\\n", + "0 A Boogie Wit da Hoodie 0.0555 0.754 142301.0 0.663 \n", + "1 A Boogie Wit da Hoodie 0.2920 0.860 152829.0 0.418 \n", + "2 A Boogie Wit da Hoodie 0.1530 0.718 215305.0 0.454 \n", + "3 A Boogie Wit da Hoodie 0.0153 0.581 189487.0 0.662 \n", + "4 A Boogie Wit da Hoodie 0.0235 0.736 192779.0 0.622 \n", + "\n", + " instrumentalness key liveness loudness mode speechiness tempo \\\n", + "0 0.000000 6.0 0.101 -6.311 0.0 0.427 90.195 \n", + "1 0.000000 7.0 0.106 -9.061 0.0 0.158 126.023 \n", + "2 0.000046 8.0 0.116 -9.012 1.0 0.127 89.483 \n", + "3 0.000000 9.0 0.111 -5.239 1.0 0.303 93.023 \n", + "4 0.000000 6.0 0.151 -4.653 0.0 0.133 191.971 \n", + "\n", + " time_signature valence album_id date \n", + "0 4.0 0.207 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 \n", + "1 4.0 0.374 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 \n", + "2 4.0 0.196 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 \n", + "3 4.0 0.434 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 \n", + "4 4.0 0.506 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_acoustic_features.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iddateartistalbumranklengthtrack_length
01NoneNoneNoneNoneNaNNaN
122019-01-19A Boogie Wit da HoodieHoodie SZN120.0185233.800000
232019-01-1921 SavageI Am > I Was215.0211050.733333
342019-01-19SoundtrackSpider-Man: Into The Spider-Verse313.0190866.384615
452019-01-19Meek MillChampionships419.0219173.894737
\n", + "
" + ], + "text/plain": [ + " id date artist album \\\n", + "0 1 None None None \n", + "1 2 2019-01-19 A Boogie Wit da Hoodie Hoodie SZN \n", + "2 3 2019-01-19 21 Savage I Am > I Was \n", + "3 4 2019-01-19 Soundtrack Spider-Man: Into The Spider-Verse \n", + "4 5 2019-01-19 Meek Mill Championships \n", + "\n", + " rank length track_length \n", + "0 None NaN NaN \n", + "1 1 20.0 185233.800000 \n", + "2 2 15.0 211050.733333 \n", + "3 3 13.0 190866.384615 \n", + "4 4 19.0 219173.894737 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_albums.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalence
count339850.000000339850.0000003.398500e+05339850.000000339850.000000339850.000000339850.000000339850.000000339850.000000339850.000000339850.000000339850.000000339850.000000
mean0.2988060.5411762.367351e+050.6167160.0982775.2395260.233439-9.0727590.7057000.092625119.9339743.8875470.506169
std0.3155330.1708211.022557e+050.2440430.2419803.5533700.2152564.6010320.4557280.12889430.3808990.4536730.253193
min0.0000000.0000001.062000e+030.0000000.0000000.0000000.000000-60.0000000.0000000.0000000.0000000.0000000.000000
25%0.0215000.4250001.865218e+050.4400000.0000002.0000000.098200-11.5750000.0000000.03340096.1882504.0000000.302000
50%0.1640000.5470002.263730e+050.6470000.0000535.0000000.141000-8.1210001.0000000.044700118.2230004.0000000.506000
75%0.5460000.6640002.710000e+050.8220000.0111008.0000000.299000-5.6790001.0000000.084000139.2880004.0000000.712000
max0.9960000.9880004.734079e+061.0000001.00000011.0000001.0000003.7440001.0000000.968000247.8240005.0000001.000000
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy \\\n", + "count 339850.000000 339850.000000 3.398500e+05 339850.000000 \n", + "mean 0.298806 0.541176 2.367351e+05 0.616716 \n", + "std 0.315533 0.170821 1.022557e+05 0.244043 \n", + "min 0.000000 0.000000 1.062000e+03 0.000000 \n", + "25% 0.021500 0.425000 1.865218e+05 0.440000 \n", + "50% 0.164000 0.547000 2.263730e+05 0.647000 \n", + "75% 0.546000 0.664000 2.710000e+05 0.822000 \n", + "max 0.996000 0.988000 4.734079e+06 1.000000 \n", + "\n", + " instrumentalness key liveness loudness \\\n", + "count 339850.000000 339850.000000 339850.000000 339850.000000 \n", + "mean 0.098277 5.239526 0.233439 -9.072759 \n", + "std 0.241980 3.553370 0.215256 4.601032 \n", + "min 0.000000 0.000000 0.000000 -60.000000 \n", + "25% 0.000000 2.000000 0.098200 -11.575000 \n", + "50% 0.000053 5.000000 0.141000 -8.121000 \n", + "75% 0.011100 8.000000 0.299000 -5.679000 \n", + "max 1.000000 11.000000 1.000000 3.744000 \n", + "\n", + " mode speechiness tempo time_signature \\\n", + "count 339850.000000 339850.000000 339850.000000 339850.000000 \n", + "mean 0.705700 0.092625 119.933974 3.887547 \n", + "std 0.455728 0.128894 30.380899 0.453673 \n", + "min 0.000000 0.000000 0.000000 0.000000 \n", + "25% 0.000000 0.033400 96.188250 4.000000 \n", + "50% 1.000000 0.044700 118.223000 4.000000 \n", + "75% 1.000000 0.084000 139.288000 4.000000 \n", + "max 1.000000 0.968000 247.824000 5.000000 \n", + "\n", + " valence \n", + "count 339850.000000 \n", + "mean 0.506169 \n", + "std 0.253193 \n", + "min 0.000000 \n", + "25% 0.302000 \n", + "50% 0.506000 \n", + "75% 0.712000 \n", + "max 1.000000 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_acoustic_features.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlengthtrack_length
count573947.000000492547.0000004.688670e+05
mean286974.00000013.7289332.443541e+05
std165684.3718127.9606407.195498e+04
min1.0000001.0000002.542290e+04
25%143487.50000010.0000002.100814e+05
50%286974.00000012.0000002.372190e+05
75%430460.50000015.0000002.680566e+05
max573947.000000666.0000004.457023e+06
\n", + "
" + ], + "text/plain": [ + " id length track_length\n", + "count 573947.000000 492547.000000 4.688670e+05\n", + "mean 286974.000000 13.728933 2.443541e+05\n", + "std 165684.371812 7.960640 7.195498e+04\n", + "min 1.000000 1.000000 2.542290e+04\n", + "25% 143487.500000 10.000000 2.100814e+05\n", + "50% 286974.000000 12.000000 2.372190e+05\n", + "75% 430460.500000 15.000000 2.680566e+05\n", + "max 573947.000000 666.000000 4.457023e+06" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_albums.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id int64\n", + "date object\n", + "artist object\n", + "album object\n", + "rank object\n", + "length float64\n", + "track_length float64\n", + "dtype: object" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_albums.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "33012" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df_albums.album.unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id 0\n", + "date 1\n", + "artist 1\n", + "album 1\n", + "rank 1\n", + "length 81400\n", + "track_length 105080\n", + "dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_albums.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + "5 False\n", + "6 False\n", + "7 False\n", + "8 False\n", + "9 False\n", + "10 False\n", + "11 False\n", + "12 False\n", + "13 
False\n", + "14 False\n", + "15 False\n", + "16 False\n", + "17 False\n", + "18 False\n", + "19 False\n", + "20 False\n", + "21 False\n", + "22 False\n", + "23 False\n", + "24 False\n", + "25 False\n", + "26 False\n", + "27 False\n", + "28 False\n", + "29 False\n", + " ... \n", + "573917 True\n", + "573918 True\n", + "573919 True\n", + "573920 True\n", + "573921 True\n", + "573922 True\n", + "573923 True\n", + "573924 True\n", + "573925 True\n", + "573926 True\n", + "573927 True\n", + "573928 True\n", + "573929 True\n", + "573930 True\n", + "573931 True\n", + "573932 True\n", + "573933 True\n", + "573934 True\n", + "573935 True\n", + "573936 True\n", + "573937 True\n", + "573938 True\n", + "573939 True\n", + "573940 True\n", + "573941 True\n", + "573942 True\n", + "573943 True\n", + "573944 True\n", + "573945 True\n", + "573946 True\n", + "Length: 573947, dtype: bool" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_albums.duplicated(subset=[\"album\",\"artist\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "df_albums.dropna(subset=[\"album\"], inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "df_albums[\"rank\"] = pd.to_numeric(df_albums[\"rank\"], downcast=\"integer\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "df_albums[\"date\"] = pd.to_datetime(df_albums[\"date\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2019-01-19 00:00:00')" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "max(df_albums[\"date\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "albums 
= df_albums.groupby(by=\"album\", as_index = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "df_albums.sort_values([\"album\",\"date\"], axis=0, ascending=False, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "#df_albums.sort_values('rank', ascending=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "albums = df_albums.sort_values('date').groupby(by=[\"artist\",'album'],as_index=False).agg({\"date\":['first','last'],\"rank\":\"min\",\"length\":\"mean\",\"track_length\":\"mean\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "albums = albums.droplevel(1, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "albums.columns = [\"artist\",'album', 'date_start', 'date_end', 'rank', 'length', 'track_length']" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumdate_startdate_endranklengthtrack_length
0Roots Of Country Music (1965)1965-07-101965-08-2110772.0NaN
1Silhouette1988-10-221989-11-1881.0230416.000000
2!!! (Chk Chk Chk)Myth Takes2007-03-242007-03-2419510.0290017.000000
3\"Weird Al\" Yankovic\"Weird Al\" Yankovic In 3-D1984-03-171984-08-1817NaNNaN
4\"Weird Al\" YankovicAlapalooza1993-10-231994-04-164612.0224445.166667
\n", + "
" + ], + "text/plain": [ + " artist album date_start date_end \\\n", + "0 Roots Of Country Music (1965) 1965-07-10 1965-08-21 \n", + "1 Silhouette 1988-10-22 1989-11-18 \n", + "2 !!! (Chk Chk Chk) Myth Takes 2007-03-24 2007-03-24 \n", + "3 \"Weird Al\" Yankovic \"Weird Al\" Yankovic In 3-D 1984-03-17 1984-08-18 \n", + "4 \"Weird Al\" Yankovic Alapalooza 1993-10-23 1994-04-16 \n", + "\n", + " rank length track_length \n", + "0 107 72.0 NaN \n", + "1 8 1.0 230416.000000 \n", + "2 195 10.0 290017.000000 \n", + "3 17 NaN NaN \n", + "4 46 12.0 224445.166667 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "albums[\"days_top\"]= (albums.date_end-albums.date_start).astype('timedelta64[D]')" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumdate_startdate_endranklengthtrack_lengthdays_top
0Roots Of Country Music (1965)1965-07-101965-08-2110772.0NaN42.0
1Silhouette1988-10-221989-11-1881.0230416.000000392.0
2!!! (Chk Chk Chk)Myth Takes2007-03-242007-03-2419510.0290017.0000000.0
3\"Weird Al\" Yankovic\"Weird Al\" Yankovic In 3-D1984-03-171984-08-1817NaNNaN154.0
4\"Weird Al\" YankovicAlapalooza1993-10-231994-04-164612.0224445.166667175.0
\n", + "
" + ], + "text/plain": [ + " artist album date_start date_end \\\n", + "0 Roots Of Country Music (1965) 1965-07-10 1965-08-21 \n", + "1 Silhouette 1988-10-22 1989-11-18 \n", + "2 !!! (Chk Chk Chk) Myth Takes 2007-03-24 2007-03-24 \n", + "3 \"Weird Al\" Yankovic \"Weird Al\" Yankovic In 3-D 1984-03-17 1984-08-18 \n", + "4 \"Weird Al\" Yankovic Alapalooza 1993-10-23 1994-04-16 \n", + "\n", + " rank length track_length days_top \n", + "0 107 72.0 NaN 42.0 \n", + "1 8 1.0 230416.000000 392.0 \n", + "2 195 10.0 290017.000000 0.0 \n", + "3 17 NaN NaN 154.0 \n", + "4 46 12.0 224445.166667 175.0 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 36338.000000\n", + "mean 257.963619\n", + "std 1191.680746\n", + "min 0.000000\n", + "25% 7.000000\n", + "50% 56.000000\n", + "75% 147.000000\n", + "max 18823.000000\n", + "Name: days_top, dtype: float64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums[\"days_top\"].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumdate_startdate_endranklengthtrack_lengthdays_top
0Roots Of Country Music (1965)1965-07-101965-08-2110772.0NaN42.0
1Silhouette1988-10-221989-11-1881.0230416.000000392.0
2!!! (Chk Chk Chk)Myth Takes2007-03-242007-03-2419510.0290017.0000000.0
18\"Weird Al\" YankovicWeird Al Yankovic1983-05-211983-07-02139NaNNaN42.0
17\"Weird Al\" YankovicThe Essential \"Weird Al\" Yankovic2009-11-142009-11-1417838.0241838.9736840.0
\n", + "
" + ], + "text/plain": [ + " artist album date_start \\\n", + "0 Roots Of Country Music (1965) 1965-07-10 \n", + "1 Silhouette 1988-10-22 \n", + "2 !!! (Chk Chk Chk) Myth Takes 2007-03-24 \n", + "18 \"Weird Al\" Yankovic Weird Al Yankovic 1983-05-21 \n", + "17 \"Weird Al\" Yankovic The Essential \"Weird Al\" Yankovic 2009-11-14 \n", + "\n", + " date_end rank length track_length days_top \n", + "0 1965-08-21 107 72.0 NaN 42.0 \n", + "1 1989-11-18 8 1.0 230416.000000 392.0 \n", + "2 2007-03-24 195 10.0 290017.000000 0.0 \n", + "18 1983-07-02 139 NaN NaN 42.0 \n", + "17 2009-11-14 178 38.0 241838.973684 0.0 " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#sns.pairplot(albums)\n", + "albums.isna().sum()#\n", + "albums.sort_values(\"artist\").head()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3327" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums.album.duplicated().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "albums.at[1, 'artist'] = \"Kenny G\"" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "albums.at[0, 'artist'] = \"Various Artists\"" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumdate_startdate_endranklengthtrack_lengthdays_top
0Various ArtistsRoots Of Country Music (1965)1965-07-101965-08-2110772.0NaN42.0
1Kenny GSilhouette1988-10-221989-11-1881.0230416.000000392.0
2!!! (Chk Chk Chk)Myth Takes2007-03-242007-03-2419510.0290017.0000000.0
3\"Weird Al\" Yankovic\"Weird Al\" Yankovic In 3-D1984-03-171984-08-1817NaNNaN154.0
4\"Weird Al\" YankovicAlapalooza1993-10-231994-04-164612.0224445.166667175.0
\n", + "
" + ], + "text/plain": [ + " artist album date_start date_end \\\n", + "0 Various Artists Roots Of Country Music (1965) 1965-07-10 1965-08-21 \n", + "1 Kenny G Silhouette 1988-10-22 1989-11-18 \n", + "2 !!! (Chk Chk Chk) Myth Takes 2007-03-24 2007-03-24 \n", + "3 \"Weird Al\" Yankovic \"Weird Al\" Yankovic In 3-D 1984-03-17 1984-08-18 \n", + "4 \"Weird Al\" Yankovic Alapalooza 1993-10-23 1994-04-16 \n", + "\n", + " rank length track_length days_top \n", + "0 107 72.0 NaN 42.0 \n", + "1 8 1.0 230416.000000 392.0 \n", + "2 195 10.0 290017.000000 0.0 \n", + "3 17 NaN NaN 154.0 \n", + "4 46 12.0 224445.166667 175.0 " + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + } + ], + "source": [ + "albums.weeks_top = (albums.days_top + 7) / 7" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYcAAAEGCAYAAACO8lkDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3deXxc1X338c9Po122JFuSV9mWV8A2xoABQygESMAkKSYFErMV+tDSpNA2bdPXA08TmtDQB5ImpCmQhgQKpQFMSPKgJAaHNUACxrJjG8vCtjBeZHmRbVmb0f57/pgrZ9CMrNE6Wr7v12temjn33DPnWtZ8595z77nm7oiIiERKSnQHRERk6FE4iIhIFIWDiIhEUTiIiEgUhYOIiERJTnQH+kN+fr4XFRUluhsiIsPKunXrDrl7QaxlIyIcioqKKCkpSXQ3RESGFTPb1dUyHVYSEZEoCgcREYmicBARkSgKBxERiaJwEBGRKHGFg5ktM7OtZlZuZnfEWJ5mZiuD5WvMrCgozzOzV82s3sweiKg/1sw2RDwOmdl3g2U3m1lVxLI/759NFRGReHV7KquZhYAHgU8CFcBaMyt29y0R1W4Bqt19jpmtAO4DPg80Al8FFgYPANy9Dlgc8R7rgJ9FtLfS3W/v9VaJiEifxLPncDZQ7u473L0ZeBpY3qnOcuDx4PmzwCVmZu7e4O5vEg6JmMxsLjABeKPHvRcRkQERTzhMBfZEvK4IymLWcfdWoAbIi7MP1xLeU4i8scRVZrbJzJ41s2mxVjKzW82sxMxKqqqq4nwrERGJRzxXSFuMss53CIqnTldWADdGvP4F8JS7N5nZFwjvkVwc1bj7w8DDAEuWLBl2dyx6cs3uLpddd870QeyJiEi0ePYcKoDIb++FQGVXdcwsGcgBjnTXsJmdBiS7+7qOMnc/7O5NwcsfAmfG0UcREelH8YTDWmCumc00s1TC3/SLO9UpBm4Knl8NvOLx3X/0WuCpyAIzmxzx8gqgLI52RESkH3V7WMndW83sdmA1EAIedfdSM7sbKHH3YuAR4AkzKye8x7CiY30z2wlkA6lmdiVwacSZTp8DPtXpLf/GzK4AWoO2bu7D9omISC/ENSuru68CVnUquyvieSNwTRfrFp2g3Vkxyu4E7oynXyIiMjB0hbSIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRIkrHMxsmZltNbNyM7sjxvI0M1sZLF9jZkVBeZ6ZvWpm9Wb2QKd1Xgva3BA8JpyoLRERGTzdhoOZhYAHgcuB+cC1Zja/U7VbgGp3nwPcD9wXlDcCXwW+3EXz17v74uBxsJu2RERkkMSz53A2UO7uO9y9GXgaWN6pznLg8eD5s8AlZmbu3uDubxIOiXjFbKsH64uISB/FEw5TgT0RryuCsph13L0VqAHy4mj7v4JDSl+NCIC42jKzW82sxMxKqqqq4ngrERGJVzzhEOtbu/eiTmfXu/upwB8Fjxt70pa7P+zuS9x9SUFBQTdvJSIiPRFPOFQA0yJeFwKVXdUxs2QgBzhyokbdfW/wsw54kvDhq161JSIi/SuecFgLzDWzmWaWCqwAijvVKQZuCp5fDbzi7l3uOZhZspnlB89TgM8Am3vTloiI9L/k7iq4e6uZ3Q6sBkLAo+5eamZ3AyXuXgw8AjxhZuWEv+Wv6FjfzHYC2UCqmV0JXArsAlYHwRACXgJ+GKzSZVsiIjI4ug0HAHdfBazqVHZXxPNG4Jou1i3qotkzu6jfZVsiIjI4dIW0iIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIh
EUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEUTiIiEiUuMLBzJaZ2VYzKzezO2IsTzOzlcHyNWZWFJTnmdmrZlZvZg9E1M80s1+Z2XtmVmpm90Ysu9nMqsxsQ/D4875vpoiI9ES34WBmIeBB4HJgPnCtmc3vVO0WoNrd5wD3A/cF5Y3AV4Evx2j639z9ZOB04GNmdnnEspXuvjh4/KhHWyQiIn0Wz57D2UC5u+9w92bgaWB5pzrLgceD588Cl5iZuXuDu79JOCSOc/dj7v5q8LwZWA8U9mE7RESkH8UTDlOBPRGvK4KymHXcvRWoAfLi6YCZ5QJ/DLwcUXyVmW0ys2fNbFo87YiISP+JJxwsRpn3ok50w2bJwFPA99x9R1D8C6DI3RcBL/GHPZLO695qZiVmVlJVVdXdW4mISA/EEw4VQOS390Kgsqs6wQd+DnAkjrYfBra7+3c7Ctz9sLs3BS9/CJwZa0V3f9jdl7j7koKCgjjeSkRE4hVPOKwF5prZTDNLBVYAxZ3qFAM3Bc+vBl5x9xPuOZjZNwiHyJc6lU+OeHkFUBZHH0VEpB8ld1fB3VvN7HZgNRACHnX3UjO7Gyhx92LgEeAJMysnvMewomN9M9sJZAOpZnYlcClQC/wT8B6w3swAHgjOTPobM7sCaA3aurmftlVEROLUbTgAuPsqYFWnsrsinjcC13SxblEXzcYap8Dd7wTujKdfIiIyMHSFtIiIRFE4iIhIFIWDiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhEiSsczGyZmW01s3IzuyPG8jQzWxksX2NmRUF5npm9amb1ZvZAp3XONLN3g3W+Z2YWlI83sxfNbHvwc1zfN1NERHqi23AwsxDwIHA5MB+41szmd6p2C1Dt7nOA+4H7gvJG4KvAl2M0/X3gVmBu8FgWlN8BvOzuc4GXg9ciIjKI4tlzOBsod/cd7t4MPA0s71RnOfB48PxZ4BIzM3dvcPc3CYfEcWY2Gch297fc3YH/Bq6M0dbjEeUiIjJI4gmHqcCeiNcVQVnMOu7eCtQAed20WdFFmxPdfV/Q1j5gQqwGzOxWMysxs5Kqqqo4NkNEROIVTzhYjDLvRZ2+1I+u7P6wuy9x9yUFBQU9WVVERLoRTzhUANMiXhcClV3VMbNkIAc40k2bhV20eSA47NRx+OlgHH0UEZF+FE84rAXmmtlMM0sFVgDFneoUAzcFz68GXgnGEmIKDhfVmdnS4CylPwWei9HWTRHlIiIySJK7q+DurWZ2O7AaCAGPunupmd0NlLh7MfAI8ISZlRPeY1jRsb6Z7QSygVQzuxK41N23AF8EHgMygOeDB8C9wDNmdguwG7imPzZURETi1204ALj7KmBVp7K7Ip430sWHuLsXdVFeAiyMUX4YuCSefomIyMDQFdIiIhJF4SAiIlEUDiIiEkXhICIiURQOIiISReEwBLk7v99dzXv7a2lpa090d0RkFIrrVFYZPG3tzlef28z/vL0bgNRQEpctnMS3rl5Eekoowb0TkdFC4TCENLa08dQ7u9l+sJ5bL5jFginZrN9VzX+/vYtDdU388KYljEnTr0xEBp4+aYaQVe/u4/2qeu676lQ+f9Z0AJYvnsrp08fxDz/ZyI2PrOF/bjmHLAWEiAwwjTkMEdXHmlm/u5qzZ+YdD4YOV54+lYeuP4MNe47yjV9tSVAPRWQ0UTgMEa9vq8IwLpibH3P5ZQsm8YULZ/PUO3v4den+Qe6diIw2CochoPbDFtbtquaMGbnkZqZ2We/vPjGPhVOzueNn73KwrrHLeiIifaVwGALe2F5FuzsXzot507vjUpOT+O7nT+dYcytf+fnmQeqdiIxGGtlMsNa2dkp2VbOoMJfxWeG9hifX7D7hOl/
6xDzuff49XtxygE/OnzgY3RSRUUZ7Dgm241ADTa3tnFaYE/c6t5w/k3kTx/C14lKONbcOYO9EZLRSOCRY2b5aUkLGrIIxca+TEkrins+eyt6jH/LvL28fwN6JyGilw0oJ5O6U7atl7oSxpITiz+mOw05nzhjHD1/fQVpyiEnZ6ceXX3fO9K5WFRGJi/YcEqiyppHaxlbmT87u1frLFkwiPSXEcxv20t71LbtFRHpM4ZBAZftqMWDepLG9Wj8rLZnLF05i1+FjrN9V3b+dE5FRTeGQQGX7apmel9mn+ZJOnz6OorxMnt+8n/omDU6LSP9QOCTI0WPN7Ktp7PUhpQ5JZixfPJXm1nZ+uamyn3onIqNdXOFgZsvMbKuZlZvZHTGWp5nZymD5GjMrilh2Z1C+1cwuC8pOMrMNEY9aM/tSsOxrZrY3Ytmn+mdTh5byg/UAzJvYu0NKkSZmp3PRyQVsqqhh896aPrcnItJtOJhZCHgQuByYD1xrZvM7VbsFqHb3OcD9wH3BuvOBFcACYBnwkJmF3H2ruy9298XAmcAx4OcR7d3fsdzdV/VtE4emXYePkZkaYsLYtH5p78J5E5iSm85zG/ZyuL6pX9oUkdErnj2Hs4Fyd9/h7s3A08DyTnWWA48Hz58FLjEzC8qfdvcmd/8AKA/ai3QJ8L677+rtRgxHu440MH18JuF/pr4LJRlXnzGNxpZ2/unnm3GdvSQifRBPOEwF9kS8rgjKYtZx91agBsiLc90VwFOdym43s01m9qiZjYvVKTO71cxKzKykqqoqjs0YOg7XN3GovpkZeVn92u6knHQ+OX8iL5Tu57Hf7ezXtkVkdIknHGJ9te38tbSrOidc18xSgSuAn0Qs/z4wG1gM7AO+HatT7v6wuy9x9yUFBQVd934IWhecdlqUl9nvbZ8/N59PnDKBf11VxvrdOr1VRHonnnCoAKZFvC4EOp8Wc7yOmSUDOcCRONa9HFjv7gc6Ctz9gLu3uXs78EOiD0MNeyW7qgklGVNyM/q97SQzvn3NYiZmp3P7j9dram8R6ZV4TrBfC8w1s5nAXsKHga7rVKcYuAl4C7gaeMXd3cyKgSfN7DvAFGAu8E7EetfS6ZCSmU12933By88CI25u6pKdR5iam9GjKTN6Iiczhf+84Uyu+c+3+LP/WsvKvzz3I9dSdDfrq6bfEJFuP52CMYTbgdVAGfCMu5ea2d1mdkVQ7REgz8zKgb8H7gjWLQWeAbYALwC3uXsbgJllAp8EftbpLb9pZu+a2SbgIuDv+riNQ0pjSxub99YyYwAOKUVaODWHh244g/f21/GFJ9bR3No+oO8nIiNLXJfmBqeTrupUdlfE80bgmi7WvQe4J0b5McKD1p3Lb4ynT8PVu3traG5rZ8b4/h2MjuWikyZw31WL+PJPNnLbk+t54LrTSUsODfj7isjwpyukB1nJzvAg8fQB3nPocPWZhXz9igW8uOUAtzxWovs/iEhcFA6DbN2uamblZ/VpPqWeuum8Ir519SJ+9/4hrv/RGuoaWwbtvUVkeFI4DLLSyhpO7cFd3/rLNUum8dD1Z1K2r5aHXnufiupjg94HERk+FA6D6EhDeLK9BVP6Ntleby1bOImffvE8DHj49R2s+eCwrqQWkZgUDoOotDI8Kd6CKYO/59BhwZQc/uqiORTlZ/Hchkoef2sntTrMJCKdKBwGUWllLUCfp+nuqzFpydx8XhGfWTSZHVUNfO/l7ZrNVUQ+QuEwiLZU1jIlJ51xWamJ7gpJZpw3O5/bL57DuMxUnnxnNz8p2aOzmUQEiPM6B+kfpZU1zE/gIaVYJoxN5wsXzubVrQd5betBth2oI29MKlcuntpvM8aKyPCjPYdBcqy5lR2HGhI2GH0ioSTjE6dM5LaL5jA+K5W/W7mRGx5Zw46q+kR3TUQSROEwSMr21eHOkAyHDpNzMvjLC2fzjSsXsqmihmX//gbffWkbjS1tie6aiAwyHVYaJFs6zlSaOvC
HlbqbWO9Eksy47pzpXLpgIv/yyzK++9J2flJSwVc+fQrLFk7SoSaRUULhMEhKK2vJzUxhSk56orsSlwlj0/mPa0/n2rOncfcvtvDFH6/n3Fl5/PMV8zl50on3fjTrq8jwp8NKg6S0spYFU7KH3Tfv82bn88u/Pp9/Wb6Asv21fOrf3+Cffv6u7hMhMsIpHAZBS1s7W/fXJfTit75IDiVx47lFvPblj3Pj0hmsXLuHC7/5Gt/+9VbN0yQyQikcBkH5wXqa29qH9GB0PHIzU/n68oW89PcXcskpE/iPV8q54Juv8qM3dmjQWmSEUTgMgo4ro4d7OHQoys/igevO4Be3n8+CKTl841dlfPxbr/Hfb+1USIiMEBqQHgSllTWkpyQxM39MorsSl3gHlE8tzOF//vwcfld+iPtf2sZdz5Xy4KvlnF00niVF4wfsNqgiMvAUDoOgtLKWkydlE0oaXoPR8TpvTj7nzs7jrfcP892XtvOLTfv4zbYqLphXwFkKCZFhSX+1A8zdKQvOVBrJzIzz5uSz8i+Xcsv5MxmflcovN+3j27/eyu/eP0RLm+5hLTKcaM9hgO058iF1Ta3D9kylnjIzZheMYVZ+FjsONfBy2UF+GexJXBjsSYjI0BfXnoOZLTOzrWZWbmZ3xFieZmYrg+VrzKwoYtmdQflWM7ssonynmb1rZhvMrCSifLyZvWhm24Of4/q2iYn1h3s4jOw9h846QuLWC2bx5+fPJH9MGr/ctI9/+/VWHn3zAw1ciwxx3e45mFkIeBD4JFABrDWzYnffElHtFqDa3eeY2QrgPuDzZjYfWAEsAKYAL5nZPHfv+GS4yN0PdXrLO4CX3f3eIIjuAP53H7YxoUorawklGSdNGpvorvSbnk7PMatgDLMKxrCjqp6X3zvI3b/cwkOvlXPL+bO4Yel0xqan9Kh9XWEtMvDi2XM4Gyh39x3u3gw8DSzvVGc58Hjw/FngEgtfCrwceNrdm9z9A6A8aO9EItt6HLgyjj4OWaWVNcwpGEN6SijRXUm4WQVj+Is/msXTty7llMnZ3PfCe3zs3lf4zovbqG5oTnT3RCRCPGMOU4E9Ea8rgHO6quPurWZWA+QF5W93Wndq8NyBX5uZAz9w94eD8onuvi9oa5+ZTYjVKTO7FbgVYPr0oftNsrSylvPn5Ce6G0PK0ll5LJ2Vx8Y9R3notXK+9/J2fvTGDq46o5CbzpuR6O6JCPGFQ6zzLzvflb6rOida92PuXhl8+L9oZu+5++tx9CfcSDhMHgZYsmRJ5/4MCVV1TRysa2L+KBtviNdp03L5wY1L2Hagjh/8Zgcr1+7hibd3Mbsgi3Nn5XPy5LEkDbO5qERGingOK1UA0yJeFwKVXdUxs2QgBzhyonXdvePnQeDn/OFw0wEzmxy0NRk4GP/mDC1/GIweHWcq9da8iWP59udO4607L+YfLzuJQ/XN/M+aXfzb6q2sLt3P/hpN8icy2OLZc1gLzDWzmcBewgPM13WqUwzcBLwFXA284u5uZsXAk2b2HcID0nOBd8wsC0hy97rg+aXA3Z3aujf4+VxfNjCROqbNmD9Zew7xyBuTxm0XzSE7PYWyfbWs3XmEN7ZX8ZttVUzMTuO0wlxOjeN+GBrQFum7bsMhGEO4HVgNhIBH3b3UzO4GSty9GHgEeMLMygnvMawI1i01s2eALUArcJu7t5nZRODnwfTVycCT7v5C8Jb3As+Y2S3AbuCaftzeQbWlspbCcRnkZKZ0X1mOCyUZC6fmsHBqDvVNrby7t4ZNe47y6y0H+PWWA/x0fQUXzivgwpMKWDorj8xUXa4j0t/i+qty91XAqk5ld0U8b6SLD3F3vwe4p1PZDuC0LuofBi6Jp19DXWllzai7vqG/jUlL5txZeZw7K4/qhmbK9tfS0NTKypI9PP7WLlJDSSyensuSGeM4c8Y4Tp8+rC+LERky9JVrgNQ3tbLz8DH+5IzCRHdlyOntbUzHZaVy3ux8rjtnOo0
tbZTsrOY32w7yzs5qHn59B63t4fMS8sekMX18JjPGZzItL5MJY9M0sC3SQwqHAVK2b2RN0z3UpKeEOH9uPufPDZ8m/GFzG+/urWHdrmqe27CX9/bXsn53NQApIWNKbgaFuRlMHZfB0lnjKcrLImmEToQo0h8UDgOkdK/OVBpMGakhzp45nrNnjicnIwV351B9MxXVx6g4+iF7qz/knZ1HaHnfeaakgrFpySycmsOiwhwWFeayqDCHwnEZw+42rp1pMF76i8JhgJRW1pKXlcrE7LREd2VUMjMKxqZRMDbt+DhEW7tzsK6RaeMy2bT3KJsqanj0tx/Q0hY+HDUuM4VTC3NZNDWHUwtzOK0wl4nZaVGBMZI/gEfytknPKBwGSGllLfOnZA/7b6JDUW/HLEJJxuScDD531jQ+d1b48pum1ja27a9nY8VR3q2oYdPeGr7/m/dpC8YvCsamHQ+LhVNyKMrPpKWtvdf3qNCHb9/o32/wKBwGQHNrO9sP1nHBvFmJ7op0Iy05xKmF4Q//Do0tbZRW1vJuxVE27a1hU0UNr2w9iEdch5+dnsz4rFRyM1MZk5ZMVloyY9JCjElLZlPFUfLGpJGXlUpacpK+IMiwpHAYANsO1NHS5hqMHqLi+fZ5ZnBqbIf6pla2Hahj9+Fj/HJTJUcamjnS0Myuww3UN7UePzQF8Phbu44/Tw0lkZ2RTHZ6CmMzUjjW1Ep6Soj0lCRSQkkkJyWRHDJSkoykJKO+qYV2h3Z33KG93Wlpd9ra22ltd9raPPyz3Wltb6e1zWlzJzWURFpyEjsONZASSiIlyUhJTiI1lERq8DMlOYkNe46SmRoiIyVEZmqI5FASbUF77e4cPdYcft+O93fH+cPrrfvryEoLkZUaDsTU5I/uQQ31b/ZDvX9DicJhAGzpuDJa4TBijElL5ozp4zhj+jiONUffi6K5tZ36plYamlo5c8Y4DtU3cbihmbrGVmobW6j9sIXaxlaO1DdRfayZxpb24x/uHafgAjy/eX/M909OMkJJ9oefoSRCSeFQMTNa2tppam2noan1I+119sibH/Tp3+GBV8s/8jolZOSPSWNKbgZTcjM4eqyZ3IwUxmelkT8mvGc1Um+PO9IpHAZAaWUNmakhZuZlJborMkhSk5MYn5zK+KxUPjF/Ypf1Yn1zbXc//u3982dNI8kMMzCDJAsHQryHpp5cs/t4e82t7TS3tdPc2k5L8PO8OXkca27jWHMbHza30dLWfjxwkpKMkp3VJJmRFLy/mYX7AyQZtDk0tbQdb7expZ36phaqG5r54FADNR+2HB+vIVhnXGYqeWNSyctKo6m1jck56eSPSQs/xqaRlRo64fa1tLUHfW7lYG0jTa3twaMt/LOljZY2J8nCY0jh7UkiIzWJvKw0JmSnMWN8Fhmpmja/JxQOA6C0spZTJmfrPPphqrcD3r1dP8mMpJCREoKstBP/ScbT9h/aS6Lz15OLT+46uAAs5kTK8Wt3p6GplcP1zRxuaOZwsAd1uL6JnYeP8daOw1HrpCaHD4l1fKiHw5DjAdbcg/uPr+pizwtgam4G2RkpFOVlMit/DFNy0zUedAIKh37W1u5s2VfLNWfqymgZfZLMGJuewtj0FIryPxpN7s6lCyZxoLaRww3NHKpr4lB9E0cammluaw/GUcLjKu3u4bGR1GSyUkNkpIbISktmw56jpCcnkZocIi0IlbSUECkhwx3+5Iypx8dkGppaOVTfzIHaRj441MD7VfW8uf3Q8QtUx2WmsKgwl7OKxjM+KzUR/1xDmsKhn5UfrOdYcxunTctNdFdkGOrrXstQFnntSW95N3duWfVu7D2HjsNY58zMo7axhe0H6tlUcZQ3tlfxxvYqTivM5aKTYt5XbNRSOPSzjXuOAigcRIao7PSU42ej1XzYwpvbq3hn5xE2VdSAwRc/PjvmdSyj7UwnhUM/21BxlLHpyRqMliFpJO+Z9EZORgqfXjSFC+YV8Kt39/GdF7f
x/Ob9PHT9GczMH91/wwqHfrZxz1FOK8zVYLRIDEM1nMamp7DirOmcVljLT9dX8Kl/f4Mbls4Y1QHRuzkAJKbGljbe21/HadM02Z7IcHTK5Gy+eOFsstKSefS3H7Cp4miiu5QwCod+VFpZS1u7c1qhxhtEhqu8MWl88cLZTBuXwTMle9i6vzbRXUoIhUM/6hiMXqzBaJFhLSM1xJ+eW8SknHSefGc3uw43JLpLg07h0I82VhxlUnY6E7LTE90VEemj9JQQN583k5yMFB5/ayeH65sS3aVBpXDoRxv3HNV4g8gIMiYtmZvPmwnAU+/spqUHV2sPdwqHfnL0WDM7Dx/T9Q0iI8z4rFSuOXMalTWN/GrTvkR3Z9DEFQ5mtszMtppZuZndEWN5mpmtDJavMbOiiGV3BuVbzeyyoGyamb1qZmVmVmpmfxtR/2tmttfMNgSPT/V9Mwfeho7xBg1Gi4w4p0zO5oK5Bbyz8wgbR8kZTN2Gg5mFgAeBy4H5wLVmNr9TtVuAanefA9wP3BesOx9YASwAlgEPBe21Av/g7qcAS4HbOrV5v7svDh6r+rSFg2TNB0dITjIWT1c4iIxEn5w/kWnjMijeUEldY0uiuzPg4tlzOBsod/cd7t4MPA0s71RnOfB48PxZ4BILT3e4HHja3Zvc/QOgHDjb3fe5+3oAd68DyoCpfd+cxHl7x2FOm5ZLZqquKxQZiUJJxlVnFNLS1k7xxspEd2fAxRMOU4E9Ea8riP4gP17H3VuBGiAvnnWDQ1CnA2siim83s01m9qiZjSMGM7vVzErMrKSqqiqOzRg4DU2tbKqoYems8Qnth4gMrAnZ6VxyysTwbWT31iS6OwMqnnCINQ9E57kRu6pzwnXNbAzwU+BL7t5xpcn3gdnAYmAf8O1YnXL3h919ibsvKSgoOPEWDLCSXdW0tTtLZ+UltB8iMvDOn5NP4bgMijdW8mGMuwKOFPGEQwUwLeJ1IdB5n+p4HTNLBnKAIyda18xSCAfDj939Zx0V3P2Au7e5ezvwQ8KHtYa0t3ccJjnJPnLPYREZmUJJxpWLp3KsqZUXy7q+udBwF084rAXmmtlMM0slPMBc3KlOMXBT8Pxq4BV396B8RdlyAQgAAAsJSURBVHA200xgLvBOMB7xCFDm7t+JbMjMJke8/CywuacbNdg03iAyukzJzWDprDzW7DjC3qMfJro7A6LbcAjGEG4HVhMeOH7G3UvN7G4zuyKo9giQZ2blwN8DdwTrlgLPAFuAF4Db3L0N+BhwI3BxjFNWv2lm75rZJuAi4O/6a2MHgsYbREanT5wykay0ZIo37KW9u7sQDUNxfdUNTidd1ansrojnjcA1Xax7D3BPp7I3iT0egbvfGE+fhgqNN4iMThmpIS5fOImfrKtg3a5qblg6I9Fd6le6QrqPNN4gMnotnpZLUV4Wq0v3U93QnOju9CuFQx+9trWKM6aP03iDyChkZlyxeAqNLW18c/V7ie5Ov1I49MHuw8co21fLpQsmJrorIpIgk7LTOW92Pk+v3cPvd1cnujv9RuHQB6tLw6exXbZgUoJ7IiKJdMnJE5gwNo2vPreZtvaRMTitcOiD1aX7mT85m2njMxPdFRFJoLSUEF/59Hw2763lx2t2Jbo7/ULh0EsH6xpZt7taew0iAsBnFk3m/Dn5fGv1Vqrqhv+NgRQOvfTilgO4w7KFCgcRCQ9Of335Ahpb2rj3+eE/OK1w6KUXNu+nKC+TeRPHJLorIjJEzC4Yw1/80Sx+ur6CtTuPJLo7faJw6IUjDc289f5hLlswifBMICIiYbdfPIepuRl89f9tpnUY31ZU4dALT72zm9Z256ozCxPdFREZYjJTk7nrj+fz3v46HvvdzkR3p9cUDj3U0tbOE2/t4vw5+cybODbR3RGRIejS+RP5+EkF3P/iNg7UNia6O72icOihFzbvZ39tI3/2saJEd0VEhigz4+tXLKC13fnHZzfRPgyvfVA49NB//fYDZuR
lctFJExLdFREZwmbkZfGVz8zn9W1V/OjNHYnuTo8pHHpg456jrN99lJvOLSIpSQPRInJiN5wzncsWTOSbL2xl456jie5Ojygc4uTu/N/ny8hOT+aaJRqIFpHumRn3XbWICWPT+Ksfr+dg3fAZf1A4xOnZdRW8veMId1x+CmPTUxLdHREZJnIzU/nBjUuoPtbM/3psLfVNrYnuUlwUDnE4XN/EPavKWDJjHCvOmtb9CiIiEU4tzOHB686gbF8df/Xj9TS3Dv3rHxQO3XB3vvaLLTQ0tfKvf3KqxhpEpFcuOnkC91y5kNe3VfFnj71DbWNLort0QgqHE3B3vvGrMn6xsZK/vniurmsQkT5ZcfZ0vnX1ItbsOMLn/vMt9tV8mOgudUnh0AV351urt/LImx9w83lF/PXFcxLdJREZAa5ZMo1Hbz6LPUeOcdn9r/OTkj24D73rIBQOMew5coxbHi/hodfe57pzpvPPfzxfcyiJSL+5YF4BxX99PidNGss/PruJGx5Zw9s7Dg+pkIgrHMxsmZltNbNyM7sjxvI0M1sZLF9jZkURy+4Myrea2WXdtWlmM4M2tgdtpvZtE+O3dX8d9z7/Hpfe/zpv7zjMVz59Ct9YvlDBICL9bnbBGFbeei53L19A2b46Vjz8Nssf/C3/9dsP2H34WKK7h3WXVGYWArYBnwQqgLXAte6+JaLOXwGL3P0LZrYC+Ky7f97M5gNPAWcDU4CXgHnBajHbNLNngJ+5+9Nm9p/ARnf//on6uGTJEi8pKenptrN1fx2/2XaQsn11bKo4yvtVDYSSjEvnT+Qrn5nP1NyMHrcZryfX7B6wtkVk8F13zvRer9vY0sZP11fw2G93sv1gPQCF4zI4ZXI2J00cy6ScdCZmp5OdnkxmajIpyUZbu9PW7kzJzSB/TFqv3tfM1rn7kljLkuNY/2yg3N13BI09DSwHtkTUWQ58LXj+LPCAhb9uLweedvcm4AMzKw/aI1abZlYGXAxcF9R5PGj3hOHQW29sr+JfV73HpOx0Tpk8lpvOK+JTp07u9T+0iEhvpKeEuP6cGVx/zgx2HmrglfcOsm53Ne/tq+XlsgOcaGqmb1y5kBuWzuj3PsUTDlOBPRGvK4Bzuqrj7q1mVgPkBeVvd1p3avA8Vpt5wFF3b41R/yPM7Fbg1uBlvZltjWNbYtoFrAEe620DPZMPHBqctxpStN2jy6jb7usTtM033gc39n71LlMlnnCIdcC9c451Vaer8lhjHSeqH13o/jDwcKxlQ5mZlXS1GzeSabtHl9G43SNtm+MZkK4AIi8LLgQqu6pjZslADnDkBOt2VX4IyA3a6Oq9RERkgMUTDmuBucFZRKnACqC4U51i4Kbg+dXAKx4e6S4GVgRnM80E5gLvdNVmsM6rQRsEbT7X+80TEZHe6PawUjCGcDuwGggBj7p7qZndDZS4ezHwCPBEMOB8hPCHPUG9ZwgPXrcCt7l7G0CsNoO3/N/A02b2DeD3QdsjybA7FNZPtN2jy2jc7hG1zd2eyioiIqOPrpAWEZEoCgcREYmicBhE3U1DMpKY2U4ze9fMNphZSVA23sxeDKZGedHMxiW6n31lZo+a2UEz2xxRFnM7Lex7we9/k5mdkbie914X2/w1M9sb/L43mNmnIpbFnEJnuDGzaWb2qpmVmVmpmf1tUD4if98Kh0ESTEPyIHA5MB+4NpheZCS7yN0XR5z7fQfwsrvPBV4OXg93jwHLOpV1tZ2XEz5jby7hCzgH5Mr/QfAY0dsMcH/w+17s7qsAgv/jK4AFwToPBX8Lw1Er8A/ufgqwFLgt2L4R+ftWOAye49OQuHsz0DENyWiynPCUKAQ/r0xgX/qFu79O+Ay9SF1t53Lgvz3sbcLX9EwenJ72ny62uSvHp9Bx9w+AyCl0hhV33+fu64PndUAZ4RkcRuTvW+EweGJNQxJzapARwoFfm9m6YKoTgInuvg/Cf2jAhIT1bmB1tZ0j/f/A7cHhk0cjDhm
OyG0OZp4+nfDMOyPy961wGDxxTw0yQnzM3c8gvGt9m5ldkOgODQEj+f/A94HZwGJgH/DtoHzEbbOZjQF+CnzJ3WtPVDVG2bDZdoXD4IlnGpIRw90rg58HgZ8TPpRwoGO3Ovh5MHE9HFBdbeeI/T/g7gfcvc3d24Ef8odDRyNqm80shXAw/NjdfxYUj8jft8Jh8MQzDcmIYGZZZja24zlwKbCZj06zMpKnRulqO4uBPw3OYlkK1HQcjhjuOh1L/yzh3zd0PYXOsBPchuARoMzdvxOxaET+vuOZlVX6QVfTkCS4WwNlIvDz8N8SycCT7v6Cma0FnjGzW4DdwDUJ7GO/MLOngI8D+WZWAfwzcC+xt3MV8CnCg7LHgD8b9A73gy62+eNmtpjwYZOdwF/CiafQGYY+Rnh27HfNbENQ9n8Yob9vTZ8hIiJRdFhJRESiKBxERCSKwkFERKIoHEREJIrCQUREoigcRBIgmMX0y4nuh0hXFA4ifRRc5KS/JRlR9B9apBfMrCiY1/8hYD3wiJmVBPP8fz2i3k4z+7qZrQ/ub3FyjLb+wsyeN7OMwdwGkRNROIj03kmEp2Q+nfA8/0uARcCFZrYoot6hYBLC7wMfOZQUXDX/x8CV7v7hIPVbpFsKB5He2xXM0w/wOTNbD/ye8I1tIm/k1DFB2zqgKKL8RsKz1l7l7k0D3FeRHlE4iPReA0AwodyXgUvcfRHwKyA9ol7HB38bH53PbDPhsCgc8J6K9JDCQaTvsgkHRY2ZTSS8NxCP3xOeoK7YzKYMVOdEekPhINJH7r6R8Ad9KfAo8NserPsm4b2OX5lZ/sD0UKTnNCuriIhE0Z6DiIhEUTiIiEgUhYOIiERROIiISBSFg4iIRFE4iIhIFIWDiIhE+f8jrSB9eAmL/wAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(albums[\"rank\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "#albums.weeks_top = albums.days_top.apply(lambda x :(x+7)/7, result_type='expand')\n", + "albums = albums.assign(weeks_top=(albums['days_top'] + 7) / 7)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumdate_startdate_endranklengthtrack_lengthdays_topweeks_top
0Various ArtistsRoots Of Country Music (1965)1965-07-101965-08-2110772.0NaN42.07.0
1Kenny GSilhouette1988-10-221989-11-1881.0230416.000000392.057.0
2!!! (Chk Chk Chk)Myth Takes2007-03-242007-03-2419510.0290017.0000000.01.0
3\"Weird Al\" Yankovic\"Weird Al\" Yankovic In 3-D1984-03-171984-08-1817NaNNaN154.023.0
4\"Weird Al\" YankovicAlapalooza1993-10-231994-04-164612.0224445.166667175.026.0
\n", + "
" + ], + "text/plain": [ + " artist album date_start date_end \\\n", + "0 Various Artists Roots Of Country Music (1965) 1965-07-10 1965-08-21 \n", + "1 Kenny G Silhouette 1988-10-22 1989-11-18 \n", + "2 !!! (Chk Chk Chk) Myth Takes 2007-03-24 2007-03-24 \n", + "3 \"Weird Al\" Yankovic \"Weird Al\" Yankovic In 3-D 1984-03-17 1984-08-18 \n", + "4 \"Weird Al\" Yankovic Alapalooza 1993-10-23 1994-04-16 \n", + "\n", + " rank length track_length days_top weeks_top \n", + "0 107 72.0 NaN 42.0 7.0 \n", + "1 8 1.0 230416.000000 392.0 57.0 \n", + "2 195 10.0 290017.000000 0.0 1.0 \n", + "3 17 NaN NaN 154.0 23.0 \n", + "4 46 12.0 224445.166667 175.0 26.0 " + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumdate_startdate_endrankalbum_lengthtrack_lengthdays_topweeks_top
0Various ArtistsRoots Of Country Music (1965)1965-07-101965-08-2110772.0NaN42.07.0
1Kenny GSilhouette1988-10-221989-11-1881.0230416.000000392.057.0
2!!! (Chk Chk Chk)Myth Takes2007-03-242007-03-2419510.0290017.0000000.01.0
3\"Weird Al\" Yankovic\"Weird Al\" Yankovic In 3-D1984-03-171984-08-1817NaNNaN154.023.0
4\"Weird Al\" YankovicAlapalooza1993-10-231994-04-164612.0224445.166667175.026.0
\n", + "
" + ], + "text/plain": [ + " artist album date_start date_end \\\n", + "0 Various Artists Roots Of Country Music (1965) 1965-07-10 1965-08-21 \n", + "1 Kenny G Silhouette 1988-10-22 1989-11-18 \n", + "2 !!! (Chk Chk Chk) Myth Takes 2007-03-24 2007-03-24 \n", + "3 \"Weird Al\" Yankovic \"Weird Al\" Yankovic In 3-D 1984-03-17 1984-08-18 \n", + "4 \"Weird Al\" Yankovic Alapalooza 1993-10-23 1994-04-16 \n", + "\n", + " rank album_length track_length days_top weeks_top \n", + "0 107 72.0 NaN 42.0 7.0 \n", + "1 8 1.0 230416.000000 392.0 57.0 \n", + "2 195 10.0 290017.000000 0.0 1.0 \n", + "3 17 NaN NaN 154.0 23.0 \n", + "4 46 12.0 224445.166667 175.0 26.0 " + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums = albums.rename(columns={\"length\":\"album_length\"})\n", + "albums.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "albums_merge = albums.drop(columns=[\"date_start\",\"date_end\",\"days_top\",\"track_length\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumrankalbum_lengthweeks_top
0Various ArtistsRoots Of Country Music (1965)10772.07.0
1Kenny GSilhouette81.057.0
2!!! (Chk Chk Chk)Myth Takes19510.01.0
\n", + "
" + ], + "text/plain": [ + " artist album rank album_length \\\n", + "0 Various Artists Roots Of Country Music (1965) 107 72.0 \n", + "1 Kenny G Silhouette 8 1.0 \n", + "2 !!! (Chk Chk Chk) Myth Takes 195 10.0 \n", + "\n", + " weeks_top \n", + "0 7.0 \n", + "1 57.0 \n", + "2 1.0 " + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "albums_merge.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "df_ranking = df_acoustic_features.merge(albums_merge,on=[\"artist\",\"album\"], how=\"left\")" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + "5 False\n", + "6 False\n", + "7 False\n", + "8 False\n", + "9 False\n", + "10 False\n", + "11 False\n", + "12 False\n", + "13 False\n", + "14 False\n", + "15 False\n", + "16 False\n", + "17 False\n", + "18 False\n", + "19 False\n", + "20 False\n", + "21 False\n", + "22 False\n", + "23 False\n", + "24 False\n", + "25 False\n", + "26 False\n", + "27 False\n", + "28 False\n", + "29 False\n", + " ... 
\n", + "339825 False\n", + "339826 False\n", + "339827 False\n", + "339828 False\n", + "339829 False\n", + "339830 False\n", + "339831 False\n", + "339832 False\n", + "339833 False\n", + "339834 False\n", + "339835 False\n", + "339836 False\n", + "339837 False\n", + "339838 False\n", + "339839 False\n", + "339840 False\n", + "339841 False\n", + "339842 False\n", + "339843 False\n", + "339844 False\n", + "339845 False\n", + "339846 False\n", + "339847 False\n", + "339848 False\n", + "339849 False\n", + "339850 False\n", + "339851 False\n", + "339852 False\n", + "339853 False\n", + "339854 False\n", + "Name: rank, Length: 339855, dtype: bool" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking[\"rank\"].isnull()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idsongalbumartistacousticnessdanceabilityduration_msenergyinstrumentalnesskey...modespeechinesstempotime_signaturevalencealbum_iddaterankalbum_lengthweeks_top
00Veyvc3n9AcLSoK3r1dA12Voices In My HeadHoodie SZNA Boogie Wit da Hoodie0.05550.754142301.00.6630.0000006.0...0.00.42790.1954.00.2073r5hf3Cj3EMh1C2saQ8jyt2018-12-211.020.03.0
177JzXZonNumWsuXKy9vr3UBeastyHoodie SZNA Boogie Wit da Hoodie0.29200.860152829.00.4180.0000007.0...0.00.158126.0234.00.3743r5hf3Cj3EMh1C2saQ8jyt2018-12-211.020.03.0
218yllZD0TdF7ykcREib8Z1I Did ItHoodie SZNA Boogie Wit da Hoodie0.15300.718215305.00.4540.0000468.0...1.00.12789.4834.00.1963r5hf3Cj3EMh1C2saQ8jyt2018-12-211.020.03.0
31wJRveJZLSb1rjhnUHQiv6Swervin (feat. 6ix9ine)Hoodie SZNA Boogie Wit da Hoodie0.01530.581189487.00.6620.0000009.0...1.00.30393.0234.00.4343r5hf3Cj3EMh1C2saQ8jyt2018-12-211.020.03.0
40jAfdqv18goRTUxm3ilRjbStartender (feat. Offset and Tyga)Hoodie SZNA Boogie Wit da Hoodie0.02350.736192779.00.6220.0000006.0...0.00.133191.9714.00.5063r5hf3Cj3EMh1C2saQ8jyt2018-12-211.020.03.0
\n", + "

5 rows × 22 columns

\n", + "
" + ], + "text/plain": [ + " id song album \\\n", + "0 0Veyvc3n9AcLSoK3r1dA12 Voices In My Head Hoodie SZN \n", + "1 77JzXZonNumWsuXKy9vr3U Beasty Hoodie SZN \n", + "2 18yllZD0TdF7ykcREib8Z1 I Did It Hoodie SZN \n", + "3 1wJRveJZLSb1rjhnUHQiv6 Swervin (feat. 6ix9ine) Hoodie SZN \n", + "4 0jAfdqv18goRTUxm3ilRjb Startender (feat. Offset and Tyga) Hoodie SZN \n", + "\n", + " artist acousticness danceability duration_ms energy \\\n", + "0 A Boogie Wit da Hoodie 0.0555 0.754 142301.0 0.663 \n", + "1 A Boogie Wit da Hoodie 0.2920 0.860 152829.0 0.418 \n", + "2 A Boogie Wit da Hoodie 0.1530 0.718 215305.0 0.454 \n", + "3 A Boogie Wit da Hoodie 0.0153 0.581 189487.0 0.662 \n", + "4 A Boogie Wit da Hoodie 0.0235 0.736 192779.0 0.622 \n", + "\n", + " instrumentalness key ... mode speechiness tempo time_signature \\\n", + "0 0.000000 6.0 ... 0.0 0.427 90.195 4.0 \n", + "1 0.000000 7.0 ... 0.0 0.158 126.023 4.0 \n", + "2 0.000046 8.0 ... 1.0 0.127 89.483 4.0 \n", + "3 0.000000 9.0 ... 1.0 0.303 93.023 4.0 \n", + "4 0.000000 6.0 ... 0.0 0.133 191.971 4.0 \n", + "\n", + " valence album_id date rank album_length weeks_top \n", + "0 0.207 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 1.0 20.0 3.0 \n", + "1 0.374 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 1.0 20.0 3.0 \n", + "2 0.196 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 1.0 20.0 3.0 \n", + "3 0.434 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 1.0 20.0 3.0 \n", + "4 0.506 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 1.0 20.0 3.0 \n", + "\n", + "[5 rows x 22 columns]" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idsongalbumartistacousticnessdanceabilityduration_msenergyinstrumentalnesskey...modespeechinesstempotime_signaturevalencealbum_iddaterankalbum_lengthweeks_top
2419996yG67QYhkj9WVIt7WgyHE1SilhouetteSilhouette0.9350.731223507.00.1670.0000006.0...0.00.029794.9324.00.27461vU2oVw8d5QkTNhW8PYla2018-02-09NaNNaNNaN
3343000ZUFk0afed2FkqmuJ5zGxRShootin' Creek (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9530.712204307.00.5140.0042505.0...1.00.0391133.5584.00.9230jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343015Xhj8u4dAeZYaZwXQJsQ4EBaltimore Fire (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9010.512191800.00.4390.0000017.0...1.00.0311115.8614.00.9350jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343022bKkPsqxC7o4V2lcwbI18ULeaving Home (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9880.420189253.00.4310.0003140.0...1.00.0406205.5553.00.9010jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343031hPEhm8zQk4hAyYeD1pHUcThere'll Come a Time (with The North Carolina ...Roots Of Country Music (1965)0.9610.445209027.00.3320.0000005.0...1.00.0336182.9933.00.8310jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343043UG5yKkePf93z9RSBsbPE5White House Blues (with The North Carolina Ram...Roots Of Country Music (1965)0.9190.546208813.00.4090.0000067.0...1.00.0477121.4444.00.8150jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343053UCBtdESy842iTU1uHEogXThe Highwayman (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9300.523198253.00.3090.0000000.0...1.00.034796.0584.00.7760jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343067z33ai31tZQ9vEKQQwww5DHungry Hash House (with The North Carolina Ram...Roots Of Country Music (1965)0.9220.532203227.00.3970.0000017.0...1.00.0458116.4924.00.9020jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343073hntqfPOhpYrmNg33aqonbThe Letter that Never Came (with The North Car...Roots Of Country Music (1965)0.9650.533168800.00.4120.00000010.0...1.00.0462106.3164.00.7810jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343086rxrbo1o1QuL2SKV6M7NA1Take a Drink on Me (with The North Carolina Ra...Roots Of Country Music (1965)0.9240.588194680.00.5360.0000312.0...1.00.0333125.6724.00.9740jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343093cUQJtLu2yq4FBaoe36oklHusband and Wife Were Angry One Night (with Th...Roots Of Country Music (1965)0.9630.710170813.00.3220.0000007.0...1.00.028897.0444.00.9330jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343102k7AzvedyJktnDflUVcKYBRamblin' Blues (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9270.681185933.00.4480.0000490.0...1.00.0366121.7304.00.9210jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343113xongt156K2h28zHBCGkjVTook My Gal A-Walkin' (with The North Carolina...Roots Of Country Music (1965)0.9520.554168147.00.3120.0000072.0...1.00.0315104.9234.00.9280jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343127gHZDMFYE5Obq96MJRE5UDOld and Only in the Way (with The North Caroli...Roots Of Country Music (1965)0.9690.499207880.00.2900.0000005.0...1.00.0473175.9444.00.6490jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343130kK3Qs81peDYr3XPIYPctPDon't Let Your Deal Go Down Blues (with The No...Roots Of Country Music (1965)0.9900.594172027.00.4130.0477002.0...1.00.0367137.3384.00.9050jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343141W05sAuCWcViCCz6ib3KZyBill Mason (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9100.751180267.00.4780.0000227.0...1.00.0775119.0504.00.9560jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343153PzmwPmvLlYIyw5yiTjmKUA Kiss Waltz (with The North Carolina Ramblers...Roots Of Country Music (1965)0.8750.407188373.00.3550.8090005.0...1.00.0347165.1453.00.9380jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343164hFhhvQBSi53aR2EQ2yXjIFlop Eared Mule (with Chris Howland & The High...Roots Of Country Music (1965)0.9740.638178387.00.7500.5340002.0...1.00.0399131.7644.00.8850jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343170uw1DoIdB75eilYSFPpEDpA Trip to New York, Pt. 1 (with Allegheny High...Roots Of Country Music (1965)0.9250.573185413.00.4160.0000037.0...1.00.3810133.9304.00.8500jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343186F6KTnlicSqAlNwW8MgMjKSweet Sixteen (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9170.644174053.00.4030.0000007.0...1.00.0450114.8014.00.8920jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343193Dj4DXFrIf1GZMXyL01C3JWrite a Letter to My Mother (with The North Ca...Roots Of Country Music (1965)0.8750.696181773.00.4070.0001418.0...1.00.026797.0324.00.9290jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343205O1FjxnI4fGAdJWRv9Uwy9If the River Was Whiskey (with The North Carol...Roots Of Country Music (1965)0.9230.528189573.00.3540.0000073.0...1.00.049999.1704.00.7430jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343212qZjqI8LU7efzauVIonjHlMother's Last Farewell Kiss (with The North Ca...Roots Of Country Music (1965)0.9660.486183853.00.2260.0000005.0...1.00.0351173.1934.00.5770jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343222s1fbQyQ5hPjsbbGZkMC2RMilwaukee Blues (with The North Carolina Rambl...Roots Of Country Music (1965)0.9780.738197267.00.4110.0000291.0...1.00.0390106.9084.00.9190jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343237sqb1bBC5EKowm4oqoKkmoWhere the Whippoorwill Is Whispering Good-Nigh...Roots Of Country Music (1965)0.9740.667189733.00.2390.0000008.0...1.00.033590.3504.00.7210jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343245MgjNSxvgAGWyQF7C2jKaFThe Girl I Left in Sunny Tennessee (with The N...Roots Of Country Music (1965)0.9910.709201840.00.4230.0423002.0...1.00.0326128.8474.00.8850jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343255XrKPffmwEP65ZiCTeOeX4Sunny Tennesee (with Floyd County Ramblers) - ...Roots Of Country Music (1965)0.9440.491196267.00.3910.0000000.0...1.00.0334114.4474.00.6240jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343262woAfvj29PxBBWvhJ6QiQHBulldog Down in Sunny Tennessee (with Dock Walsh)Roots Of Country Music (1965)0.9640.648160053.00.3970.0000085.0...1.00.0402167.3104.00.9220jckzrST0anXNTY13KMbSw2005NaNNaNNaN
33432743zp4lGaVAbD86JsU7IuNjMoving Day (with Arthur Collins)Roots Of Country Music (1965)0.9940.658193360.00.1940.0000005.0...1.00.084975.2874.00.6980jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343287foZTkS7CxXV1W28GoB5O1It's Movin' Day (with The North Carolina Rambl...Roots Of Country Music (1965)0.8710.540207107.00.3490.0000136.0...1.00.033995.8014.00.8420jckzrST0anXNTY13KMbSw2005NaNNaNNaN
33432922j6YCqpxVHXF3EmsYRMBkHome Sweet, Home (with Frank Jenkins)Roots Of Country Music (1965)0.9740.481153160.00.5370.8810002.0...1.00.0414129.5724.00.9740jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343306pH9yEW5e4Lnd7MIY6wFN3I'm the Man that Rode the Mule 'Round the Worl...Roots Of Country Music (1965)0.9900.668183293.00.3920.4190002.0...1.00.0476139.6314.00.9230jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343314eIEru6KjlZeio3zHj7obaMan that Rode the Mule Around the World (with ...Roots Of Country Music (1965)0.9460.692175547.00.3410.0000328.0...1.00.0436123.5764.00.7970jckzrST0anXNTY13KMbSw2005NaNNaNNaN
33433266hnNRU8IhBnxJZS7I3xXGLynchburg Town (with Chris Howland & The Highl...Roots Of Country Music (1965)0.9700.709180667.00.6520.6510000.0...1.00.0313131.1214.00.7730jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343337v4VLqGkxpAmFO7FIUmj6rGoing Down to Lynchburg Town / Don't Let Your ...Roots Of Country Music (1965)0.9930.582188600.00.5650.8820008.0...1.00.0350145.4124.00.9360jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343343z7ZQf4bZgCFLjPjXmJFeZSome One (with Branch & Coleman)Roots Of Country Music (1965)0.9830.462183360.00.2770.00000010.0...1.00.0323178.3213.00.6340jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343352kUpX7DIdcvjbPsEFYK5GhMonkey on a String (with Cal Stewart)Roots Of Country Music (1965)0.9830.590151040.00.3660.00000011.0...1.00.255083.0113.00.9350jckzrST0anXNTY13KMbSw2005NaNNaNNaN
33433633s2KcXL1G6pQkjZsi01YcMonkey on a String (with The North Carolina Ra...Roots Of Country Music (1965)0.9890.589187253.00.4540.00722010.0...1.00.0391102.9264.00.8870jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343375dgeBwWjLx357PEFJtUQKXCan I Sleep in Your Barn Tonight Mister (with ...Roots Of Country Music (1965)0.9930.675191693.00.3600.0025309.0...1.00.0406132.8184.00.8650jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343380gN9Hm4DeszZez2XIXyyEvMay I Sleep in Your Barn Tonight, Mister (with...Roots Of Country Music (1965)0.9270.649171600.00.5220.0060006.0...1.00.0599126.3004.00.5670jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343396BqRH6HC4Qqn47dJz0iYAIMarried Life Blues (with Byron Parker)Roots Of Country Music (1965)0.9690.567160987.00.3690.0000032.0...1.00.0323113.0874.00.7400jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343400WPljjnATI6qeuBzvRjmtVThe Infanta March (with Fred van Eps)Roots Of Country Music (1965)0.9680.551260560.00.6310.9500000.0...1.00.0571123.8164.00.9630jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343411WnKHpJccBviOIVY8ttV4HSunset March (with The North Carolina Ramblers)Roots Of Country Music (1965)0.9790.637159760.00.4500.9410000.0...1.00.0652114.5024.00.6480jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343426mkkTKSp6UvaeZSsABnXvOI'll Roll in My Sweet Baby's Arms (with Carter...Roots Of Country Music (1965)0.9650.657178080.00.4380.00000010.0...0.00.0846144.2144.00.8130jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343431WsnmtfBvMtwW38EfzR62gGoodbye Eliza Jane (with Peerless Quartet)Roots Of Country Music (1965)0.9950.65597107.00.4580.0000004.0...0.00.8840114.8511.00.4050jckzrST0anXNTY13KMbSw2005NaNNaNNaN
33434467BWokCtjylxW4hsq2EcysGood-Bye Sweet Liza Jane (with The North Carol...Roots Of Country Music (1965)0.9470.657185147.00.3500.0000007.0...1.00.0387105.3394.00.8980jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343454VKveoCmYHMm74GFORF9gmGoodbye Booze (with The North Carolina Rambler...Roots Of Country Music (1965)0.9870.399195240.00.3050.0053405.0...1.00.0338182.0234.00.8480jckzrST0anXNTY13KMbSw2005NaNNaNNaN
33434648l5jPrKfDdVLLgL89kQxiGoodbye Booze (with Fate Norris & Gid Tanner)Roots Of Country Music (1965)0.9880.608170333.00.2860.0000038.0...1.00.0406120.2454.00.6500jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343471L9FE9o1C480C0Argv1K3uYou Ain't Talking to Me (with Eddie Morton)Roots Of Country Music (1965)0.9960.533174920.00.2770.0016404.0...0.00.196085.1844.00.7650jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343483ioWkuIRTynwbAgfPP1eVLYou Ain't Talkin' to Me (with The North Caroli...Roots Of Country Music (1965)0.9310.628177480.00.4030.0000012.0...1.00.0429122.5154.00.8670jckzrST0anXNTY13KMbSw2005NaNNaNNaN
3343492J4Mu7AEozVHgoMzOy214LIf I Lose, I Don't Care (with The North Caroli...Roots Of Country Music (1965)0.9380.558187600.00.3660.0000092.0...1.00.0497124.8024.00.8730jckzrST0anXNTY13KMbSw2005NaNNaNNaN
\n", + "

51 rows × 22 columns

\n", + "
" + ], + "text/plain": [ + " id \\\n", + "241999 6yG67QYhkj9WVIt7WgyHE1 \n", + "334300 0ZUFk0afed2FkqmuJ5zGxR \n", + "334301 5Xhj8u4dAeZYaZwXQJsQ4E \n", + "334302 2bKkPsqxC7o4V2lcwbI18U \n", + "334303 1hPEhm8zQk4hAyYeD1pHUc \n", + "334304 3UG5yKkePf93z9RSBsbPE5 \n", + "334305 3UCBtdESy842iTU1uHEogX \n", + "334306 7z33ai31tZQ9vEKQQwww5D \n", + "334307 3hntqfPOhpYrmNg33aqonb \n", + "334308 6rxrbo1o1QuL2SKV6M7NA1 \n", + "334309 3cUQJtLu2yq4FBaoe36okl \n", + "334310 2k7AzvedyJktnDflUVcKYB \n", + "334311 3xongt156K2h28zHBCGkjV \n", + "334312 7gHZDMFYE5Obq96MJRE5UD \n", + "334313 0kK3Qs81peDYr3XPIYPctP \n", + "334314 1W05sAuCWcViCCz6ib3KZy \n", + "334315 3PzmwPmvLlYIyw5yiTjmKU \n", + "334316 4hFhhvQBSi53aR2EQ2yXjI \n", + "334317 0uw1DoIdB75eilYSFPpEDp \n", + "334318 6F6KTnlicSqAlNwW8MgMjK \n", + "334319 3Dj4DXFrIf1GZMXyL01C3J \n", + "334320 5O1FjxnI4fGAdJWRv9Uwy9 \n", + "334321 2qZjqI8LU7efzauVIonjHl \n", + "334322 2s1fbQyQ5hPjsbbGZkMC2R \n", + "334323 7sqb1bBC5EKowm4oqoKkmo \n", + "334324 5MgjNSxvgAGWyQF7C2jKaF \n", + "334325 5XrKPffmwEP65ZiCTeOeX4 \n", + "334326 2woAfvj29PxBBWvhJ6QiQH \n", + "334327 43zp4lGaVAbD86JsU7IuNj \n", + "334328 7foZTkS7CxXV1W28GoB5O1 \n", + "334329 22j6YCqpxVHXF3EmsYRMBk \n", + "334330 6pH9yEW5e4Lnd7MIY6wFN3 \n", + "334331 4eIEru6KjlZeio3zHj7oba \n", + "334332 66hnNRU8IhBnxJZS7I3xXG \n", + "334333 7v4VLqGkxpAmFO7FIUmj6r \n", + "334334 3z7ZQf4bZgCFLjPjXmJFeZ \n", + "334335 2kUpX7DIdcvjbPsEFYK5Gh \n", + "334336 33s2KcXL1G6pQkjZsi01Yc \n", + "334337 5dgeBwWjLx357PEFJtUQKX \n", + "334338 0gN9Hm4DeszZez2XIXyyEv \n", + "334339 6BqRH6HC4Qqn47dJz0iYAI \n", + "334340 0WPljjnATI6qeuBzvRjmtV \n", + "334341 1WnKHpJccBviOIVY8ttV4H \n", + "334342 6mkkTKSp6UvaeZSsABnXvO \n", + "334343 1WsnmtfBvMtwW38EfzR62g \n", + "334344 67BWokCtjylxW4hsq2Ecys \n", + "334345 4VKveoCmYHMm74GFORF9gm \n", + "334346 48l5jPrKfDdVLLgL89kQxi \n", + "334347 1L9FE9o1C480C0Argv1K3u \n", + "334348 3ioWkuIRTynwbAgfPP1eVL \n", + "334349 2J4Mu7AEozVHgoMzOy214L \n", + "\n", + " song 
\\\n", + "241999 Silhouette \n", + "334300 Shootin' Creek (with The North Carolina Ramblers) \n", + "334301 Baltimore Fire (with The North Carolina Ramblers) \n", + "334302 Leaving Home (with The North Carolina Ramblers) \n", + "334303 There'll Come a Time (with The North Carolina ... \n", + "334304 White House Blues (with The North Carolina Ram... \n", + "334305 The Highwayman (with The North Carolina Ramblers) \n", + "334306 Hungry Hash House (with The North Carolina Ram... \n", + "334307 The Letter that Never Came (with The North Car... \n", + "334308 Take a Drink on Me (with The North Carolina Ra... \n", + "334309 Husband and Wife Were Angry One Night (with Th... \n", + "334310 Ramblin' Blues (with The North Carolina Ramblers) \n", + "334311 Took My Gal A-Walkin' (with The North Carolina... \n", + "334312 Old and Only in the Way (with The North Caroli... \n", + "334313 Don't Let Your Deal Go Down Blues (with The No... \n", + "334314 Bill Mason (with The North Carolina Ramblers) \n", + "334315 A Kiss Waltz (with The North Carolina Ramblers... \n", + "334316 Flop Eared Mule (with Chris Howland & The High... \n", + "334317 A Trip to New York, Pt. 1 (with Allegheny High... \n", + "334318 Sweet Sixteen (with The North Carolina Ramblers) \n", + "334319 Write a Letter to My Mother (with The North Ca... \n", + "334320 If the River Was Whiskey (with The North Carol... \n", + "334321 Mother's Last Farewell Kiss (with The North Ca... \n", + "334322 Milwaukee Blues (with The North Carolina Rambl... \n", + "334323 Where the Whippoorwill Is Whispering Good-Nigh... \n", + "334324 The Girl I Left in Sunny Tennessee (with The N... \n", + "334325 Sunny Tennesee (with Floyd County Ramblers) - ... \n", + "334326 Bulldog Down in Sunny Tennessee (with Dock Walsh) \n", + "334327 Moving Day (with Arthur Collins) \n", + "334328 It's Movin' Day (with The North Carolina Rambl... 
\n", + "334329 Home Sweet, Home (with Frank Jenkins) \n", + "334330 I'm the Man that Rode the Mule 'Round the Worl... \n", + "334331 Man that Rode the Mule Around the World (with ... \n", + "334332 Lynchburg Town (with Chris Howland & The Highl... \n", + "334333 Going Down to Lynchburg Town / Don't Let Your ... \n", + "334334 Some One (with Branch & Coleman) \n", + "334335 Monkey on a String (with Cal Stewart) \n", + "334336 Monkey on a String (with The North Carolina Ra... \n", + "334337 Can I Sleep in Your Barn Tonight Mister (with ... \n", + "334338 May I Sleep in Your Barn Tonight, Mister (with... \n", + "334339 Married Life Blues (with Byron Parker) \n", + "334340 The Infanta March (with Fred van Eps) \n", + "334341 Sunset March (with The North Carolina Ramblers) \n", + "334342 I'll Roll in My Sweet Baby's Arms (with Carter... \n", + "334343 Goodbye Eliza Jane (with Peerless Quartet) \n", + "334344 Good-Bye Sweet Liza Jane (with The North Carol... \n", + "334345 Goodbye Booze (with The North Carolina Rambler... \n", + "334346 Goodbye Booze (with Fate Norris & Gid Tanner) \n", + "334347 You Ain't Talking to Me (with Eddie Morton) \n", + "334348 You Ain't Talkin' to Me (with The North Caroli... \n", + "334349 If I Lose, I Don't Care (with The North Caroli... 
\n", + "\n", + " album artist acousticness danceability \\\n", + "241999 Silhouette 0.935 0.731 \n", + "334300 Roots Of Country Music (1965) 0.953 0.712 \n", + "334301 Roots Of Country Music (1965) 0.901 0.512 \n", + "334302 Roots Of Country Music (1965) 0.988 0.420 \n", + "334303 Roots Of Country Music (1965) 0.961 0.445 \n", + "334304 Roots Of Country Music (1965) 0.919 0.546 \n", + "334305 Roots Of Country Music (1965) 0.930 0.523 \n", + "334306 Roots Of Country Music (1965) 0.922 0.532 \n", + "334307 Roots Of Country Music (1965) 0.965 0.533 \n", + "334308 Roots Of Country Music (1965) 0.924 0.588 \n", + "334309 Roots Of Country Music (1965) 0.963 0.710 \n", + "334310 Roots Of Country Music (1965) 0.927 0.681 \n", + "334311 Roots Of Country Music (1965) 0.952 0.554 \n", + "334312 Roots Of Country Music (1965) 0.969 0.499 \n", + "334313 Roots Of Country Music (1965) 0.990 0.594 \n", + "334314 Roots Of Country Music (1965) 0.910 0.751 \n", + "334315 Roots Of Country Music (1965) 0.875 0.407 \n", + "334316 Roots Of Country Music (1965) 0.974 0.638 \n", + "334317 Roots Of Country Music (1965) 0.925 0.573 \n", + "334318 Roots Of Country Music (1965) 0.917 0.644 \n", + "334319 Roots Of Country Music (1965) 0.875 0.696 \n", + "334320 Roots Of Country Music (1965) 0.923 0.528 \n", + "334321 Roots Of Country Music (1965) 0.966 0.486 \n", + "334322 Roots Of Country Music (1965) 0.978 0.738 \n", + "334323 Roots Of Country Music (1965) 0.974 0.667 \n", + "334324 Roots Of Country Music (1965) 0.991 0.709 \n", + "334325 Roots Of Country Music (1965) 0.944 0.491 \n", + "334326 Roots Of Country Music (1965) 0.964 0.648 \n", + "334327 Roots Of Country Music (1965) 0.994 0.658 \n", + "334328 Roots Of Country Music (1965) 0.871 0.540 \n", + "334329 Roots Of Country Music (1965) 0.974 0.481 \n", + "334330 Roots Of Country Music (1965) 0.990 0.668 \n", + "334331 Roots Of Country Music (1965) 0.946 0.692 \n", + "334332 Roots Of Country Music (1965) 0.970 0.709 \n", + "334333 Roots 
Of Country Music (1965) 0.993 0.582 \n", + "334334 Roots Of Country Music (1965) 0.983 0.462 \n", + "334335 Roots Of Country Music (1965) 0.983 0.590 \n", + "334336 Roots Of Country Music (1965) 0.989 0.589 \n", + "334337 Roots Of Country Music (1965) 0.993 0.675 \n", + "334338 Roots Of Country Music (1965) 0.927 0.649 \n", + "334339 Roots Of Country Music (1965) 0.969 0.567 \n", + "334340 Roots Of Country Music (1965) 0.968 0.551 \n", + "334341 Roots Of Country Music (1965) 0.979 0.637 \n", + "334342 Roots Of Country Music (1965) 0.965 0.657 \n", + "334343 Roots Of Country Music (1965) 0.995 0.655 \n", + "334344 Roots Of Country Music (1965) 0.947 0.657 \n", + "334345 Roots Of Country Music (1965) 0.987 0.399 \n", + "334346 Roots Of Country Music (1965) 0.988 0.608 \n", + "334347 Roots Of Country Music (1965) 0.996 0.533 \n", + "334348 Roots Of Country Music (1965) 0.931 0.628 \n", + "334349 Roots Of Country Music (1965) 0.938 0.558 \n", + "\n", + " duration_ms energy instrumentalness key ... mode speechiness \\\n", + "241999 223507.0 0.167 0.000000 6.0 ... 0.0 0.0297 \n", + "334300 204307.0 0.514 0.004250 5.0 ... 1.0 0.0391 \n", + "334301 191800.0 0.439 0.000001 7.0 ... 1.0 0.0311 \n", + "334302 189253.0 0.431 0.000314 0.0 ... 1.0 0.0406 \n", + "334303 209027.0 0.332 0.000000 5.0 ... 1.0 0.0336 \n", + "334304 208813.0 0.409 0.000006 7.0 ... 1.0 0.0477 \n", + "334305 198253.0 0.309 0.000000 0.0 ... 1.0 0.0347 \n", + "334306 203227.0 0.397 0.000001 7.0 ... 1.0 0.0458 \n", + "334307 168800.0 0.412 0.000000 10.0 ... 1.0 0.0462 \n", + "334308 194680.0 0.536 0.000031 2.0 ... 1.0 0.0333 \n", + "334309 170813.0 0.322 0.000000 7.0 ... 1.0 0.0288 \n", + "334310 185933.0 0.448 0.000049 0.0 ... 1.0 0.0366 \n", + "334311 168147.0 0.312 0.000007 2.0 ... 1.0 0.0315 \n", + "334312 207880.0 0.290 0.000000 5.0 ... 1.0 0.0473 \n", + "334313 172027.0 0.413 0.047700 2.0 ... 1.0 0.0367 \n", + "334314 180267.0 0.478 0.000022 7.0 ... 
1.0 0.0775 \n", + "334315 188373.0 0.355 0.809000 5.0 ... 1.0 0.0347 \n", + "334316 178387.0 0.750 0.534000 2.0 ... 1.0 0.0399 \n", + "334317 185413.0 0.416 0.000003 7.0 ... 1.0 0.3810 \n", + "334318 174053.0 0.403 0.000000 7.0 ... 1.0 0.0450 \n", + "334319 181773.0 0.407 0.000141 8.0 ... 1.0 0.0267 \n", + "334320 189573.0 0.354 0.000007 3.0 ... 1.0 0.0499 \n", + "334321 183853.0 0.226 0.000000 5.0 ... 1.0 0.0351 \n", + "334322 197267.0 0.411 0.000029 1.0 ... 1.0 0.0390 \n", + "334323 189733.0 0.239 0.000000 8.0 ... 1.0 0.0335 \n", + "334324 201840.0 0.423 0.042300 2.0 ... 1.0 0.0326 \n", + "334325 196267.0 0.391 0.000000 0.0 ... 1.0 0.0334 \n", + "334326 160053.0 0.397 0.000008 5.0 ... 1.0 0.0402 \n", + "334327 193360.0 0.194 0.000000 5.0 ... 1.0 0.0849 \n", + "334328 207107.0 0.349 0.000013 6.0 ... 1.0 0.0339 \n", + "334329 153160.0 0.537 0.881000 2.0 ... 1.0 0.0414 \n", + "334330 183293.0 0.392 0.419000 2.0 ... 1.0 0.0476 \n", + "334331 175547.0 0.341 0.000032 8.0 ... 1.0 0.0436 \n", + "334332 180667.0 0.652 0.651000 0.0 ... 1.0 0.0313 \n", + "334333 188600.0 0.565 0.882000 8.0 ... 1.0 0.0350 \n", + "334334 183360.0 0.277 0.000000 10.0 ... 1.0 0.0323 \n", + "334335 151040.0 0.366 0.000000 11.0 ... 1.0 0.2550 \n", + "334336 187253.0 0.454 0.007220 10.0 ... 1.0 0.0391 \n", + "334337 191693.0 0.360 0.002530 9.0 ... 1.0 0.0406 \n", + "334338 171600.0 0.522 0.006000 6.0 ... 1.0 0.0599 \n", + "334339 160987.0 0.369 0.000003 2.0 ... 1.0 0.0323 \n", + "334340 260560.0 0.631 0.950000 0.0 ... 1.0 0.0571 \n", + "334341 159760.0 0.450 0.941000 0.0 ... 1.0 0.0652 \n", + "334342 178080.0 0.438 0.000000 10.0 ... 0.0 0.0846 \n", + "334343 97107.0 0.458 0.000000 4.0 ... 0.0 0.8840 \n", + "334344 185147.0 0.350 0.000000 7.0 ... 1.0 0.0387 \n", + "334345 195240.0 0.305 0.005340 5.0 ... 1.0 0.0338 \n", + "334346 170333.0 0.286 0.000003 8.0 ... 1.0 0.0406 \n", + "334347 174920.0 0.277 0.001640 4.0 ... 0.0 0.1960 \n", + "334348 177480.0 0.403 0.000001 2.0 ... 
1.0 0.0429 \n", + "334349 187600.0 0.366 0.000009 2.0 ... 1.0 0.0497 \n", + "\n", + " tempo time_signature valence album_id date \\\n", + "241999 94.932 4.0 0.274 61vU2oVw8d5QkTNhW8PYla 2018-02-09 \n", + "334300 133.558 4.0 0.923 0jckzrST0anXNTY13KMbSw 2005 \n", + "334301 115.861 4.0 0.935 0jckzrST0anXNTY13KMbSw 2005 \n", + "334302 205.555 3.0 0.901 0jckzrST0anXNTY13KMbSw 2005 \n", + "334303 182.993 3.0 0.831 0jckzrST0anXNTY13KMbSw 2005 \n", + "334304 121.444 4.0 0.815 0jckzrST0anXNTY13KMbSw 2005 \n", + "334305 96.058 4.0 0.776 0jckzrST0anXNTY13KMbSw 2005 \n", + "334306 116.492 4.0 0.902 0jckzrST0anXNTY13KMbSw 2005 \n", + "334307 106.316 4.0 0.781 0jckzrST0anXNTY13KMbSw 2005 \n", + "334308 125.672 4.0 0.974 0jckzrST0anXNTY13KMbSw 2005 \n", + "334309 97.044 4.0 0.933 0jckzrST0anXNTY13KMbSw 2005 \n", + "334310 121.730 4.0 0.921 0jckzrST0anXNTY13KMbSw 2005 \n", + "334311 104.923 4.0 0.928 0jckzrST0anXNTY13KMbSw 2005 \n", + "334312 175.944 4.0 0.649 0jckzrST0anXNTY13KMbSw 2005 \n", + "334313 137.338 4.0 0.905 0jckzrST0anXNTY13KMbSw 2005 \n", + "334314 119.050 4.0 0.956 0jckzrST0anXNTY13KMbSw 2005 \n", + "334315 165.145 3.0 0.938 0jckzrST0anXNTY13KMbSw 2005 \n", + "334316 131.764 4.0 0.885 0jckzrST0anXNTY13KMbSw 2005 \n", + "334317 133.930 4.0 0.850 0jckzrST0anXNTY13KMbSw 2005 \n", + "334318 114.801 4.0 0.892 0jckzrST0anXNTY13KMbSw 2005 \n", + "334319 97.032 4.0 0.929 0jckzrST0anXNTY13KMbSw 2005 \n", + "334320 99.170 4.0 0.743 0jckzrST0anXNTY13KMbSw 2005 \n", + "334321 173.193 4.0 0.577 0jckzrST0anXNTY13KMbSw 2005 \n", + "334322 106.908 4.0 0.919 0jckzrST0anXNTY13KMbSw 2005 \n", + "334323 90.350 4.0 0.721 0jckzrST0anXNTY13KMbSw 2005 \n", + "334324 128.847 4.0 0.885 0jckzrST0anXNTY13KMbSw 2005 \n", + "334325 114.447 4.0 0.624 0jckzrST0anXNTY13KMbSw 2005 \n", + "334326 167.310 4.0 0.922 0jckzrST0anXNTY13KMbSw 2005 \n", + "334327 75.287 4.0 0.698 0jckzrST0anXNTY13KMbSw 2005 \n", + "334328 95.801 4.0 0.842 0jckzrST0anXNTY13KMbSw 2005 \n", + "334329 129.572 4.0 0.974 
0jckzrST0anXNTY13KMbSw 2005 \n", + "334330 139.631 4.0 0.923 0jckzrST0anXNTY13KMbSw 2005 \n", + "334331 123.576 4.0 0.797 0jckzrST0anXNTY13KMbSw 2005 \n", + "334332 131.121 4.0 0.773 0jckzrST0anXNTY13KMbSw 2005 \n", + "334333 145.412 4.0 0.936 0jckzrST0anXNTY13KMbSw 2005 \n", + "334334 178.321 3.0 0.634 0jckzrST0anXNTY13KMbSw 2005 \n", + "334335 83.011 3.0 0.935 0jckzrST0anXNTY13KMbSw 2005 \n", + "334336 102.926 4.0 0.887 0jckzrST0anXNTY13KMbSw 2005 \n", + "334337 132.818 4.0 0.865 0jckzrST0anXNTY13KMbSw 2005 \n", + "334338 126.300 4.0 0.567 0jckzrST0anXNTY13KMbSw 2005 \n", + "334339 113.087 4.0 0.740 0jckzrST0anXNTY13KMbSw 2005 \n", + "334340 123.816 4.0 0.963 0jckzrST0anXNTY13KMbSw 2005 \n", + "334341 114.502 4.0 0.648 0jckzrST0anXNTY13KMbSw 2005 \n", + "334342 144.214 4.0 0.813 0jckzrST0anXNTY13KMbSw 2005 \n", + "334343 114.851 1.0 0.405 0jckzrST0anXNTY13KMbSw 2005 \n", + "334344 105.339 4.0 0.898 0jckzrST0anXNTY13KMbSw 2005 \n", + "334345 182.023 4.0 0.848 0jckzrST0anXNTY13KMbSw 2005 \n", + "334346 120.245 4.0 0.650 0jckzrST0anXNTY13KMbSw 2005 \n", + "334347 85.184 4.0 0.765 0jckzrST0anXNTY13KMbSw 2005 \n", + "334348 122.515 4.0 0.867 0jckzrST0anXNTY13KMbSw 2005 \n", + "334349 124.802 4.0 0.873 0jckzrST0anXNTY13KMbSw 2005 \n", + "\n", + " rank album_length weeks_top \n", + "241999 NaN NaN NaN \n", + "334300 NaN NaN NaN \n", + "334301 NaN NaN NaN \n", + "334302 NaN NaN NaN \n", + "334303 NaN NaN NaN \n", + "334304 NaN NaN NaN \n", + "334305 NaN NaN NaN \n", + "334306 NaN NaN NaN \n", + "334307 NaN NaN NaN \n", + "334308 NaN NaN NaN \n", + "334309 NaN NaN NaN \n", + "334310 NaN NaN NaN \n", + "334311 NaN NaN NaN \n", + "334312 NaN NaN NaN \n", + "334313 NaN NaN NaN \n", + "334314 NaN NaN NaN \n", + "334315 NaN NaN NaN \n", + "334316 NaN NaN NaN \n", + "334317 NaN NaN NaN \n", + "334318 NaN NaN NaN \n", + "334319 NaN NaN NaN \n", + "334320 NaN NaN NaN \n", + "334321 NaN NaN NaN \n", + "334322 NaN NaN NaN \n", + "334323 NaN NaN NaN \n", + "334324 NaN NaN NaN \n", + 
"334325 NaN NaN NaN \n", + "334326 NaN NaN NaN \n", + "334327 NaN NaN NaN \n", + "334328 NaN NaN NaN \n", + "334329 NaN NaN NaN \n", + "334330 NaN NaN NaN \n", + "334331 NaN NaN NaN \n", + "334332 NaN NaN NaN \n", + "334333 NaN NaN NaN \n", + "334334 NaN NaN NaN \n", + "334335 NaN NaN NaN \n", + "334336 NaN NaN NaN \n", + "334337 NaN NaN NaN \n", + "334338 NaN NaN NaN \n", + "334339 NaN NaN NaN \n", + "334340 NaN NaN NaN \n", + "334341 NaN NaN NaN \n", + "334342 NaN NaN NaN \n", + "334343 NaN NaN NaN \n", + "334344 NaN NaN NaN \n", + "334345 NaN NaN NaN \n", + "334346 NaN NaN NaN \n", + "334347 NaN NaN NaN \n", + "334348 NaN NaN NaN \n", + "334349 NaN NaN NaN \n", + "\n", + "[51 rows x 22 columns]" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking[df_ranking['rank'].isnull()]" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "df_ranking.dropna(subset=[\"rank\"],inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id 0\n", + "song 0\n", + "album 0\n", + "artist 0\n", + "acousticness 5\n", + "danceability 5\n", + "duration_ms 5\n", + "energy 5\n", + "instrumentalness 5\n", + "key 5\n", + "liveness 5\n", + "loudness 5\n", + "mode 5\n", + "speechiness 5\n", + "tempo 5\n", + "time_signature 5\n", + "valence 5\n", + "album_id 0\n", + "date 0\n", + "rank 0\n", + "album_length 0\n", + "weeks_top 0\n", + "dtype: int64" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "df_ranking.dropna(inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id object\n", + "song 
object\n", + "album object\n", + "artist object\n", + "acousticness float64\n", + "danceability float64\n", + "duration_ms float64\n", + "energy float64\n", + "instrumentalness float64\n", + "key float64\n", + "liveness float64\n", + "loudness float64\n", + "mode float64\n", + "speechiness float64\n", + "tempo float64\n", + "time_signature float64\n", + "valence float64\n", + "album_id object\n", + "date datetime64[ns]\n", + "rank float64\n", + "album_length float64\n", + "weeks_top float64\n", + "dtype: object" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking[\"date\"] = pd.to_datetime(df_ranking[\"date\"])\n", + "df_ranking.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencerankalbum_lengthweeks_top
acousticness1.000000-0.182143-0.107886-0.7103060.122013-0.027495-0.009117-0.5597770.0772210.015607-0.181678-0.183628-0.2222750.0409180.1125580.019238
danceability-0.1821431.000000-0.0629800.111136-0.1951270.022850-0.1493990.139794-0.0543020.153791-0.1000580.2039100.545940-0.024367-0.063765-0.021480
duration_ms-0.107886-0.0629801.0000000.0593780.0538160.0037080.0734630.032735-0.051887-0.0685700.0138740.056457-0.126540-0.005704-0.040620-0.001930
energy-0.7103060.1111360.0593781.000000-0.1272210.0327370.1745210.762424-0.0672450.1154650.2196660.1906270.313298-0.030172-0.079614-0.021812
instrumentalness0.122013-0.1951270.053816-0.1272211.000000-0.011255-0.052372-0.282310-0.053474-0.104155-0.027658-0.045885-0.1745940.0640750.093708-0.003262
key-0.0274950.0228500.0037080.032737-0.0112551.000000-0.0015570.020048-0.1577060.0279930.0021520.0067780.0253400.001331-0.006069-0.002369
liveness-0.009117-0.1493990.0734630.174521-0.052372-0.0015571.0000000.0625700.0117190.2356810.002506-0.012652-0.034016-0.0070860.058215-0.006417
loudness-0.5597770.1397940.0327350.762424-0.2823100.0200480.0625701.000000-0.0180380.0075240.1782240.1618840.192464-0.049147-0.116179-0.022634
mode0.077221-0.054302-0.051887-0.067245-0.053474-0.1577060.011719-0.0180381.000000-0.0763680.011123-0.0129780.0093700.0042670.0126620.015574
speechiness0.0156070.153791-0.0685700.115465-0.1041550.0279930.2356810.007524-0.0763681.000000-0.030372-0.0101850.029355-0.0204770.014656-0.013571
tempo-0.181678-0.1000580.0138740.219666-0.0276580.0021520.0025060.1782240.011123-0.0303721.0000000.0598490.112866-0.003569-0.0332320.000926
time_signature-0.1836280.2039100.0564570.190627-0.0458850.006778-0.0126520.161884-0.012978-0.0101850.0598491.0000000.162042-0.011134-0.041868-0.004096
valence-0.2222750.545940-0.1265400.313298-0.1745940.025340-0.0340160.1924640.0093700.0293550.1128660.1620421.0000000.008667-0.0234290.006013
rank0.040918-0.024367-0.005704-0.0301720.0640750.001331-0.007086-0.0491470.004267-0.020477-0.003569-0.0111340.0086671.0000000.010702-0.093513
album_length0.112558-0.063765-0.040620-0.0796140.093708-0.0060690.058215-0.1161790.0126620.014656-0.033232-0.041868-0.0234290.0107021.0000000.071388
weeks_top0.019238-0.021480-0.001930-0.021812-0.003262-0.002369-0.006417-0.0226340.015574-0.0135710.000926-0.0040960.006013-0.0935130.0713881.000000
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy \\\n", + "acousticness 1.000000 -0.182143 -0.107886 -0.710306 \n", + "danceability -0.182143 1.000000 -0.062980 0.111136 \n", + "duration_ms -0.107886 -0.062980 1.000000 0.059378 \n", + "energy -0.710306 0.111136 0.059378 1.000000 \n", + "instrumentalness 0.122013 -0.195127 0.053816 -0.127221 \n", + "key -0.027495 0.022850 0.003708 0.032737 \n", + "liveness -0.009117 -0.149399 0.073463 0.174521 \n", + "loudness -0.559777 0.139794 0.032735 0.762424 \n", + "mode 0.077221 -0.054302 -0.051887 -0.067245 \n", + "speechiness 0.015607 0.153791 -0.068570 0.115465 \n", + "tempo -0.181678 -0.100058 0.013874 0.219666 \n", + "time_signature -0.183628 0.203910 0.056457 0.190627 \n", + "valence -0.222275 0.545940 -0.126540 0.313298 \n", + "rank 0.040918 -0.024367 -0.005704 -0.030172 \n", + "album_length 0.112558 -0.063765 -0.040620 -0.079614 \n", + "weeks_top 0.019238 -0.021480 -0.001930 -0.021812 \n", + "\n", + " instrumentalness key liveness loudness mode \\\n", + "acousticness 0.122013 -0.027495 -0.009117 -0.559777 0.077221 \n", + "danceability -0.195127 0.022850 -0.149399 0.139794 -0.054302 \n", + "duration_ms 0.053816 0.003708 0.073463 0.032735 -0.051887 \n", + "energy -0.127221 0.032737 0.174521 0.762424 -0.067245 \n", + "instrumentalness 1.000000 -0.011255 -0.052372 -0.282310 -0.053474 \n", + "key -0.011255 1.000000 -0.001557 0.020048 -0.157706 \n", + "liveness -0.052372 -0.001557 1.000000 0.062570 0.011719 \n", + "loudness -0.282310 0.020048 0.062570 1.000000 -0.018038 \n", + "mode -0.053474 -0.157706 0.011719 -0.018038 1.000000 \n", + "speechiness -0.104155 0.027993 0.235681 0.007524 -0.076368 \n", + "tempo -0.027658 0.002152 0.002506 0.178224 0.011123 \n", + "time_signature -0.045885 0.006778 -0.012652 0.161884 -0.012978 \n", + "valence -0.174594 0.025340 -0.034016 0.192464 0.009370 \n", + "rank 0.064075 0.001331 -0.007086 -0.049147 0.004267 \n", + "album_length 0.093708 -0.006069 0.058215 
-0.116179 0.012662 \n", + "weeks_top -0.003262 -0.002369 -0.006417 -0.022634 0.015574 \n", + "\n", + " speechiness tempo time_signature valence rank \\\n", + "acousticness 0.015607 -0.181678 -0.183628 -0.222275 0.040918 \n", + "danceability 0.153791 -0.100058 0.203910 0.545940 -0.024367 \n", + "duration_ms -0.068570 0.013874 0.056457 -0.126540 -0.005704 \n", + "energy 0.115465 0.219666 0.190627 0.313298 -0.030172 \n", + "instrumentalness -0.104155 -0.027658 -0.045885 -0.174594 0.064075 \n", + "key 0.027993 0.002152 0.006778 0.025340 0.001331 \n", + "liveness 0.235681 0.002506 -0.012652 -0.034016 -0.007086 \n", + "loudness 0.007524 0.178224 0.161884 0.192464 -0.049147 \n", + "mode -0.076368 0.011123 -0.012978 0.009370 0.004267 \n", + "speechiness 1.000000 -0.030372 -0.010185 0.029355 -0.020477 \n", + "tempo -0.030372 1.000000 0.059849 0.112866 -0.003569 \n", + "time_signature -0.010185 0.059849 1.000000 0.162042 -0.011134 \n", + "valence 0.029355 0.112866 0.162042 1.000000 0.008667 \n", + "rank -0.020477 -0.003569 -0.011134 0.008667 1.000000 \n", + "album_length 0.014656 -0.033232 -0.041868 -0.023429 0.010702 \n", + "weeks_top -0.013571 0.000926 -0.004096 0.006013 -0.093513 \n", + "\n", + " album_length weeks_top \n", + "acousticness 0.112558 0.019238 \n", + "danceability -0.063765 -0.021480 \n", + "duration_ms -0.040620 -0.001930 \n", + "energy -0.079614 -0.021812 \n", + "instrumentalness 0.093708 -0.003262 \n", + "key -0.006069 -0.002369 \n", + "liveness 0.058215 -0.006417 \n", + "loudness -0.116179 -0.022634 \n", + "mode 0.012662 0.015574 \n", + "speechiness 0.014656 -0.013571 \n", + "tempo -0.033232 0.000926 \n", + "time_signature -0.041868 -0.004096 \n", + "valence -0.023429 0.006013 \n", + "rank 0.010702 -0.093513 \n", + "album_length 1.000000 0.071388 \n", + "weeks_top 0.071388 1.000000 " + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking[df_ranking['rank']>=40].corr()" + ] + }, + { + 
"cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "#sns.pairplot(df_ranking)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import linear_model\n", + "from sklearn.preprocessing import PolynomialFeatures, StandardScaler\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['id', 'song', 'album', 'artist', 'acousticness', 'danceability',\n", + " 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness',\n", + " 'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence',\n", + " 'album_id', 'date', 'rank', 'album_length', 'weeks_top'],\n", + " dtype='object')" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencealbum_lengthweeks_top
00.05550.754142301.00.6630.0000006.00.101-6.3110.00.42790.1954.00.20720.03.0
10.29200.860152829.00.4180.0000007.00.106-9.0610.00.158126.0234.00.37420.03.0
20.15300.718215305.00.4540.0000468.00.116-9.0121.00.12789.4834.00.19620.03.0
30.01530.581189487.00.6620.0000009.00.111-5.2391.00.30393.0234.00.43420.03.0
40.02350.736192779.00.6220.0000006.00.151-4.6530.00.133191.9714.00.50620.03.0
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy instrumentalness key \\\n", + "0 0.0555 0.754 142301.0 0.663 0.000000 6.0 \n", + "1 0.2920 0.860 152829.0 0.418 0.000000 7.0 \n", + "2 0.1530 0.718 215305.0 0.454 0.000046 8.0 \n", + "3 0.0153 0.581 189487.0 0.662 0.000000 9.0 \n", + "4 0.0235 0.736 192779.0 0.622 0.000000 6.0 \n", + "\n", + " liveness loudness mode speechiness tempo time_signature valence \\\n", + "0 0.101 -6.311 0.0 0.427 90.195 4.0 0.207 \n", + "1 0.106 -9.061 0.0 0.158 126.023 4.0 0.374 \n", + "2 0.116 -9.012 1.0 0.127 89.483 4.0 0.196 \n", + "3 0.111 -5.239 1.0 0.303 93.023 4.0 0.434 \n", + "4 0.151 -4.653 0.0 0.133 191.971 4.0 0.506 \n", + "\n", + " album_length weeks_top \n", + "0 20.0 3.0 \n", + "1 20.0 3.0 \n", + "2 20.0 3.0 \n", + "3 20.0 3.0 \n", + "4 20.0 3.0 " + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = df_ranking.drop(columns=[\"song\",\"album\",\"artist\",\"id\",\"rank\",\"album_id\",\"date\"])\n", + "y = df_ranking[\"rank\"]\n", + "x.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nclf = linear_model.SGDRegressor()\\nclf.fit(x_train, y_train)\\n\\n# clf is a trained model\\n\\ny_predicted = clf.predict(x_test)\\n\\n'" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\"\"\"\n", + "clf = linear_model.SGDRegressor()\n", + "clf.fit(x_train, y_train)\n", + "\n", + "# clf is a trained model\n", + "\n", + "y_predicted = clf.predict(x_test)\n", + "\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + 
"#clf.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "# Create linear regression\n", + "regr = linear_model.LinearRegression()\n", + "\n", + "# Fit the linear regression\n", + "model = regr.fit(x_train, y_train)\n", + "# model = regr.fit(X_inter, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.06136701672338518" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.06176784039100125" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(x_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "knnr = KNeighborsRegressor(n_neighbors = 3)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "modelkn = knnr.fit(x_train, y_train) #fit the model\n", + "y_pred = knnr.predict(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.031727221643046866" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "modelkn.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.tree import DecisionTreeRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + 
"outputs": [], + "source": [ + "tree = DecisionTreeRegressor(random_state = 29)\n", + "model_tree = tree.fit(x_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.10875448097643847" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_tree.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencerankalbum_lengthweeks_top
acousticness1.000000-0.196245-0.120949-0.7062070.118138-0.023548-0.006694-0.5648740.0806220.008277-0.175597-0.186598-0.2191840.0785270.101783-0.001565
danceability-0.1962451.000000-0.0599580.110363-0.1909940.022932-0.1526060.147712-0.0702430.174174-0.1104150.2049450.528247-0.063117-0.051154-0.012421
duration_ms-0.120949-0.0599581.0000000.0679770.0456240.0047580.0676540.045632-0.048022-0.0792620.0187410.060452-0.123794-0.007234-0.0439100.009410
energy-0.7062070.1103630.0679771.000000-0.1194900.0302060.1689780.760206-0.0681250.1018270.2138480.1922080.312593-0.054419-0.069048-0.004840
instrumentalness0.118138-0.1909940.045624-0.1194901.000000-0.007926-0.034715-0.275935-0.040215-0.106122-0.022825-0.046484-0.1579120.1024550.094062-0.003196
key-0.0235480.0229320.0047580.030206-0.0079261.000000-0.0005650.016887-0.1622580.031916-0.0024990.0063370.026532-0.000747-0.003424-0.002516
liveness-0.006694-0.1526060.0676540.168978-0.034715-0.0005651.0000000.0443880.0107720.2214600.002441-0.014402-0.0267700.0118810.0688290.009502
loudness-0.5648740.1477120.0456320.760206-0.2759350.0168870.0443881.000000-0.029998-0.0011240.1709740.1691110.179312-0.101858-0.102410-0.019761
mode0.080622-0.070243-0.048022-0.068125-0.040215-0.1622580.010772-0.0299981.000000-0.0852740.014724-0.015830-0.0004690.0242170.0076100.012935
speechiness0.0082770.174174-0.0792620.101827-0.1061220.0319160.221460-0.001124-0.0852741.000000-0.031299-0.0076320.047409-0.0448720.028152-0.019129
tempo-0.175597-0.1104150.0187410.213848-0.022825-0.0024990.0024410.1709740.014724-0.0312991.0000000.0577680.099147-0.003032-0.0324500.000883
time_signature-0.1865980.2049450.0604520.192208-0.0464840.006337-0.0144020.169111-0.015830-0.0076320.0577681.0000000.159691-0.025005-0.0349460.003075
valence-0.2191840.528247-0.1237940.312593-0.1579120.026532-0.0267700.179312-0.0004690.0474090.0991470.1596911.0000000.014298-0.0171170.018637
rank0.078527-0.063117-0.007234-0.0544190.102455-0.0007470.011881-0.1018580.024217-0.044872-0.003032-0.0250050.0142981.0000000.023332-0.184133
album_length0.101783-0.051154-0.043910-0.0690480.094062-0.0034240.068829-0.1024100.0076100.028152-0.032450-0.034946-0.0171170.0233321.0000000.092760
weeks_top-0.001565-0.0124210.009410-0.004840-0.003196-0.0025160.009502-0.0197610.012935-0.0191290.0008830.0030750.018637-0.1841330.0927601.000000
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy \\\n", + "acousticness 1.000000 -0.196245 -0.120949 -0.706207 \n", + "danceability -0.196245 1.000000 -0.059958 0.110363 \n", + "duration_ms -0.120949 -0.059958 1.000000 0.067977 \n", + "energy -0.706207 0.110363 0.067977 1.000000 \n", + "instrumentalness 0.118138 -0.190994 0.045624 -0.119490 \n", + "key -0.023548 0.022932 0.004758 0.030206 \n", + "liveness -0.006694 -0.152606 0.067654 0.168978 \n", + "loudness -0.564874 0.147712 0.045632 0.760206 \n", + "mode 0.080622 -0.070243 -0.048022 -0.068125 \n", + "speechiness 0.008277 0.174174 -0.079262 0.101827 \n", + "tempo -0.175597 -0.110415 0.018741 0.213848 \n", + "time_signature -0.186598 0.204945 0.060452 0.192208 \n", + "valence -0.219184 0.528247 -0.123794 0.312593 \n", + "rank 0.078527 -0.063117 -0.007234 -0.054419 \n", + "album_length 0.101783 -0.051154 -0.043910 -0.069048 \n", + "weeks_top -0.001565 -0.012421 0.009410 -0.004840 \n", + "\n", + " instrumentalness key liveness loudness mode \\\n", + "acousticness 0.118138 -0.023548 -0.006694 -0.564874 0.080622 \n", + "danceability -0.190994 0.022932 -0.152606 0.147712 -0.070243 \n", + "duration_ms 0.045624 0.004758 0.067654 0.045632 -0.048022 \n", + "energy -0.119490 0.030206 0.168978 0.760206 -0.068125 \n", + "instrumentalness 1.000000 -0.007926 -0.034715 -0.275935 -0.040215 \n", + "key -0.007926 1.000000 -0.000565 0.016887 -0.162258 \n", + "liveness -0.034715 -0.000565 1.000000 0.044388 0.010772 \n", + "loudness -0.275935 0.016887 0.044388 1.000000 -0.029998 \n", + "mode -0.040215 -0.162258 0.010772 -0.029998 1.000000 \n", + "speechiness -0.106122 0.031916 0.221460 -0.001124 -0.085274 \n", + "tempo -0.022825 -0.002499 0.002441 0.170974 0.014724 \n", + "time_signature -0.046484 0.006337 -0.014402 0.169111 -0.015830 \n", + "valence -0.157912 0.026532 -0.026770 0.179312 -0.000469 \n", + "rank 0.102455 -0.000747 0.011881 -0.101858 0.024217 \n", + "album_length 0.094062 -0.003424 0.068829 
-0.102410 0.007610 \n", + "weeks_top -0.003196 -0.002516 0.009502 -0.019761 0.012935 \n", + "\n", + " speechiness tempo time_signature valence rank \\\n", + "acousticness 0.008277 -0.175597 -0.186598 -0.219184 0.078527 \n", + "danceability 0.174174 -0.110415 0.204945 0.528247 -0.063117 \n", + "duration_ms -0.079262 0.018741 0.060452 -0.123794 -0.007234 \n", + "energy 0.101827 0.213848 0.192208 0.312593 -0.054419 \n", + "instrumentalness -0.106122 -0.022825 -0.046484 -0.157912 0.102455 \n", + "key 0.031916 -0.002499 0.006337 0.026532 -0.000747 \n", + "liveness 0.221460 0.002441 -0.014402 -0.026770 0.011881 \n", + "loudness -0.001124 0.170974 0.169111 0.179312 -0.101858 \n", + "mode -0.085274 0.014724 -0.015830 -0.000469 0.024217 \n", + "speechiness 1.000000 -0.031299 -0.007632 0.047409 -0.044872 \n", + "tempo -0.031299 1.000000 0.057768 0.099147 -0.003032 \n", + "time_signature -0.007632 0.057768 1.000000 0.159691 -0.025005 \n", + "valence 0.047409 0.099147 0.159691 1.000000 0.014298 \n", + "rank -0.044872 -0.003032 -0.025005 0.014298 1.000000 \n", + "album_length 0.028152 -0.032450 -0.034946 -0.017117 0.023332 \n", + "weeks_top -0.019129 0.000883 0.003075 0.018637 -0.184133 \n", + "\n", + " album_length weeks_top \n", + "acousticness 0.101783 -0.001565 \n", + "danceability -0.051154 -0.012421 \n", + "duration_ms -0.043910 0.009410 \n", + "energy -0.069048 -0.004840 \n", + "instrumentalness 0.094062 -0.003196 \n", + "key -0.003424 -0.002516 \n", + "liveness 0.068829 0.009502 \n", + "loudness -0.102410 -0.019761 \n", + "mode 0.007610 0.012935 \n", + "speechiness 0.028152 -0.019129 \n", + "tempo -0.032450 0.000883 \n", + "time_signature -0.034946 0.003075 \n", + "valence -0.017117 0.018637 \n", + "rank 0.023332 -0.184133 \n", + "album_length 1.000000 0.092760 \n", + "weeks_top 0.092760 1.000000 " + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ranking.corr()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.10788110644999849" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "\n", + "\n", + "poly_model = make_pipeline (PolynomialFeatures(2), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + "poly_model.score(x_test, y_test)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.15080483992299143" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "poly_model = make_pipeline (PolynomialFeatures(3), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + "poly_model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "poly 4_model.score(x_test, y_test) 0.10449768259651138\n" + ] + } + ], + "source": [ + "\"\"\"poly_model = make_pipeline (PolynomialFeatures(4), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + "poly_model.score(x_test, y_test)\"\"\"\n", + "print(\"poly 4_model.score(x_test, y_test)\",0.10449768259651138)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.decomposition import PCA, FastICA" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "pca = PCA(n_components=0.98, whiten=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "x_pca = pca.fit_transform(x_train)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original number of features: (271839, 15)\n", + "Reduced number of features: (271839, 1)\n" + ] + } + ], + "source": [ + "print(\"Original number of features:\", x_train.shape)\n", + "print(\"Reduced number of features:\", x_pca.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5.679213538612037e-17" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_pca.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9999981606742969" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_pca.std()" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['id', 'song', 'album', 'artist', 'acousticness', 'danceability',\n", + " 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness',\n", + " 'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence',\n", + " 'album_id', 'date'],\n", + " dtype='object')" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_acoustic_features.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "acoustic_features = df_acoustic_features.groupby(by=[\"artist\",'album'],as_index=False).agg({'acousticness':\"mean\", 'danceability':\"mean\",\n", + " 'duration_ms':\"mean\", 'energy':\"mean\", 'instrumentalness':\"mean\", 'key':\"mean\", 'liveness':\"mean\",\n", + " 'loudness':\"mean\", 'mode':\"mean\", 'speechiness':\"mean\", 'tempo':\"mean\", 'time_signature':\"mean\", 
'valence':\"mean\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
artistalbumacousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalence
0Roots Of Country Music (1965)0.9551200.5914183874.120.403120.1236934.80.235784-10.556440.940.07342125.944583.840.82632
1Silhouette0.9350000.7310223507.000.167000.0000006.00.124000-5.990000.000.0297094.932004.000.27400
2!!! (Chk Chk Chk)Myth Takes0.0919730.6465290017.300.831800.2548665.40.233740-6.939000.500.06929116.136204.000.56120
\n", + "
" + ], + "text/plain": [ + " artist album acousticness \\\n", + "0 Roots Of Country Music (1965) 0.955120 \n", + "1 Silhouette 0.935000 \n", + "2 !!! (Chk Chk Chk) Myth Takes 0.091973 \n", + "\n", + " danceability duration_ms energy instrumentalness key liveness \\\n", + "0 0.5914 183874.12 0.40312 0.123693 4.8 0.235784 \n", + "1 0.7310 223507.00 0.16700 0.000000 6.0 0.124000 \n", + "2 0.6465 290017.30 0.83180 0.254866 5.4 0.233740 \n", + "\n", + " loudness mode speechiness tempo time_signature valence \n", + "0 -10.55644 0.94 0.07342 125.94458 3.84 0.82632 \n", + "1 -5.99000 0.00 0.02970 94.93200 4.00 0.27400 \n", + "2 -6.93900 0.50 0.06929 116.13620 4.00 0.56120 " + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "acoustic_features.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "albums.drop(columns=[\"album_length\",\"track_length\" ], inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [], + "source": [ + "album_rankings = acoustic_features.merge(albums, how=\"left\", on=[\"artist\",\"album\"] )" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "album_rankings.dropna(how=\"any\", inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalenceweeks_top
20.0919730.646500290017.3000000.8318002.548659e-015.4000000.233740-6.9390000.5000000.069290116.1362004.0000000.5612001.0
30.1035430.617333224445.7500000.7794171.455423e-036.7500000.186483-8.7333330.9166670.055342126.1877504.0000000.73425026.0
40.1803320.635833230081.0833330.5824171.541667e-075.1666670.143817-11.0303330.8333330.053600138.0076674.0000000.6762506.0
50.1026700.602583211534.5000000.6899178.552500e-065.8333330.178708-8.4510830.7500000.055700128.0610003.9166670.67708356.0
60.2059180.716818205242.4545450.6499093.612727e-065.3636360.122418-9.8486360.7272730.064791142.5630914.0000000.77063626.0
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy instrumentalness \\\n", + "2 0.091973 0.646500 290017.300000 0.831800 2.548659e-01 \n", + "3 0.103543 0.617333 224445.750000 0.779417 1.455423e-03 \n", + "4 0.180332 0.635833 230081.083333 0.582417 1.541667e-07 \n", + "5 0.102670 0.602583 211534.500000 0.689917 8.552500e-06 \n", + "6 0.205918 0.716818 205242.454545 0.649909 3.612727e-06 \n", + "\n", + " key liveness loudness mode speechiness tempo \\\n", + "2 5.400000 0.233740 -6.939000 0.500000 0.069290 116.136200 \n", + "3 6.750000 0.186483 -8.733333 0.916667 0.055342 126.187750 \n", + "4 5.166667 0.143817 -11.030333 0.833333 0.053600 138.007667 \n", + "5 5.833333 0.178708 -8.451083 0.750000 0.055700 128.061000 \n", + "6 5.363636 0.122418 -9.848636 0.727273 0.064791 142.563091 \n", + "\n", + " time_signature valence weeks_top \n", + "2 4.000000 0.561200 1.0 \n", + "3 4.000000 0.734250 26.0 \n", + "4 4.000000 0.676250 6.0 \n", + "5 3.916667 0.677083 56.0 \n", + "6 4.000000 0.770636 26.0 " + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = album_rankings.drop(columns=[\"album\",\"artist\",\"rank\",\"date_start\",\"date_end\",\"days_top\"])\n", + "y = album_rankings[\"rank\"]\n", + "x.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.1082450723739965" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "\n", + "\n", + "poly_model = make_pipeline (PolynomialFeatures(2), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + 
"poly_model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idsongalbumartistacousticnessdanceabilityduration_msenergyinstrumentalnesskey...modespeechinesstempotime_signaturevalencealbum_iddaterankalbum_lengthweeks_top
00Veyvc3n9AcLSoK3r1dA12Voices In My HeadHoodie SZNA Boogie Wit da Hoodie0.05550.754142301.00.6630.06.0...0.00.42790.1954.00.2073r5hf3Cj3EMh1C2saQ8jyt2018-12-211.020.03.0
177JzXZonNumWsuXKy9vr3UBeastyHoodie SZNA Boogie Wit da Hoodie0.29200.860152829.00.4180.07.0...0.00.158126.0234.00.3743r5hf3Cj3EMh1C2saQ8jyt2018-12-211.020.03.0
\n", + "

2 rows × 22 columns

\n", + "
" + ], + "text/plain": [ + " id song album \\\n", + "0 0Veyvc3n9AcLSoK3r1dA12 Voices In My Head Hoodie SZN \n", + "1 77JzXZonNumWsuXKy9vr3U Beasty Hoodie SZN \n", + "\n", + " artist acousticness danceability duration_ms energy \\\n", + "0 A Boogie Wit da Hoodie 0.0555 0.754 142301.0 0.663 \n", + "1 A Boogie Wit da Hoodie 0.2920 0.860 152829.0 0.418 \n", + "\n", + " instrumentalness key ... mode speechiness tempo time_signature \\\n", + "0 0.0 6.0 ... 0.0 0.427 90.195 4.0 \n", + "1 0.0 7.0 ... 0.0 0.158 126.023 4.0 \n", + "\n", + " valence album_id date rank album_length weeks_top \n", + "0 0.207 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 1.0 20.0 3.0 \n", + "1 0.374 3r5hf3Cj3EMh1C2saQ8jyt 2018-12-21 1.0 20.0 3.0 \n", + "\n", + "[2 rows x 22 columns]" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top_40 = df_ranking[df_ranking[\"rank\"]<=40]\n", + "top_40.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "131333" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = top_40.drop(columns=[\"song\",\"album\",\"artist\",\"id\",\"rank\",\"album_id\",\"date\"])\n", + "y = top_40[\"rank\"]\n", + "x.energy.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.1006890820016928" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "\n", + "\n", + "poly_model = make_pipeline (PolynomialFeatures(3), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, 
y_train)\n", + "\n", + "poly_model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [], + "source": [ + "recent_ranking = df_ranking[df_ranking.date > '2004-12-31']\n", + "old_ranking = df_ranking[df_ranking.date <= '2004-12-31']" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "datetime.datetime(1991, 1, 12, 0, 0)" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datetime import datetime\n", + "df_albums.date.max()\n", + "a = datetime(1963, 1, 5)\n", + "b = datetime(2019, 1 ,19)\n", + "a + (b - a)/2" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9993704141733303" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "old_ranking.id.count()/recent_ranking.id.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencealbum_lengthweeks_top
00.05550.754142301.00.6630.06.00.101-6.3110.00.42790.1954.00.20720.03.0
10.29200.860152829.00.4180.07.00.106-9.0610.00.158126.0234.00.37420.03.0
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy instrumentalness key \\\n", + "0 0.0555 0.754 142301.0 0.663 0.0 6.0 \n", + "1 0.2920 0.860 152829.0 0.418 0.0 7.0 \n", + "\n", + " liveness loudness mode speechiness tempo time_signature valence \\\n", + "0 0.101 -6.311 0.0 0.427 90.195 4.0 0.207 \n", + "1 0.106 -9.061 0.0 0.158 126.023 4.0 0.374 \n", + "\n", + " album_length weeks_top \n", + "0 20.0 3.0 \n", + "1 20.0 3.0 " + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = recent_ranking.drop(columns=[\"song\",\"album\",\"artist\",\"id\",\"rank\",\"album_id\",\"date\"])\n", + "y = recent_ranking[\"rank\"]\n", + "x.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.12799465635727159" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "\n", + "\n", + "poly_model = make_pipeline (PolynomialFeatures(3), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + "poly_model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [], + "source": [ + "x = old_ranking.drop(columns=[\"song\",\"album\",\"artist\",\"id\",\"rank\",\"album_id\",\"date\"])\n", + "y = old_ranking[\"rank\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.12799465635727159" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import 
make_pipeline\n", + "\n", + "\n", + "poly_model = make_pipeline (PolynomialFeatures(3), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + "poly_model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.6128328806147163" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "flash_hits = df_ranking[df_ranking.weeks_top <= 13]\n", + "flash_hits.energy.count()/df_ranking.energy.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "x = flash_hits.drop(columns=[\"song\",\"album\",\"artist\",\"id\",\"rank\",\"album_id\",\"date\"])\n", + "y = flash_hits[\"rank\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.24703788868644594" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "\n", + "poly_model = make_pipeline (PolynomialFeatures(2), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + "poly_model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [], + "source": [ + "test_ranking = df_ranking[df_ranking.date > '1961-12-31']\n", + "#test_ranking = df_ranking[df_ranking.date > '2008-12-31']" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8850850061359804" + ] + }, + "execution_count": 101, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_ranking = test_ranking[test_ranking.weeks_top <= 52]\n", + "\n", + "test_ranking.energy.count()/df_ranking.energy.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [], + "source": [ + "x = test_ranking.drop(columns=[\"song\",\"album\",\"artist\",\"id\",\"rank\",\"album_id\",\"date\"])\n", + "y = test_ranking[\"rank\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.37654698821670396" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "\n", + "poly_model = make_pipeline (PolynomialFeatures(2), linear_model.LinearRegression())\n", + "\n", + "model = poly_model.fit(x_train, y_train)\n", + "\n", + "poly_model.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = poly_model.predict(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencerankalbum_lengthweeks_top
acousticness1.000000-0.193424-0.114031-0.7055590.120800-0.024165-0.006213-0.5651240.0802290.000599-0.175110-0.185099-0.2145120.0805850.1132210.026523
danceability-0.1934241.000000-0.0611570.106031-0.1925370.022180-0.1557260.144868-0.0689590.176857-0.1125430.2034490.529373-0.066055-0.0462640.083294
duration_ms-0.114031-0.0611571.0000000.0635900.0479740.0041260.0686720.039692-0.046986-0.0786290.0178480.058427-0.121044-0.006523-0.037612-0.003142
energy-0.7055590.1060310.0635901.000000-0.1227640.0303700.1694830.761897-0.0674790.1093290.2139650.1896630.305588-0.058473-0.080271-0.078506
instrumentalness0.120800-0.1925370.047974-0.1227641.000000-0.009171-0.040985-0.277801-0.040583-0.111850-0.023195-0.045723-0.1608710.1005940.094270-0.048149
key-0.0241650.0221800.0041260.030370-0.0091711.000000-0.0007340.017070-0.1653980.031355-0.0021990.0067280.025884-0.002136-0.0032520.004185
liveness-0.006213-0.1557260.0686720.169483-0.040985-0.0007341.0000000.0476590.0134820.2191410.002841-0.014975-0.0309200.0070930.080717-0.041585
loudness-0.5651240.1448680.0396920.761897-0.2778010.0170700.0476591.000000-0.0303750.0088340.1704330.1644820.177095-0.105550-0.102282-0.123773
mode0.080229-0.068959-0.046986-0.067479-0.040583-0.1653980.013482-0.0303751.000000-0.0838050.015805-0.0161390.0014470.0299960.0117020.004643
speechiness0.0005990.176857-0.0786290.109329-0.1118500.0313550.2191410.008834-0.0838051.000000-0.032465-0.0050640.046756-0.0581290.037621-0.032551
tempo-0.175110-0.1125430.0178480.213965-0.023195-0.0021990.0028410.1704330.015805-0.0324651.0000000.0567560.097036-0.003143-0.033271-0.017837
time_signature-0.1850990.2034490.0584270.189663-0.0457230.006728-0.0149750.164482-0.016139-0.0050640.0567561.0000000.157769-0.024061-0.0318220.016646
valence-0.2145120.529373-0.1210440.305588-0.1608710.025884-0.0309200.1770950.0014470.0467560.0970360.1577691.0000000.014829-0.0120780.101173
rank0.080585-0.066055-0.006523-0.0584730.100594-0.0021360.007093-0.1055500.029996-0.058129-0.003143-0.0240610.0148291.0000000.029841-0.503327
album_length0.113221-0.046264-0.037612-0.0802710.094270-0.0032520.080717-0.1022820.0117020.037621-0.033271-0.031822-0.0120780.0298411.000000-0.066330
weeks_top0.0265230.083294-0.003142-0.078506-0.0481490.004185-0.041585-0.1237730.004643-0.032551-0.0178370.0166460.101173-0.503327-0.0663301.000000
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy \\\n", + "acousticness 1.000000 -0.193424 -0.114031 -0.705559 \n", + "danceability -0.193424 1.000000 -0.061157 0.106031 \n", + "duration_ms -0.114031 -0.061157 1.000000 0.063590 \n", + "energy -0.705559 0.106031 0.063590 1.000000 \n", + "instrumentalness 0.120800 -0.192537 0.047974 -0.122764 \n", + "key -0.024165 0.022180 0.004126 0.030370 \n", + "liveness -0.006213 -0.155726 0.068672 0.169483 \n", + "loudness -0.565124 0.144868 0.039692 0.761897 \n", + "mode 0.080229 -0.068959 -0.046986 -0.067479 \n", + "speechiness 0.000599 0.176857 -0.078629 0.109329 \n", + "tempo -0.175110 -0.112543 0.017848 0.213965 \n", + "time_signature -0.185099 0.203449 0.058427 0.189663 \n", + "valence -0.214512 0.529373 -0.121044 0.305588 \n", + "rank 0.080585 -0.066055 -0.006523 -0.058473 \n", + "album_length 0.113221 -0.046264 -0.037612 -0.080271 \n", + "weeks_top 0.026523 0.083294 -0.003142 -0.078506 \n", + "\n", + " instrumentalness key liveness loudness mode \\\n", + "acousticness 0.120800 -0.024165 -0.006213 -0.565124 0.080229 \n", + "danceability -0.192537 0.022180 -0.155726 0.144868 -0.068959 \n", + "duration_ms 0.047974 0.004126 0.068672 0.039692 -0.046986 \n", + "energy -0.122764 0.030370 0.169483 0.761897 -0.067479 \n", + "instrumentalness 1.000000 -0.009171 -0.040985 -0.277801 -0.040583 \n", + "key -0.009171 1.000000 -0.000734 0.017070 -0.165398 \n", + "liveness -0.040985 -0.000734 1.000000 0.047659 0.013482 \n", + "loudness -0.277801 0.017070 0.047659 1.000000 -0.030375 \n", + "mode -0.040583 -0.165398 0.013482 -0.030375 1.000000 \n", + "speechiness -0.111850 0.031355 0.219141 0.008834 -0.083805 \n", + "tempo -0.023195 -0.002199 0.002841 0.170433 0.015805 \n", + "time_signature -0.045723 0.006728 -0.014975 0.164482 -0.016139 \n", + "valence -0.160871 0.025884 -0.030920 0.177095 0.001447 \n", + "rank 0.100594 -0.002136 0.007093 -0.105550 0.029996 \n", + "album_length 0.094270 -0.003252 0.080717 
-0.102282 0.011702 \n", + "weeks_top -0.048149 0.004185 -0.041585 -0.123773 0.004643 \n", + "\n", + " speechiness tempo time_signature valence rank \\\n", + "acousticness 0.000599 -0.175110 -0.185099 -0.214512 0.080585 \n", + "danceability 0.176857 -0.112543 0.203449 0.529373 -0.066055 \n", + "duration_ms -0.078629 0.017848 0.058427 -0.121044 -0.006523 \n", + "energy 0.109329 0.213965 0.189663 0.305588 -0.058473 \n", + "instrumentalness -0.111850 -0.023195 -0.045723 -0.160871 0.100594 \n", + "key 0.031355 -0.002199 0.006728 0.025884 -0.002136 \n", + "liveness 0.219141 0.002841 -0.014975 -0.030920 0.007093 \n", + "loudness 0.008834 0.170433 0.164482 0.177095 -0.105550 \n", + "mode -0.083805 0.015805 -0.016139 0.001447 0.029996 \n", + "speechiness 1.000000 -0.032465 -0.005064 0.046756 -0.058129 \n", + "tempo -0.032465 1.000000 0.056756 0.097036 -0.003143 \n", + "time_signature -0.005064 0.056756 1.000000 0.157769 -0.024061 \n", + "valence 0.046756 0.097036 0.157769 1.000000 0.014829 \n", + "rank -0.058129 -0.003143 -0.024061 0.014829 1.000000 \n", + "album_length 0.037621 -0.033271 -0.031822 -0.012078 0.029841 \n", + "weeks_top -0.032551 -0.017837 0.016646 0.101173 -0.503327 \n", + "\n", + " album_length weeks_top \n", + "acousticness 0.113221 0.026523 \n", + "danceability -0.046264 0.083294 \n", + "duration_ms -0.037612 -0.003142 \n", + "energy -0.080271 -0.078506 \n", + "instrumentalness 0.094270 -0.048149 \n", + "key -0.003252 0.004185 \n", + "liveness 0.080717 -0.041585 \n", + "loudness -0.102282 -0.123773 \n", + "mode 0.011702 0.004643 \n", + "speechiness 0.037621 -0.032551 \n", + "tempo -0.033271 -0.017837 \n", + "time_signature -0.031822 0.016646 \n", + "valence -0.012078 0.101173 \n", + "rank 0.029841 -0.503327 \n", + "album_length 1.000000 -0.066330 \n", + "weeks_top -0.066330 1.000000 " + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_ranking.corr()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.37654698821670396" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import r2_score\n", + "\n", + "r2_score(y_test, y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'clf = linear_model.SGDRegressor(max_iter=1000, tol=1e-3)\\nmodel_clf= clf.fit(x_train, y_train)\\nmodel_clf.score(x_test, y_test)'" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"clf = linear_model.SGDRegressor(max_iter=1000, tol=1e-3)\n", + "model_clf= clf.fit(x_train, y_train)\n", + "model_clf.score(x_test, y_test)\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [], + "source": [ + "mask_corr = test_ranking.corr() " + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencerankalbum_lengthweeks_top
acousticness1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
danceabilityNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
duration_msNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
energyNaNNaNNaN1.000000NaNNaNNaN0.761897NaNNaNNaNNaNNaNNaNNaNNaN
instrumentalnessNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
keyNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
livenessNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
loudnessNaNNaNNaN0.761897NaNNaNNaN1.000000NaNNaNNaNNaNNaNNaNNaNNaN
modeNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaN
speechinessNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaN
tempoNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaN
time_signatureNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaN
valenceNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaN
rankNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaN
album_lengthNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaN
weeks_topNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy \\\n", + "acousticness 1.0 NaN NaN NaN \n", + "danceability NaN 1.0 NaN NaN \n", + "duration_ms NaN NaN 1.0 NaN \n", + "energy NaN NaN NaN 1.000000 \n", + "instrumentalness NaN NaN NaN NaN \n", + "key NaN NaN NaN NaN \n", + "liveness NaN NaN NaN NaN \n", + "loudness NaN NaN NaN 0.761897 \n", + "mode NaN NaN NaN NaN \n", + "speechiness NaN NaN NaN NaN \n", + "tempo NaN NaN NaN NaN \n", + "time_signature NaN NaN NaN NaN \n", + "valence NaN NaN NaN NaN \n", + "rank NaN NaN NaN NaN \n", + "album_length NaN NaN NaN NaN \n", + "weeks_top NaN NaN NaN NaN \n", + "\n", + " instrumentalness key liveness loudness mode \\\n", + "acousticness NaN NaN NaN NaN NaN \n", + "danceability NaN NaN NaN NaN NaN \n", + "duration_ms NaN NaN NaN NaN NaN \n", + "energy NaN NaN NaN 0.761897 NaN \n", + "instrumentalness 1.0 NaN NaN NaN NaN \n", + "key NaN 1.0 NaN NaN NaN \n", + "liveness NaN NaN 1.0 NaN NaN \n", + "loudness NaN NaN NaN 1.000000 NaN \n", + "mode NaN NaN NaN NaN 1.0 \n", + "speechiness NaN NaN NaN NaN NaN \n", + "tempo NaN NaN NaN NaN NaN \n", + "time_signature NaN NaN NaN NaN NaN \n", + "valence NaN NaN NaN NaN NaN \n", + "rank NaN NaN NaN NaN NaN \n", + "album_length NaN NaN NaN NaN NaN \n", + "weeks_top NaN NaN NaN NaN NaN \n", + "\n", + " speechiness tempo time_signature valence rank \\\n", + "acousticness NaN NaN NaN NaN NaN \n", + "danceability NaN NaN NaN NaN NaN \n", + "duration_ms NaN NaN NaN NaN NaN \n", + "energy NaN NaN NaN NaN NaN \n", + "instrumentalness NaN NaN NaN NaN NaN \n", + "key NaN NaN NaN NaN NaN \n", + "liveness NaN NaN NaN NaN NaN \n", + "loudness NaN NaN NaN NaN NaN \n", + "mode NaN NaN NaN NaN NaN \n", + "speechiness 1.0 NaN NaN NaN NaN \n", + "tempo NaN 1.0 NaN NaN NaN \n", + "time_signature NaN NaN 1.0 NaN NaN \n", + "valence NaN NaN NaN 1.0 NaN \n", + "rank NaN NaN NaN NaN 1.0 \n", + "album_length NaN NaN NaN NaN NaN \n", + "weeks_top NaN NaN NaN NaN NaN \n", + "\n", + " 
album_length weeks_top \n", + "acousticness NaN NaN \n", + "danceability NaN NaN \n", + "duration_ms NaN NaN \n", + "energy NaN NaN \n", + "instrumentalness NaN NaN \n", + "key NaN NaN \n", + "liveness NaN NaN \n", + "loudness NaN NaN \n", + "mode NaN NaN \n", + "speechiness NaN NaN \n", + "tempo NaN NaN \n", + "time_signature NaN NaN \n", + "valence NaN NaN \n", + "rank NaN NaN \n", + "album_length 1.0 NaN \n", + "weeks_top NaN 1.0 " + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mask_corr[mask_corr>0.7]" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "test_ranking.loc[test_ranking['rank'] <= 10, 'rank'] = 1\n", + "test_ranking.loc[test_ranking['rank'] > 10, 'rank'] = 0" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [], + "source": [ + "x = test_ranking.drop(columns=[\"song\",\"album\",\"artist\",\"id\",\"rank\",\"album_id\",\"date\"])\n", + "y = test_ranking[\"rank\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"from sklearn.preprocessing import MinMaxScaler, StandardScaler # what's the difference??\\n\\nscaler = MinMaxScaler(feature_range=(-2,2))\\nds['thalach'] = scaler.fit_transform(ds.thalach.values.reshape(-1, 1))\\nds.describe()\"" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"from sklearn.preprocessing import MinMaxScaler, StandardScaler # what's the difference??\n", + "\n", + "scaler = MinMaxScaler(feature_range=(-2,2))\n", + "ds['thalach'] = scaler.fit_transform(ds.thalach.values.reshape(-1, 1))\n", + 
"ds.describe()\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "0.8750477963791126" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lr = LogisticRegression()\n", + "log_mod = lr.fit(x_train,y_train)\n", + "\n", + "y_pred = log_mod.predict(x_test)\n", + "log_mod.score(x_test,y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8562118668018819" + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "dtc = DecisionTreeClassifier()\n", + "dtc.fit(x_train, y_train)\n", + "\n", + "dtc.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'from sklearn.svm import SVC\\nsvm = SVC()\\nsvm.fit(x_train, y_train)\\n\\nacc = svm.score(x_test,y_test)'" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"from sklearn.svm import SVC\n", + "svm = SVC()\n", + "svm.fit(x_train, y_train)\n", + "\n", + "acc = svm.score(x_test,y_test)\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8760452860301574" + ] + }, + 
"execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "knn = KNeighborsClassifier(n_neighbors = 23) # n_neighbors means k\n", + "knn.fit(x_train, y_train)\n", + "y_pred = knn.predict(x_test)\n", + "\n", + "acc = knn.score(x_test, y_test)\n", + "acc" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from sklearn.model_selection import GridSearchCV" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'n_neighbors': 23}" + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "param_grid = {'n_neighbors': np.arange(1,30)}\n", + "knn = KNeighborsClassifier()\n", + "knn_cv = GridSearchCV(knn, param_grid, cv=5)\n", + "knn_cv.fit(x_train,y_train)\n", + "knn_cv.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8699938488138186" + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knn = KNeighborsClassifier(n_neighbors = 2) # n_neighbors means k\n", + "knn.fit(x_train, y_train)\n", + "y_pred = knn.predict(x_test)\n", + "\n", + "acc = knn.score(x_test, y_test)\n", + "acc" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import classification_report" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " class 0 0.88 0.99 0.93 52690\n", + " class 1 0.35 0.06 0.10 7461\n", + "\n", + " accuracy 0.87 60151\n", + " macro avg 0.61 0.52 0.51 60151\n", + "weighted 
avg 0.81 0.87 0.83 60151\n", + "\n" + ] + } + ], + "source": [ + "target_names = ['class 0', 'class 1']\n", + "print(classification_report(y_test, y_pred, target_names=target_names))" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Predicted Not StwDevPredicted StwDev
True Not StwDev4242310267
True StwDev48942567
\n", + "
" + ], + "text/plain": [ + " Predicted Not StwDev Predicted StwDev\n", + "True Not StwDev 42423 10267\n", + "True StwDev 4894 2567" + ] + }, + "execution_count": 141, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "\n", + "cm = pd.DataFrame(\n", + " confusion_matrix(y_test, y_pred),\n", + " columns=['Predicted Not StwDev', 'Predicted StwDev'],\n", + " index=['True Not StwDev', 'True StwDev'])\n", + "\n", + "cm" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "52316 374 7142 319\n" + ] + } + ], + "source": [ + "tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()\n", + "print(tn, fp, fn, tp)" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0 263582\n", + "1.0 37169\n", + "Name: rank, dtype: int64" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_ranking[\"rank\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.12358728649281299" + ] + }, + "execution_count": 127, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "37169/ (263582 + 37169)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0 29708\n", + "0.0 29708\n", + "Name: rank, dtype: int64" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# resample the data\n", + "\n", + "from sklearn.utils import resample\n", + "\n", + "# concatenate our training data back together\n", + "\n", + "x_concat = pd.concat([x_train, y_train], axis=1)\n", + "\n", + "# separate minority and majority classes\n", + 
"\n", + "non_hit = x_concat[x_concat['rank']==0]\n", + "\n", + "hit = x_concat[x_concat['rank']==1]\n", + "\n", + "y_train.value_counts()\n", + "\n", + "# downsample non occupied sample\n", + "\n", + "non_hit_downsampled = resample(non_hit,\n", + "\n", + " replace = False, # sample without replacement\n", + "\n", + " n_samples = len(hit), # match minority n\n", + "\n", + " random_state = 29) # reproducible results\n", + "\n", + "# combine minority and downsampled majority\n", + "\n", + "downsampled = pd.concat([non_hit_downsampled, hit])\n", + "\n", + "# checking counts\n", + "\n", + "downsampled[\"rank\"].value_counts()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acousticnessdanceabilityduration_msenergyinstrumentalnesskeylivenessloudnessmodespeechinesstempotime_signaturevalencealbum_lengthweeks_toprank
2711890.37800.824180307.00.5170.00004310.00.0432-12.3961.00.0366124.0444.00.963012.018.00.0
2173060.86400.46661000.00.9630.00010210.00.7750-14.7530.00.8720102.6234.00.032215.03.00.0
1014930.04740.707279960.00.8380.0009215.00.0955-5.4800.00.0515117.9654.00.861013.02.00.0
595280.25100.435280376.00.7320.4420006.00.1450-10.0340.00.0275162.3664.00.751011.03.00.0
896370.67500.46996853.00.3170.0442002.00.3220-10.8631.00.0264159.8413.00.377024.01.00.0
\n", + "
" + ], + "text/plain": [ + " acousticness danceability duration_ms energy instrumentalness \\\n", + "271189 0.3780 0.824 180307.0 0.517 0.000043 \n", + "217306 0.8640 0.466 61000.0 0.963 0.000102 \n", + "101493 0.0474 0.707 279960.0 0.838 0.000921 \n", + "59528 0.2510 0.435 280376.0 0.732 0.442000 \n", + "89637 0.6750 0.469 96853.0 0.317 0.044200 \n", + "\n", + " key liveness loudness mode speechiness tempo time_signature \\\n", + "271189 10.0 0.0432 -12.396 1.0 0.0366 124.044 4.0 \n", + "217306 10.0 0.7750 -14.753 0.0 0.8720 102.623 4.0 \n", + "101493 5.0 0.0955 -5.480 0.0 0.0515 117.965 4.0 \n", + "59528 6.0 0.1450 -10.034 0.0 0.0275 162.366 4.0 \n", + "89637 2.0 0.3220 -10.863 1.0 0.0264 159.841 3.0 \n", + "\n", + " valence album_length weeks_top rank \n", + "271189 0.9630 12.0 18.0 0.0 \n", + "217306 0.0322 15.0 3.0 0.0 \n", + "101493 0.8610 13.0 2.0 0.0 \n", + "59528 0.7510 11.0 3.0 0.0 \n", + "89637 0.3770 24.0 1.0 0.0 " + ] + }, + "execution_count": 129, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "downsampled.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [], + "source": [ + "x_traind = downsampled.drop(columns=[\"rank\"])\n", + "y_traind = downsampled[\"rank\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. 
Specify a solver to silence this warning.\n", + " FutureWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "0.8038436601220262" + ] + }, + "execution_count": 147, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lr = LogisticRegression()\n", + "log_mod = lr.fit(x_traind,y_traind)\n", + "y_pred = log_mod.predict(x_test)\n", + "log_mod.score(x_test,y_test)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Predicted Not StwDevPredicted StwDev
True Not StwDev429619729
True StwDev20705391
\n", + "
" + ], + "text/plain": [ + " Predicted Not StwDev Predicted StwDev\n", + "True Not StwDev 42961 9729\n", + "True StwDev 2070 5391" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cm = pd.DataFrame(\n", + " confusion_matrix(y_test, y_pred),\n", + " columns=['Predicted Not StwDev', 'Predicted StwDev'],\n", + " index=['True Not StwDev', 'True StwDev'])\n", + "\n", + "cm" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "42961 9729 2070 5391\n" + ] + } + ], + "source": [ + "tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()\n", + "print(tn, fp, fn, tp)" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " class 0 0.95 0.82 0.88 52690\n", + " class 1 0.36 0.72 0.48 7461\n", + "\n", + " accuracy 0.80 60151\n", + " macro avg 0.66 0.77 0.68 60151\n", + "weighted avg 0.88 0.80 0.83 60151\n", + "\n" + ] + } + ], + "source": [ + "print(classification_report(y_test, y_pred, target_names=target_names))" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.750494588618643" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dtc = DecisionTreeClassifier()\n", + "dtc.fit(x_traind, y_traind)\n", + "\n", + "dtc.score(x_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7479509900084786" + ] + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knn = KNeighborsClassifier(n_neighbors = 2) # n_neighbors means k\n", + "knn.fit(x_traind, y_traind)\n", + "y_pred = 
knn.predict(x_test)\n", + "\n", + "acc = knn.score(x_test, y_test)\n", + "acc" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "not enough values to unpack (expected 4, got 2)", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mconfusion_matrix\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mValueError\u001b[0m: not enough values to unpack (expected 4, got 2)" + ] + } + ], + "source": [ + "tn, fp, fn, tp = confusion_matrix(y_test, y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8153539571076106" + ] + }, + "execution_count": 156, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "42961/(42961+9729)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}