diff --git a/data/googleplaystore.csv b/your-code/notebooks/data/googleplaystore.csv old mode 100755 new mode 100644 similarity index 100% rename from data/googleplaystore.csv rename to your-code/notebooks/data/googleplaystore.csv diff --git a/data/googleplaystore_user_reviews.csv b/your-code/notebooks/data/googleplaystore_user_reviews.csv old mode 100755 new mode 100644 similarity index 100% rename from data/googleplaystore_user_reviews.csv rename to your-code/notebooks/data/googleplaystore_user_reviews.csv diff --git a/your-code/notebooks/main.ipynb b/your-code/notebooks/main.ipynb index 07928c1..ecb4ea3 100755 --- a/your-code/notebooks/main.ipynb +++ b/your-code/notebooks/main.ipynb @@ -12,13 +12,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "#Import your libraries\n", "import numpy as np\n", - "import pandas as pd" + "import pandas as pd\n", + "import pyforest" ] }, { @@ -60,11 +61,291 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AppCategoryRatingReviewsSizeInstallsTypePriceContent RatingGenresLast UpdatedCurrent VerAndroid Ver
0Photo Editor & Candy Camera & Grid & ScrapBookART_AND_DESIGN4.115919M10,000+Free0EveryoneArt & DesignJanuary 7, 20181.0.04.0.3 and up
1Coloring book moanaART_AND_DESIGN3.996714M500,000+Free0EveryoneArt & Design;Pretend PlayJanuary 15, 20182.0.04.0.3 and up
2U Launcher Lite – FREE Live Cool Themes, Hide ...ART_AND_DESIGN4.7875108.7M5,000,000+Free0EveryoneArt & DesignAugust 1, 20181.2.44.0.3 and up
3Sketch - Draw & PaintART_AND_DESIGN4.521564425M50,000,000+Free0TeenArt & DesignJune 8, 2018Varies with device4.2 and up
4Pixel Draw - Number Art Coloring BookART_AND_DESIGN4.39672.8M100,000+Free0EveryoneArt & Design;CreativityJune 20, 20181.14.4 and up
..........................................
10836Sya9a Maroc - FRFAMILY4.53853M5,000+Free0EveryoneEducationJuly 25, 20171.484.1 and up
10837Fr. Mike Schmitz Audio TeachingsFAMILY5.043.6M100+Free0EveryoneEducationJuly 6, 20181.04.1 and up
10838Parkinson Exercices FRMEDICALNaN39.5M1,000+Free0EveryoneMedicalJanuary 20, 20171.02.2 and up
10839The SCP Foundation DB fr nn5nBOOKS_AND_REFERENCE4.5114Varies with device1,000+Free0Mature 17+Books & ReferenceJanuary 19, 2015Varies with deviceVaries with device
10840iHoroscope - 2018 Daily Horoscope & AstrologyLIFESTYLE4.539830719M10,000,000+Free0EveryoneLifestyleJuly 25, 2018Varies with deviceVaries with device
\n", + "

10841 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " App Category \\\n", + "0 Photo Editor & Candy Camera & Grid & ScrapBook ART_AND_DESIGN \n", + "1 Coloring book moana ART_AND_DESIGN \n", + "2 U Launcher Lite – FREE Live Cool Themes, Hide ... ART_AND_DESIGN \n", + "3 Sketch - Draw & Paint ART_AND_DESIGN \n", + "4 Pixel Draw - Number Art Coloring Book ART_AND_DESIGN \n", + "... ... ... \n", + "10836 Sya9a Maroc - FR FAMILY \n", + "10837 Fr. Mike Schmitz Audio Teachings FAMILY \n", + "10838 Parkinson Exercices FR MEDICAL \n", + "10839 The SCP Foundation DB fr nn5n BOOKS_AND_REFERENCE \n", + "10840 iHoroscope - 2018 Daily Horoscope & Astrology LIFESTYLE \n", + "\n", + " Rating Reviews Size Installs Type Price \\\n", + "0 4.1 159 19M 10,000+ Free 0 \n", + "1 3.9 967 14M 500,000+ Free 0 \n", + "2 4.7 87510 8.7M 5,000,000+ Free 0 \n", + "3 4.5 215644 25M 50,000,000+ Free 0 \n", + "4 4.3 967 2.8M 100,000+ Free 0 \n", + "... ... ... ... ... ... ... \n", + "10836 4.5 38 53M 5,000+ Free 0 \n", + "10837 5.0 4 3.6M 100+ Free 0 \n", + "10838 NaN 3 9.5M 1,000+ Free 0 \n", + "10839 4.5 114 Varies with device 1,000+ Free 0 \n", + "10840 4.5 398307 19M 10,000,000+ Free 0 \n", + "\n", + " Content Rating Genres Last Updated \\\n", + "0 Everyone Art & Design January 7, 2018 \n", + "1 Everyone Art & Design;Pretend Play January 15, 2018 \n", + "2 Everyone Art & Design August 1, 2018 \n", + "3 Teen Art & Design June 8, 2018 \n", + "4 Everyone Art & Design;Creativity June 20, 2018 \n", + "... ... ... ... \n", + "10836 Everyone Education July 25, 2017 \n", + "10837 Everyone Education July 6, 2018 \n", + "10838 Everyone Medical January 20, 2017 \n", + "10839 Mature 17+ Books & Reference January 19, 2015 \n", + "10840 Everyone Lifestyle July 25, 2018 \n", + "\n", + " Current Ver Android Ver \n", + "0 1.0.0 4.0.3 and up \n", + "1 2.0.0 4.0.3 and up \n", + "2 1.2.4 4.0.3 and up \n", + "3 Varies with device 4.2 and up \n", + "4 1.1 4.4 and up \n", + "... ... ... \n", + "10836 1.48 4.1 and up \n", + "10837 1.0 4.1 and up \n", + "10838 1.0 2.2 and up \n", + "10839 Varies with device Varies with device \n", + "10840 Varies with device Varies with device \n", + "\n", + "[10841 rows x 13 columns]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_play = pd.read_csv(\".\\data\\googleplaystore.csv\")\n", + "google_play" ] }, { @@ -76,11 +357,38 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 10841 entries, 0 to 10840\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 App 10841 non-null object \n", + " 1 Category 10841 non-null object \n", + " 2 Rating 9367 non-null float64\n", + " 3 Reviews 10841 non-null object \n", + " 4 Size 10841 non-null object \n", + " 5 Installs 10841 non-null object \n", + " 6 Type 10840 non-null object \n", + " 7 Price 10841 non-null object \n", + " 8 Content Rating 10840 non-null object \n", + " 9 Genres 10841 non-null object \n", + " 10 Last Updated 10841 non-null object \n", + " 11 Current Ver 10833 non-null object \n", + " 12 Android Ver 10838 non-null object \n", + "dtypes: float64(1), object(12)\n", + "memory usage: 1.1+ MB\n" + ] + } + ], + "source": [ + "google_play.info()" ] }, { @@ -92,11 +400,167 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AppCategoryRatingReviewsSizeInstallsTypePriceContent RatingGenresLast UpdatedCurrent VerAndroid Ver
0Photo Editor & Candy Camera & Grid & ScrapBookART_AND_DESIGN4.115919M10,000+Free0EveryoneArt & DesignJanuary 7, 20181.0.04.0.3 and up
1Coloring book moanaART_AND_DESIGN3.996714M500,000+Free0EveryoneArt & Design;Pretend PlayJanuary 15, 20182.0.04.0.3 and up
2U Launcher Lite – FREE Live Cool Themes, Hide ...ART_AND_DESIGN4.7875108.7M5,000,000+Free0EveryoneArt & DesignAugust 1, 20181.2.44.0.3 and up
3Sketch - Draw & PaintART_AND_DESIGN4.521564425M50,000,000+Free0TeenArt & DesignJune 8, 2018Varies with device4.2 and up
4Pixel Draw - Number Art Coloring BookART_AND_DESIGN4.39672.8M100,000+Free0EveryoneArt & Design;CreativityJune 20, 20181.14.4 and up
\n", + "
" + ], + "text/plain": [ + " App Category Rating \\\n", + "0 Photo Editor & Candy Camera & Grid & ScrapBook ART_AND_DESIGN 4.1 \n", + "1 Coloring book moana ART_AND_DESIGN 3.9 \n", + "2 U Launcher Lite – FREE Live Cool Themes, Hide ... ART_AND_DESIGN 4.7 \n", + "3 Sketch - Draw & Paint ART_AND_DESIGN 4.5 \n", + "4 Pixel Draw - Number Art Coloring Book ART_AND_DESIGN 4.3 \n", + "\n", + " Reviews Size Installs Type Price Content Rating \\\n", + "0 159 19M 10,000+ Free 0 Everyone \n", + "1 967 14M 500,000+ Free 0 Everyone \n", + "2 87510 8.7M 5,000,000+ Free 0 Everyone \n", + "3 215644 25M 50,000,000+ Free 0 Teen \n", + "4 967 2.8M 100,000+ Free 0 Everyone \n", + "\n", + " Genres Last Updated Current Ver \\\n", + "0 Art & Design January 7, 2018 1.0.0 \n", + "1 Art & Design;Pretend Play January 15, 2018 2.0.0 \n", + "2 Art & Design August 1, 2018 1.2.4 \n", + "3 Art & Design June 8, 2018 Varies with device \n", + "4 Art & Design;Creativity June 20, 2018 1.1 \n", + "\n", + " Android Ver \n", + "0 4.0.3 and up \n", + "1 4.0.3 and up \n", + "2 4.0.3 and up \n", + "3 4.2 and up \n", + "4 4.4 and up " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_play.head()" ] }, { @@ -112,11 +576,303 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AppCategoryRatingReviewsSizeInstallsTypePriceContent RatingGenresLast UpdatedCurrent VerAndroid VerReviews_numeric
0Photo Editor & Candy Camera & Grid & ScrapBookART_AND_DESIGN4.115919M10,000+Free0EveryoneArt & DesignJanuary 7, 20181.0.04.0.3 and up159.0
1Coloring book moanaART_AND_DESIGN3.996714M500,000+Free0EveryoneArt & Design;Pretend PlayJanuary 15, 20182.0.04.0.3 and up967.0
2U Launcher Lite – FREE Live Cool Themes, Hide ...ART_AND_DESIGN4.7875108.7M5,000,000+Free0EveryoneArt & DesignAugust 1, 20181.2.44.0.3 and up87510.0
3Sketch - Draw & PaintART_AND_DESIGN4.521564425M50,000,000+Free0TeenArt & DesignJune 8, 2018Varies with device4.2 and up215644.0
4Pixel Draw - Number Art Coloring BookART_AND_DESIGN4.39672.8M100,000+Free0EveryoneArt & Design;CreativityJune 20, 20181.14.4 and up967.0
.............................................
10836Sya9a Maroc - FRFAMILY4.53853M5,000+Free0EveryoneEducationJuly 25, 20171.484.1 and up38.0
10837Fr. Mike Schmitz Audio TeachingsFAMILY5.043.6M100+Free0EveryoneEducationJuly 6, 20181.04.1 and up4.0
10838Parkinson Exercices FRMEDICALNaN39.5M1,000+Free0EveryoneMedicalJanuary 20, 20171.02.2 and up3.0
10839The SCP Foundation DB fr nn5nBOOKS_AND_REFERENCE4.5114Varies with device1,000+Free0Mature 17+Books & ReferenceJanuary 19, 2015Varies with deviceVaries with device114.0
10840iHoroscope - 2018 Daily Horoscope & AstrologyLIFESTYLE4.539830719M10,000,000+Free0EveryoneLifestyleJuly 25, 2018Varies with deviceVaries with device398307.0
\n", + "

10841 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " App Category \\\n", + "0 Photo Editor & Candy Camera & Grid & ScrapBook ART_AND_DESIGN \n", + "1 Coloring book moana ART_AND_DESIGN \n", + "2 U Launcher Lite – FREE Live Cool Themes, Hide ... ART_AND_DESIGN \n", + "3 Sketch - Draw & Paint ART_AND_DESIGN \n", + "4 Pixel Draw - Number Art Coloring Book ART_AND_DESIGN \n", + "... ... ... \n", + "10836 Sya9a Maroc - FR FAMILY \n", + "10837 Fr. Mike Schmitz Audio Teachings FAMILY \n", + "10838 Parkinson Exercices FR MEDICAL \n", + "10839 The SCP Foundation DB fr nn5n BOOKS_AND_REFERENCE \n", + "10840 iHoroscope - 2018 Daily Horoscope & Astrology LIFESTYLE \n", + "\n", + " Rating Reviews Size Installs Type Price \\\n", + "0 4.1 159 19M 10,000+ Free 0 \n", + "1 3.9 967 14M 500,000+ Free 0 \n", + "2 4.7 87510 8.7M 5,000,000+ Free 0 \n", + "3 4.5 215644 25M 50,000,000+ Free 0 \n", + "4 4.3 967 2.8M 100,000+ Free 0 \n", + "... ... ... ... ... ... ... \n", + "10836 4.5 38 53M 5,000+ Free 0 \n", + "10837 5.0 4 3.6M 100+ Free 0 \n", + "10838 NaN 3 9.5M 1,000+ Free 0 \n", + "10839 4.5 114 Varies with device 1,000+ Free 0 \n", + "10840 4.5 398307 19M 10,000,000+ Free 0 \n", + "\n", + " Content Rating Genres Last Updated \\\n", + "0 Everyone Art & Design January 7, 2018 \n", + "1 Everyone Art & Design;Pretend Play January 15, 2018 \n", + "2 Everyone Art & Design August 1, 2018 \n", + "3 Teen Art & Design June 8, 2018 \n", + "4 Everyone Art & Design;Creativity June 20, 2018 \n", + "... ... ... ... \n", + "10836 Everyone Education July 25, 2017 \n", + "10837 Everyone Education July 6, 2018 \n", + "10838 Everyone Medical January 20, 2017 \n", + "10839 Mature 17+ Books & Reference January 19, 2015 \n", + "10840 Everyone Lifestyle July 25, 2018 \n", + "\n", + " Current Ver Android Ver Reviews_numeric \n", + "0 1.0.0 4.0.3 and up 159.0 \n", + "1 2.0.0 4.0.3 and up 967.0 \n", + "2 1.2.4 4.0.3 and up 87510.0 \n", + "3 Varies with device 4.2 and up 215644.0 \n", + "4 1.1 4.4 and up 967.0 \n", + "... ... ... ... \n", + "10836 1.48 4.1 and up 38.0 \n", + "10837 1.0 4.1 and up 4.0 \n", + "10838 1.0 2.2 and up 3.0 \n", + "10839 Varies with device Varies with device 114.0 \n", + "10840 Varies with device Varies with device 398307.0 \n", + "\n", + "[10841 rows x 14 columns]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_play[\"Reviews_numeric\"] = pd.to_numeric(google_play[\"Reviews\"], errors=\"coerce\") \n", + "google_play" ] }, { @@ -128,11 +884,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "google_play[\"Reviews_isnull\"] = google_play[\"Reviews_numeric\"].isnull()" ] }, { @@ -148,11 +904,88 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AppCategoryRatingReviewsSizeInstallsTypePriceContent RatingGenresLast UpdatedCurrent VerAndroid VerReviews_numericReviews_isnull
10472Life Made WI-Fi Touchscreen Photo Frame1.919.03.0M1,000+Free0EveryoneNaNFebruary 11, 20181.0.194.0 and upNaNNaNTrue
\n", + "
" + ], + "text/plain": [ + " App Category Rating Reviews \\\n", + "10472 Life Made WI-Fi Touchscreen Photo Frame 1.9 19.0 3.0M \n", + "\n", + " Size Installs Type Price Content Rating Genres \\\n", + "10472 1,000+ Free 0 Everyone NaN February 11, 2018 \n", + "\n", + " Last Updated Current Ver Android Ver Reviews_numeric Reviews_isnull \n", + "10472 1.0.19 4.0 and up NaN NaN True " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_play[google_play[\"Reviews_isnull\"]==True]" ] }, { @@ -172,12 +1005,21 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n", - "\n", "def convert_string_to_numeric(s):\n", " \"\"\"\n", " Convert a string value to numeric. If the last character of the string is `M`, obtain the \n", @@ -190,11 +1032,18 @@ " Returns:\n", " The correct numeric value of the Reviews score.\n", " \"\"\"\n", - " return np.NaN\n", + " try:\n", + " s = float(s)\n", + " except ValueError:\n", + " s = s.split(\"M\")[0]\n", + " s = s.split(\".\")\n", + " s = f\"{s[0]}{s[1]}00000\"\n", + " s = float(s)\n", + " return s\n", "\n", "test_string = '4.0M'\n", "\n", - "convert_string_to_numeric(test_string) == 4000000" + "convert_string_to_numeric(test_string) == 4000000.0" ] }, { @@ -206,11 +1055,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "google_play[\"Reviews\"] = google_play[\"Reviews\"].apply(convert_string_to_numeric)" ] }, { @@ -224,11 +1073,88 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here\n" + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AppCategoryRatingReviewsSizeInstallsTypePriceContent RatingGenresLast UpdatedCurrent VerAndroid VerReviews_numericReviews_isnull
10472Life Made WI-Fi Touchscreen Photo Frame1.919.03000000.01,000+Free0EveryoneNaNFebruary 11, 20181.0.194.0 and upNaNNaNTrue
\n", + "
" + ], + "text/plain": [ + " App Category Rating Reviews \\\n", + "10472 Life Made WI-Fi Touchscreen Photo Frame 1.9 19.0 3000000.0 \n", + "\n", + " Size Installs Type Price Content Rating Genres \\\n", + "10472 1,000+ Free 0 Everyone NaN February 11, 2018 \n", + "\n", + " Last Updated Current Ver Android Ver Reviews_numeric Reviews_isnull \n", + "10472 1.0.19 4.0 and up NaN NaN True " + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_play[google_play[\"Reviews_isnull\"]==True]" ] }, { @@ -240,11 +1166,37 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "App object\n", + "Category object\n", + "Rating float64\n", + "Reviews float64\n", + "Size object\n", + "Installs object\n", + "Type object\n", + "Price object\n", + "Content Rating object\n", + "Genres object\n", + "Last Updated object\n", + "Current Ver object\n", + "Android Ver object\n", + "Reviews_numeric float64\n", + "Reviews_isnull bool\n", + "dtype: object" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_play.dtypes" ] }, { @@ -258,11 +1210,109 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['19M', '14M', '8.7M', '25M', '2.8M', '5.6M', '29M', '33M', '3.1M',\n", + " '28M', '12M', '20M', '21M', '37M', '2.7M', '5.5M', '17M', '39M',\n", + " '31M', '4.2M', '7.0M', '23M', '6.0M', '6.1M', '4.6M', '9.2M',\n", + " '5.2M', '11M', '24M', 'Varies with device', '9.4M', '15M', '10M',\n", + " '1.2M', '26M', '8.0M', '7.9M', '56M', '57M', '35M', '54M', '201k',\n", + " '3.6M', '5.7M', '8.6M', '2.4M', '27M', '2.5M', '16M', '3.4M',\n", + " '8.9M', '3.9M', '2.9M', '38M', '32M', '5.4M', '18M', '1.1M',\n", + " '2.2M', '4.5M', '9.8M', '52M', '9.0M', '6.7M', '30M', '2.6M',\n", + " '7.1M', '3.7M', '22M', '7.4M', '6.4M', '3.2M', '8.2M', '9.9M',\n", + " '4.9M', '9.5M', '5.0M', '5.9M', '13M', '73M', '6.8M', '3.5M',\n", + " '4.0M', '2.3M', '7.2M', '2.1M', '42M', '7.3M', '9.1M', '55M',\n", + " '23k', '6.5M', '1.5M', '7.5M', '51M', '41M', '48M', '8.5M', '46M',\n", + " '8.3M', '4.3M', '4.7M', '3.3M', '40M', '7.8M', '8.8M', '6.6M',\n", + " '5.1M', '61M', '66M', '79k', '8.4M', '118k', '44M', '695k', '1.6M',\n", + " '6.2M', '18k', '53M', '1.4M', '3.0M', '5.8M', '3.8M', '9.6M',\n", + " '45M', '63M', '49M', '77M', '4.4M', '4.8M', '70M', '6.9M', '9.3M',\n", + " '10.0M', '8.1M', '36M', '84M', '97M', '2.0M', '1.9M', '1.8M',\n", + " '5.3M', '47M', '556k', '526k', '76M', '7.6M', '59M', '9.7M', '78M',\n", + " '72M', '43M', '7.7M', '6.3M', '334k', '34M', '93M', '65M', '79M',\n", + " '100M', '58M', '50M', '68M', '64M', '67M', '60M', '94M', '232k',\n", + " '99M', '624k', '95M', '8.5k', '41k', '292k', '11k', '80M', '1.7M',\n", + " '74M', '62M', '69M', '75M', '98M', '85M', '82M', '96M', '87M',\n", + " '71M', '86M', '91M', '81M', '92M', '83M', '88M', '704k', '862k',\n", + " '899k', '378k', '266k', '375k', '1.3M', '975k', '980k', '4.1M',\n", + " '89M', '696k', '544k', '525k', '920k', '779k', '853k', '720k',\n", + " '713k', '772k', '318k', '58k', '241k', '196k', '857k', '51k',\n", + " '953k', '865k', '251k', '930k', '540k', '313k', '746k', '203k',\n", + " '26k', '314k', '239k', '371k', '220k', '730k', '756k', '91k',\n", + " '293k', '17k', '74k', '14k', '317k', '78k', '924k', '902k', '818k',\n", + " '81k', '939k', '169k', '45k', '475k', '965k', '90M', '545k', '61k',\n", + " '283k', '655k', '714k', '93k', '872k', '121k', '322k', '1.0M',\n", + " '976k', '172k', '238k', '549k', '206k', '954k', '444k', '717k',\n", + " '210k', '609k', '308k', '705k', '306k', '904k', '473k', '175k',\n", + " '350k', '383k', '454k', '421k', '70k', '812k', '442k', '842k',\n", + " '417k', '412k', '459k', '478k', '335k', '782k', '721k', '430k',\n", + " '429k', '192k', '200k', '460k', '728k', '496k', '816k', '414k',\n", + " '506k', '887k', '613k', '243k', '569k', '778k', '683k', '592k',\n", + " '319k', '186k', '840k', '647k', '191k', '373k', '437k', '598k',\n", + " '716k', '585k', '982k', '222k', '219k', '55k', '948k', '323k',\n", + " '691k', '511k', '951k', '963k', '25k', '554k', '351k', '27k',\n", + " '82k', '208k', '913k', '514k', '551k', '29k', '103k', '898k',\n", + " '743k', '116k', '153k', '209k', '353k', '499k', '173k', '597k',\n", + " '809k', '122k', '411k', '400k', '801k', '787k', '237k', '50k',\n", + " '643k', '986k', '97k', '516k', '837k', '780k', '961k', '269k',\n", + " '20k', '498k', '600k', '749k', '642k', '881k', '72k', '656k',\n", + " '601k', '221k', '228k', '108k', '940k', '176k', '33k', '663k',\n", + " '34k', '942k', '259k', '164k', '458k', '245k', '629k', '28k',\n", + " '288k', '775k', '785k', '636k', '916k', '994k', '309k', '485k',\n", + " '914k', '903k', '608k', '500k', '54k', '562k', '847k', '957k',\n", + " '688k', '811k', '270k', '48k', '329k', '523k', '921k', '874k',\n", + " '981k', '784k', '280k', '24k', '518k', '754k', '892k', '154k',\n", + " '860k', '364k', '387k', '626k', '161k', '879k', '39k', '970k',\n", + " '170k', '141k', '160k', '144k', '143k', '190k', '376k', '193k',\n", + " '246k', '73k', '658k', '992k', '253k', '420k', '404k', '1,000+',\n", + " '470k', '226k', '240k', '89k', '234k', '257k', '861k', '467k',\n", + " '157k', '44k', '676k', '67k', '552k', '885k', '1020k', '582k',\n", + " '619k'], dtype=object)" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_play[\"Size\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Varies with device 1695\n", + "11M 198\n", + "12M 196\n", + "14M 194\n", + "13M 191\n", + " ... \n", + "429k 1\n", + "200k 1\n", + "460k 1\n", + "728k 1\n", + "619k 1\n", + "Name: Size, Length: 462, dtype: int64" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_play[\"Size\"].value_counts()" ] }, { @@ -278,11 +1328,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.15635089013928605" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "\"\"\"\n", + "Varies with Device\n", + "\"\"\"\n", + "1695/len(google_play)" ] }, { @@ -298,11 +1362,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "google_play.drop(\"Size\", axis=1, inplace=True)" ] }, { @@ -318,11 +1382,36 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "App 0\n", + "Category 0\n", + "Rating 1474\n", + "Reviews 0\n", + "Installs 0\n", + "Type 1\n", + "Price 0\n", + "Content Rating 1\n", + "Genres 0\n", + "Last Updated 0\n", + "Current Ver 8\n", + "Android Ver 3\n", + "Reviews_numeric 1\n", + "Reviews_isnull 0\n", + "dtype: int64" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_play.isnull().sum()" ] }, { @@ -338,11 +1427,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 50, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.13596531685268887" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "1474/len(google_play)" ] }, { @@ -364,11 +1464,37 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "App 0\n", + "Category 0\n", + "Rating 0\n", + "Reviews 0\n", + "Installs 0\n", + "Type 0\n", + "Price 0\n", + "Content Rating 0\n", + "Genres 0\n", + "Last Updated 0\n", + "Current Ver 0\n", + "Android Ver 0\n", + "Reviews_numeric 0\n", + "Reviews_isnull 0\n", + "dtype: int64" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_missing_removed = google_play.dropna()\n", + "google_missing_removed.isnull().sum()" ] }, { @@ -384,11 +1510,327 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jacob\\AppData\\Local\\Temp\\ipykernel_22524\\2269372765.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " google_missing_removed[\"Last Updated\"] = pd.to_datetime(google_missing_removed[\"Last Updated\"])\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AppCategoryRatingReviewsInstallsTypePriceContent RatingGenresLast UpdatedCurrent VerAndroid VerReviews_numericReviews_isnullPrice Numerical
0Photo Editor & Candy Camera & Grid & ScrapBookART_AND_DESIGN4.1159.010,000+Free0EveryoneArt & Design2018-01-071.0.04.0.3 and up159.0False0
1Coloring book moanaART_AND_DESIGN3.9967.0500,000+Free0EveryoneArt & Design;Pretend Play2018-01-152.0.04.0.3 and up967.0False0
2U Launcher Lite – FREE Live Cool Themes, Hide ...ART_AND_DESIGN4.787510.05,000,000+Free0EveryoneArt & Design2018-08-011.2.44.0.3 and up87510.0False0
3Sketch - Draw & PaintART_AND_DESIGN4.5215644.050,000,000+Free0TeenArt & Design2018-06-08Varies with device4.2 and up215644.0False0
4Pixel Draw - Number Art Coloring BookART_AND_DESIGN4.3967.0100,000+Free0EveryoneArt & Design;Creativity2018-06-201.14.4 and up967.0False0
................................................
10834FR CalculatorFAMILY4.07.0500+Free0EveryoneEducation2017-06-181.0.04.1 and up7.0False0
10836Sya9a Maroc - FRFAMILY4.538.05,000+Free0EveryoneEducation2017-07-251.484.1 and up38.0False0
10837Fr. Mike Schmitz Audio TeachingsFAMILY5.04.0100+Free0EveryoneEducation2018-07-061.04.1 and up4.0False0
10839The SCP Foundation DB fr nn5nBOOKS_AND_REFERENCE4.5114.01,000+Free0Mature 17+Books & Reference2015-01-19Varies with deviceVaries with device114.0False0
10840iHoroscope - 2018 Daily Horoscope & AstrologyLIFESTYLE4.5398307.010,000,000+Free0EveryoneLifestyle2018-07-25Varies with deviceVaries with device398307.0False0
\n", + "

9360 rows × 15 columns

\n", + "
" + ], + "text/plain": [ + " App Category \\\n", + "0 Photo Editor & Candy Camera & Grid & ScrapBook ART_AND_DESIGN \n", + "1 Coloring book moana ART_AND_DESIGN \n", + "2 U Launcher Lite – FREE Live Cool Themes, Hide ... ART_AND_DESIGN \n", + "3 Sketch - Draw & Paint ART_AND_DESIGN \n", + "4 Pixel Draw - Number Art Coloring Book ART_AND_DESIGN \n", + "... ... ... \n", + "10834 FR Calculator FAMILY \n", + "10836 Sya9a Maroc - FR FAMILY \n", + "10837 Fr. Mike Schmitz Audio Teachings FAMILY \n", + "10839 The SCP Foundation DB fr nn5n BOOKS_AND_REFERENCE \n", + "10840 iHoroscope - 2018 Daily Horoscope & Astrology LIFESTYLE \n", + "\n", + " Rating Reviews Installs Type Price Content Rating \\\n", + "0 4.1 159.0 10,000+ Free 0 Everyone \n", + "1 3.9 967.0 500,000+ Free 0 Everyone \n", + "2 4.7 87510.0 5,000,000+ Free 0 Everyone \n", + "3 4.5 215644.0 50,000,000+ Free 0 Teen \n", + "4 4.3 967.0 100,000+ Free 0 Everyone \n", + "... ... ... ... ... ... ... \n", + "10834 4.0 7.0 500+ Free 0 Everyone \n", + "10836 4.5 38.0 5,000+ Free 0 Everyone \n", + "10837 5.0 4.0 100+ Free 0 Everyone \n", + "10839 4.5 114.0 1,000+ Free 0 Mature 17+ \n", + "10840 4.5 398307.0 10,000,000+ Free 0 Everyone \n", + "\n", + " Genres Last Updated Current Ver \\\n", + "0 Art & Design 2018-01-07 1.0.0 \n", + "1 Art & Design;Pretend Play 2018-01-15 2.0.0 \n", + "2 Art & Design 2018-08-01 1.2.4 \n", + "3 Art & Design 2018-06-08 Varies with device \n", + "4 Art & Design;Creativity 2018-06-20 1.1 \n", + "... ... ... ... \n", + "10834 Education 2017-06-18 1.0.0 \n", + "10836 Education 2017-07-25 1.48 \n", + "10837 Education 2018-07-06 1.0 \n", + "10839 Books & Reference 2015-01-19 Varies with device \n", + "10840 Lifestyle 2018-07-25 Varies with device \n", + "\n", + " Android Ver Reviews_numeric Reviews_isnull Price Numerical \n", + "0 4.0.3 and up 159.0 False 0 \n", + "1 4.0.3 and up 967.0 False 0 \n", + "2 4.0.3 and up 87510.0 False 0 \n", + "3 4.2 and up 215644.0 False 0 \n", + "4 4.4 and up 967.0 False 0 \n", + "... ... ... ... ... \n", + "10834 4.1 and up 7.0 False 0 \n", + "10836 4.1 and up 38.0 False 0 \n", + "10837 4.1 and up 4.0 False 0 \n", + "10839 Varies with device 114.0 False 0 \n", + "10840 Varies with device 398307.0 False 0 \n", + "\n", + "[9360 rows x 15 columns]" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_missing_removed[\"Last Updated\"] = pd.to_datetime(google_missing_removed[\"Last Updated\"])\n", + "google_missing_removed" ] }, { @@ -402,11 +1844,34 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 56, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['0', '$4.99', '$3.99', '$6.99', '$7.99', '$5.99', '$2.99', '$3.49',\n", + " '$1.99', '$9.99', '$7.49', '$0.99', '$9.00', '$5.49', '$10.00',\n", + " '$24.99', '$11.99', '$79.99', '$16.99', '$14.99', '$29.99',\n", + " '$12.99', '$2.49', '$10.99', '$1.50', '$19.99', '$15.99', '$33.99',\n", + " '$39.99', '$3.95', '$4.49', '$1.70', '$8.99', '$1.49', '$3.88',\n", + " '$399.99', '$17.99', '$400.00', '$3.02', '$1.76', '$4.84', '$4.77',\n", + " '$1.61', '$2.50', '$1.59', '$6.49', '$1.29', '$299.99', '$379.99',\n", + " '$37.99', '$18.99', '$389.99', '$8.49', '$1.75', '$14.00', '$2.00',\n", + " '$3.08', '$2.59', '$19.40', '$3.90', '$4.59', '$15.46', '$3.04',\n", + " '$13.99', '$4.29', '$3.28', '$4.60', '$1.00', '$2.95', '$2.90',\n", + " '$1.97', '$2.56', '$1.20'], dtype=object)" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_missing_removed[\"Price\"].unique()" ] }, { @@ -422,11 +1887,45 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jacob\\AppData\\Local\\Temp\\ipykernel_22524\\3901917276.py:1: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", + " google_missing_removed[\"Price Numerical\"] = google_missing_removed[\"Price\"].str.replace(\"$\",\"\")\n", + "C:\\Users\\Jacob\\AppData\\Local\\Temp\\ipykernel_22524\\3901917276.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " google_missing_removed[\"Price Numerical\"] = google_missing_removed[\"Price\"].str.replace(\"$\",\"\")\n" + ] + }, + { + "data": { + "text/plain": [ + "array(['0', '4.99', '3.99', '6.99', '7.99', '5.99', '2.99', '3.49',\n", + " '1.99', '9.99', '7.49', '0.99', '9.00', '5.49', '10.00', '24.99',\n", + " '11.99', '79.99', '16.99', '14.99', '29.99', '12.99', '2.49',\n", + " '10.99', '1.50', '19.99', '15.99', '33.99', '39.99', '3.95',\n", + " '4.49', '1.70', '8.99', '1.49', '3.88', '399.99', '17.99',\n", + " '400.00', '3.02', '1.76', '4.84', '4.77', '1.61', '2.50', '1.59',\n", + " '6.49', '1.29', '299.99', '379.99', '37.99', '18.99', '389.99',\n", + " '8.49', '1.75', '14.00', '2.00', '3.08', '2.59', '19.40', '3.90',\n", + " '4.59', '15.46', '3.04', '13.99', '4.29', '3.28', '4.60', '1.00',\n", + " '2.95', '2.90', '1.97', '2.56', '1.20'], dtype=object)" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_missing_removed[\"Price Numerical\"] = google_missing_removed[\"Price\"].str.replace(\"$\",\"\")" ] }, { @@ -438,11 +1937,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jacob\\AppData\\Local\\Temp\\ipykernel_22524\\1391389292.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " google_missing_removed[\"Price Numerical\"] = pd.to_numeric(google_missing_removed[\"Price Numerical\"])\n" + ] + } + ], "source": [ - "# Your code here:\n" + "google_missing_removed[\"Price Numerical\"] = pd.to_numeric(google_missing_removed[\"Price Numerical\"])" ] }, { @@ -454,11 +1966,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 62, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jacob\\AppData\\Local\\Temp\\ipykernel_22524\\2079523825.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " google_missing_removed.drop(\"Price\", axis=1, inplace=True)\n" + ] + } + ], "source": [ - "# Your code here:\n" + "google_missing_removed.drop(\"Price\", axis=1, inplace=True)" ] }, { @@ -474,11 +1998,36 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here" + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "App object\n", + "Category object\n", + "Rating float64\n", + "Reviews float64\n", + "Installs object\n", + "Type object\n", + "Content Rating object\n", + "Genres object\n", + "Last Updated datetime64[ns]\n", + "Current Ver object\n", + "Android Ver object\n", + "Reviews_numeric float64\n", + "Reviews_isnull bool\n", + "Price Numerical float64\n", + "dtype: object" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_missing_removed.dtypes" ] }, { @@ -497,11 +2046,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 65, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "google_reviews = pd.read_csv(\"./data/googleplaystore_user_reviews.csv\")" ] }, { @@ -515,11 +2064,105 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here\n" + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AppTranslated_ReviewSentimentSentiment_PolaritySentiment_Subjectivity
010 Best Foods for YouI like eat delicious food. That's I'm cooking ...Positive1.000.533333
110 Best Foods for YouThis help eating healthy exercise regular basisPositive0.250.288462
210 Best Foods for YouNaNNaNNaNNaN
310 Best Foods for YouWorks great especially going grocery storePositive0.400.875000
410 Best Foods for YouBest idea usPositive1.000.300000
\n", + "
" + ], + "text/plain": [ + " App Translated_Review \\\n", + "0 10 Best Foods for You I like eat delicious food. That's I'm cooking ... \n", + "1 10 Best Foods for You This help eating healthy exercise regular basis \n", + "2 10 Best Foods for You NaN \n", + "3 10 Best Foods for You Works great especially going grocery store \n", + "4 10 Best Foods for You Best idea us \n", + "\n", + " Sentiment Sentiment_Polarity Sentiment_Subjectivity \n", + "0 Positive 1.00 0.533333 \n", + "1 Positive 0.25 0.288462 \n", + "2 NaN NaN NaN \n", + "3 Positive 0.40 0.875000 \n", + "4 Positive 1.00 0.300000 " + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_reviews.head()" ] }, { @@ -546,11 +2189,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 67, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "review_missing_removed = google_reviews.dropna()" ] }, { @@ -562,11 +2205,33 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Bowmasters 312\n", + "Helix Jump 273\n", + "Angry Birds Classic 273\n", + "Calorie Counter - MyFitnessPal 254\n", + "Duolingo: Learn Languages Free 240\n", + " ... \n", + "Draw a Stickman: EPIC 2 1\n", + "HD Camera 1\n", + "Draw In 1\n", + "Draw A Stickman 1\n", + "Best Fiends - Free Puzzle Game 1\n", + "Name: App, Length: 865, dtype: int64" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "review_missing_removed[\"App\"].value_counts()" ] }, { @@ -595,12 +2260,10 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 69, "metadata": {}, "outputs": [], "source": [ - "# Your code below\n", - "\n", "def positive_function(x):\n", " \"\"\"\n", " Count how many times the string `Positive` appears in a column (exact string match).\n", @@ -611,7 +2274,7 @@ " Returns:\n", " The number of occurrences of `Positive` in the column data.\n", " \"\"\"\n", - " return 0" + " return len(np.where(x == 'Positive')[0])" ] }, { @@ -635,11 +2298,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jacob\\AppData\\Local\\Temp\\ipykernel_22524\\184782220.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " review_missing_removed[\"positive\"] = review_missing_removed[\"Sentiment\"].apply(positive_function)\n" + ] + } + ], + "source": [ + "review_missing_removed[\"positive\"] = review_missing_removed[\"Sentiment\"].apply(positive_function)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "google_agg = review_missing_removed.groupby(\"App\").agg({\"positive\":\"sum\",\"Sentiment\":\"count\"})\n", + "google_agg.columns = [\"Positive\", \"Total\"]" ] }, { @@ -651,11 +2337,86 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here\n" + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PositiveTotal
App
10 Best Foods for You162194
104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室3140
11st2339
1800 Contacts - Lens Store6480
1LINE – One Line with One Touch2738
\n", + "
" + ], + "text/plain": [ + " Positive Total\n", + "App \n", + "10 Best Foods for You 162 194\n", + "104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 31 40\n", + "11st 23 39\n", + "1800 Contacts - Lens Store 64 80\n", + "1LINE – One Line with One Touch 27 38" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_agg.head()" ] }, { @@ -669,11 +2430,153 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PositiveTotalPositve Ratio
App
10 Best Foods for You1621940.835052
104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室31400.775
11st23390.589744
1800 Contacts - Lens Store64800.8
1LINE – One Line with One Touch27380.710526
............
Hotels.com: Book Hotel Rooms & Find Vacation Deals39680.573529
Hotspot Shield Free VPN Proxy & Wi-Fi Security17340.5
Hotstar14320.4375
Hotwire Hotel & Car Rental App16330.484848
Housing-Real Estate & Property8210.380952
\n", + "

865 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " Positive Total \\\n", + "App \n", + "10 Best Foods for You 162 194 \n", + "104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 31 40 \n", + "11st 23 39 \n", + "1800 Contacts - Lens Store 64 80 \n", + "1LINE – One Line with One Touch 27 38 \n", + "... ... ... \n", + "Hotels.com: Book Hotel Rooms & Find Vacation Deals 39 68 \n", + "Hotspot Shield Free VPN Proxy & Wi-Fi Security 17 34 \n", + "Hotstar 14 32 \n", + "Hotwire Hotel & Car Rental App 16 33 \n", + "Housing-Real Estate & Property 8 21 \n", + "\n", + " Positve Ratio \n", + "App \n", + "10 Best Foods for You 0.835052 \n", + "104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 0.775 \n", + "11st 0.589744 \n", + "1800 Contacts - Lens Store 0.8 \n", + "1LINE – One Line with One Touch 0.710526 \n", + "... ... \n", + "Hotels.com: Book Hotel Rooms & Find Vacation Deals 0.573529 \n", + "Hotspot Shield Free VPN Proxy & Wi-Fi Security 0.5 \n", + "Hotstar 0.4375 \n", + "Hotwire Hotel & Car Rental App 0.484848 \n", + "Housing-Real Estate & Property 0.380952 \n", + "\n", + "[865 rows x 3 columns]" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_agg[\"Positve Ratio\"] = np.where(google_agg[\"Total\"]>0, google_agg[\"Positive\"]/google_agg[\"Total\"], None)\n", + "google_agg" ] }, { @@ -685,11 +2588,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 90, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "google_agg.drop([\"Positive\", \"Total\"], axis=1, inplace=True)" ] }, { @@ -703,11 +2606,79 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Positve Ratio
App
10 Best Foods for You0.835052
104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室0.775
11st0.589744
1800 Contacts - Lens Store0.8
1LINE – One Line with One Touch0.710526
\n", + "
" + ], + "text/plain": [ + " Positve Ratio\n", + "App \n", + "10 Best Foods for You 0.835052\n", + "104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 0.775\n", + "11st 0.589744\n", + "1800 Contacts - Lens Store 0.8\n", + "1LINE – One Line with One Touch 0.710526" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_agg.head()" ] }, { @@ -723,11 +2694,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 98, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "google = pd.merge(left=google_missing_removed, right=google_agg, on=\"App\")" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [], + "source": [ + "google.drop([\"Reviews_numeric\", \"Reviews_isnull\"], axis=1, inplace=True)" ] }, { @@ -739,19 +2719,175 @@ "![Final Product](../images/google-final-head.png)" ] }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AppCategoryRatingReviewsInstallsTypeContent RatingGenresLast UpdatedCurrent VerAndroid VerPrice NumericalPositve Ratio
0Coloring book moanaART_AND_DESIGN3.9967.0500,000+FreeEveryoneArt & Design;Pretend Play2018-01-152.0.04.0.3 and up0.00.590909
1Coloring book moanaFAMILY3.9974.0500,000+FreeEveryoneArt & Design;Pretend Play2018-01-152.0.04.0.3 and up0.00.590909
2Garden Coloring BookART_AND_DESIGN4.413791.01,000,000+FreeEveryoneArt & Design2017-09-202.9.23.0 and up0.00.711111
3FlipaClip - Cartoon animationART_AND_DESIGN4.3194216.05,000,000+FreeEveryoneArt & Design2018-08-032.2.54.0.3 and up0.01.0
4Boys Photo Editor - Six Pack & Men's SuitART_AND_DESIGN4.1654.0100,000+FreeEveryoneArt & Design2018-03-201.14.0.3 and up0.00.605263
\n", + "
" + ], + "text/plain": [ + " App Category Rating \\\n", + "0 Coloring book moana ART_AND_DESIGN 3.9 \n", + "1 Coloring book moana FAMILY 3.9 \n", + "2 Garden Coloring Book ART_AND_DESIGN 4.4 \n", + "3 FlipaClip - Cartoon animation ART_AND_DESIGN 4.3 \n", + "4 Boys Photo Editor - Six Pack & Men's Suit ART_AND_DESIGN 4.1 \n", + "\n", + " Reviews Installs Type Content Rating Genres \\\n", + "0 967.0 500,000+ Free Everyone Art & Design;Pretend Play \n", + "1 974.0 500,000+ Free Everyone Art & Design;Pretend Play \n", + "2 13791.0 1,000,000+ Free Everyone Art & Design \n", + "3 194216.0 5,000,000+ Free Everyone Art & Design \n", + "4 654.0 100,000+ Free Everyone Art & Design \n", + "\n", + " Last Updated Current Ver Android Ver Price Numerical Positve Ratio \n", + "0 2018-01-15 2.0.0 4.0.3 and up 0.0 0.590909 \n", + "1 2018-01-15 2.0.0 4.0.3 and up 0.0 0.590909 \n", + "2 2017-09-20 2.9.2 3.0 and up 0.0 0.711111 \n", + "3 2018-08-03 2.2.5 4.0.3 and up 0.0 1.0 \n", + "4 2018-03-20 1.1 4.0.3 and up 0.0 0.605263 " + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google.head()" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# Your code here:\n" - ] + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -765,7 +2901,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.2" + "version": "3.11.3" } }, "nbformat": 4,