-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAppStore.sql
More file actions
103 lines (83 loc) · 2.54 KB
/
AppStore.sql
File metadata and controls
103 lines (83 loc) · 2.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
USE AppleStore;
SHOW TABLES;
-- Exploratory Data Analysis
-- Check the no. of unique apps in both AppStore & App tables
SELECT COUNT(DISTINCT id) AS UniqueIDs
FROM AppStore;
SELECT COUNT(DISTINCT id) AS UniqueIDs
FROM App;
-- Check for any missing values
SELECT COUNT(*) AS MissingValues
FROM AppStore
WHERE track_name IS NULL OR user_rating IS NULL OR prime_genre IS NULL;
SELECT COUNT(*) AS MissingValues
FROM App
WHERE app_desc IS NULL;
-- Count the no.of apps per genre
SELECT prime_genre, COUNT(*) AS NumApps
FROM AppStore
GROUP BY prime_genre
ORDER BY NumApps DESC;
-- Statistics about the app's user ratings
SELECT max(user_rating) AS MaxRating,
min(user_rating) AS MinRating,
avg(user_rating) AS AvgRating
FROM AppStore;
-- Price distribution of the apps
SELECT PriceBinStart, PriceBinEnd, COUNT(*) AS NumApps
FROM (
SELECT (FLOOR(price / 2) * 2) AS PriceBinStart,
(FLOOR(price / 2) * 2) + 2 AS PriceBinEnd
FROM AppStore
) AS Subquery
GROUP BY PriceBinStart, PriceBinEnd
ORDER BY PriceBinStart;
-- Data Analysis
-- Determine whether Paid Apps have higher ratings than Free Apps
SELECT CASE
WHEN price > 0 THEN 'Paid'
ELSE 'Free'
END AS App_Type,
avg(user_rating) as AvgRating
FROM AppStore
GROUP BY App_Type;
-- Determine whether apps with more supporting languages have higher ratings
SELECT CASE
WHEN lang_num < 10 THEN '< 10 languages'
WHEN lang_num BETWEEN 10 AND 30 THEN '10-30 languages'
ELSE '> 30 languages'
END AS lang_available,
avg(user_rating) AS AvgRating
FROM AppStore
GROUP BY lang_available
ORDER BY AvgRating DESC;
SELECT prime_genre,
avg(user_rating) AS AvgRating
FROM AppStore
GROUP BY prime_genre
ORDER BY AvgRating ASC
LIMIT 10;
-- Determine if lengthy app description has higher user ratings
SELECT description_length,
AVG(AvgRating) AS AvgRating,
COUNT(*) AS DescriptionLengthCount
FROM (
SELECT CASE
WHEN LENGTH(b.app_desc) < 500 THEN 'Short'
WHEN LENGTH(b.app_desc) BETWEEN 500 AND 1000 THEN 'Medium'
ELSE 'Long'
END AS description_length,
a.user_rating AS AvgRating
FROM AppStore AS a
JOIN App AS b ON a.id = b.id
) AS SubqueryAlias
GROUP BY description_length
ORDER BY AvgRating DESC;
-- Check the top-rated apps in each genres
SELECT prime_genre, track_name, user_rating
FROM (
SELECT prime_genre, track_name, user_rating,
RANK() OVER(PARTITION BY prime_genre ORDER BY user_rating DESC, rating_count_tot DESC) AS ranking
FROM AppStore
) AS a
WHERE a.ranking = 1;