-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfact_data_modeling.sql
More file actions
99 lines (90 loc) · 2.62 KB
/
fact_data_modeling.sql
File metadata and controls
99 lines (90 loc) · 2.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
-- Conteo de duplicados con la misma informacion
SELECT
game_id,
team_id,
player_id,
COUNT(1)
FROM game_details
GROUP BY 1,2,3
HAVING COUNT(1) > 1;
CREATE TABLE fct_game_details (
dim_game_date DATE,
dim_season INTEGER,
dim_team_id INTEGER,
dim_player_id INTEGER,
dim_player_name TEXT,
dim_start_position TEXT,
dim_is_playing_at_home BOOLEAN,
dim_did_not_play BOOLEAN,
dim_did_not_dress BOOLEAN,
dim_not_with_team BOOLEAN,
m_minutes REAL,
m_fgm INTEGER,
m_fga INTEGER,
m_fg3m INTEGER,
m_fg3a INTEGER,
m_ftm INTEGER,
m_fta INTEGER,
m_oreb INTEGER,
m_dreb INTEGER,
m_reb INTEGER,
m_ast INTEGER,
m_stl INTEGER,
m_blk INTEGER,
m_turnovers INTEGER,
m_pf INTEGER,
m_pts INTEGER,
m_plus_minus INTEGER,
PRIMARY KEY (dim_game_date, dim_team_id, dim_player_id)
);
INSERT INTO fct_game_details
WITH deduped AS (
SELECT g.game_date_est,
g.season,
g.home_team_id,
g.visitor_team_id,
gd.*, ROW_NUMBER() OVER(PARTITION BY gd.game_id, team_id, player_id ORDER BY g.game_date_est) AS row_num
FROM game_details gd
JOIN games g
ON gd.game_id = g.game_id
)
SELECT game_date_est AS dim_game_date,
season AS dim_season,
team_id AS dim_team_id,
player_id AS dim_player_id,
player_name AS dim_player_name,
start_position AS dim_start_position,
team_id = home_team_id AS dim_is_playing_at_home,
COALESCE(POSITION('DNP' in comment), 0) > 0 AS dim_did_not_play, -- POSITION(value in column) cuenta cuantas veces aparece el valor en el registro de la columna
COALESCE(POSITION('DND' in comment), 0) > 0 AS dim_did_not_dress,
COALESCE(POSITION('NWT' in comment), 0) > 0 AS dim_not_with_team,
(CAST(SPLIT_PART(min, ':', 1) AS REAL) + CAST(SPLIT_PART(min, ':', 2) AS REAL)) / 60 AS m_minutes,
fgm AS m_fgm,
fga AS m_fga,
fg3m AS m_fg3m,
fg3a AS m_fg3a,
ftm AS m_ftm,
fta AS m_fta,
oreb AS m_oreb,
dreb AS m_dreb,
reb AS m_reb,
ast AS m_ast,
stl AS m_stl,
blk AS m_blk,
"TO" AS m_turnovers,
pf AS m_pf,
pts AS m_pts,
plus_minus AS m_plus_minus
FROM deduped
WHERE row_num = 1;
SELECT t.*, gd.*
FROM fct_game_details gd
JOIN teams t
ON t.team_id = gd.dim_team_id;
SELECT dim_player_name,
COUNT(1) AS num_games,
COUNT(CASE WHEN dim_not_with_team THEN 1 END) AS bailed_num,
CAST(COUNT(CASE WHEN dim_not_with_team THEN 1 END) AS REAL)/COUNT(1) AS bail_pct
FROM fct_game_details
GROUP BY 1
ORDER BY 4 DESC;