-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata-modeling-lab2.sql
More file actions
130 lines (121 loc) · 4.31 KB
/
data-modeling-lab2.sql
File metadata and controls
130 lines (121 loc) · 4.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
-- Crear una tabla slowling changing dimension --
CREATE TABLE players_scd (
player_name TEXT,
scoring_class scoring_class,
is_active BOOLEAN,
start_season INTEGER,
end_season INTEGER,
current_season INTEGER,
PRIMARY KEY (player_name, start_season)
);
-- Crear tipo para array usado en changed_records --
CREATE TYPE scd_type AS(
scoring_class scoring_class,
is_active BOOLEAN,
start_season INTEGER,
end_season INTEGER
)
-- Comparacion de si hubo cambio entre score actual y si esta activo e ingresar los datos en tabla --
INSERT INTO players_scd
WITH with_previous AS(
-- Selecciona datos de la tabla players y calcula valores previos
SELECT player_name,
current_season,
scoring_class,
is_active,
LAG(scoring_class,1) OVER (PARTITION BY player_name ORDER BY current_season) AS previous_scoring_class, -- Obtiene el valor de scoring_class del registro anterior para cada jugador, permitiendo detectar cambios en el tiempo
LAG(is_active,1) OVER (PARTITION BY player_name ORDER BY current_season) AS previous_is_active
FROM players
),
with_indicators AS (
-- Identifica si hubo cambios en scoring_class o is_active, asignando un 1 si lo hubo 0
SELECT *,
CASE WHEN scoring_class <> previous_scoring_class THEN 1
WHEN is_active <> previous_is_active THEN 1
ELSE 0
END AS change_indicator -- Columna donde se vera reflejado si hubo un cambio en la columna o no
FROM with_previous
WHERE current_season <= 2021
),
-- Agrupa períodos consecutivos sin cambios
with_streaks AS (
SELECT *, SUM(change_indicator) OVER (PARTITION BY player_name ORDER BY current_season) AS streak_identifier -- Columna que lleva el conteo de cambios en columnas indicadas por cada jugador por temporada
FROM with_indicators
)
-- Inserta los datos agrupados en la tabla players_scd
SELECT player_name, scoring_class, is_active, MIN(current_season) AS start_season, MAX(current_season) AS end_season, 2021 AS current_season
FROM with_streaks
GROUP BY player_name, is_active, scoring_class
ORDER BY player_name;
-- Data incrementada procesando mucho menos data de una manera mas efectiva, con datos de 2021 y 2022 --
WITH last_season_scd AS (
SELECT *
FROM players_scd
WHERE current_season = 2021 AND end_season = 2021
),
historical_scd AS (
SELECT player_name, scoring_class, is_active, start_season, end_season
FROM players_scd
WHERE current_season = 2021 AND end_season < 2021
),
this_season_data AS (
SELECT *
FROM players
WHERE current_season = 2022
),
unchanged_records AS (
SELECT ts.player_name,
ts.scoring_class,
ts.is_active,
ls.start_season,
ls.current_season AS end_season
FROM this_season_data ts
JOIN last_season_scd ls ON ls.player_name = ts.player_name
WHERE ts.scoring_class = ls.scoring_class
AND ts.is_active = ls.is_active
),
changed_records AS (
SELECT ts.player_name,
UNNEST(ARRAY[
ROW(
ls.scoring_class,
ls.is_active,
ls.start_season,
ls.end_season
)::scd_type,
ROW(
ts.scoring_class,
ts.is_active,
ts.current_season,
ts.current_season
)::scd_type
]) AS records
FROM this_season_data ts
LEFT JOIN last_season_scd ls ON ls.player_name = ts.player_name
WHERE (ts.scoring_class <> ls.scoring_class
OR ts.is_active <> ls.is_active)
),
unnested_changed_records AS (
SELECT player_name,
(records::scd_type).scoring_class,
(records::scd_type).is_active,
(records::scd_type).start_season,
(records::scd_type).end_season
FROM changed_records
),
new_records AS (
SELECT ts.player_name,
ts.scoring_class,
ts.is_active,
ts.current_season AS start_season,
ts.current_season AS end_season
FROM this_season_data ts
LEFT JOIN last_season_scd ls ON ts.player_name = ls.player_name
WHERE ls.player_name IS NULL
)
SELECT * FROM historical_scd
UNION ALL
SELECT * FROM unnested_changed_records
UNION ALL
SELECT * FROM new_records;
SELECT * FROM players_scd;