-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata-modeling-lab3.sql
More file actions
175 lines (154 loc) · 5.83 KB
/
data-modeling-lab3.sql
File metadata and controls
175 lines (154 loc) · 5.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
CREATE TYPE vertex_type
AS ENUM('player', 'team', 'game');
CREATE TABLE vertices (
identifier TEXT,
TYPE vertex_type,
properties JSON,
PRIMARY KEY (identifier, type)
);
CREATE TYPE edge_type AS
ENUM('plays_against', 'shares_team', 'plays_in', 'plays_on');
-- DROP type edge_type cascade; -- Eliminar un type
CREATE TABLE edges (
subject_identifier TEXT,
subject_type vertex_type,
object_identifier TEXT,
object_type vertex_type,
edge_type edge_type,
properties JSON,
PRIMARY KEY (subject_identifier, subject_type, object_identifier, object_type, edge_type)
);
-- Los tipos creados son como variables, las cuales pueden ser llamar luego para asignar un valor a un columna
INSERT INTO vertices(
SELECT game_id AS identifier,
'game'::vertex_type AS TYPE,
json_build_object('pts_home', pts_home,
'pts_away', pts_away,
'wininning_team', CASE WHEN home_team_wins = 1 THEN home_team_id ELSE visitor_team_id END
) AS properties
FROM games);
SELECT * FROM vertices;
-- ARRAY_AGG ingresa dentro un array a todos los valores iguales en el resto de las columnas, evitando de esta manera cargar de mas las consulta
INSERT INTO vertices(
WITH players_agg AS(
SELECT
player_id AS identifier,
MAX(player_name) AS player_name, -- Necesario porque player_name no está en GROUP BY
'player'::vertex_type AS TYPE,
COUNT(1) AS number_of_games,
SUM(pts) AS total_points,
ARRAY_AGG(DISTINCT team_id) AS teams
FROM game_details
GROUP BY player_id
)
SELECT identifier, 'player'::vertex_type,
json_build_object(
'player_name', player_name,
'number_of_games', number_of_games,
'total_points', total_points,
'teams', teams
)
FROM players_agg);
-- SELECT *, ROW_NUMBER() OVER(PARTITION BY team_id):
-- enumerar elementos dentro de grupos.
-- filtrar la primera fila de cada grupo (por ejemplo, usando WHERE row_num = 1 en una subconsulta).
-- resolver duplicados o seleccionar registros más recientes por grupo
INSERT INTO vertices(
WITH teams_deduped AS (
SELECT *, ROW_NUMBER() OVER(PARTITION BY team_id) AS row_num
FROM teams)
SELECT team_id AS identifier,
'team'::vertex_type AS type,
json_build_object(
'abbreviation', abbreviation,
'nickname', nickname,
'city', city,
'arena', arena,
'year_founded', yearfounded)
FROM teams_deduped
WHERE row_num = 1);
-- Se podria llamar count(type) pero no contaria lo valores nulos, por tal motivo se usa count(1) o count(*)
SELECT type, COUNT(*)
FROM vertices
GROUP BY type;
INSERT INTO edges(
WITH edges_deduped AS(
SELECT *, ROW_NUMBER() OVER(PARTITION BY player_id, game_id) AS row_num
FROM game_details)
SELECT
player_id AS subject_identifier,
'player'::vertex_type AS subject_type,
game_id AS object_identifier,
'game'::vertex_type AS object_type,
'plays_in'::edge_type AS edge_type,
json_build_object(
'start_position', start_position,
'pts', pts,
'team_id', team_id,
'team_abbreviation', team_abbreviation) AS properties
FROM edges_deduped
WHERE row_num = 1);
-- El operador ->> en PostgreSQL devuelve el valor como texto plano.
SELECT
v.properties->>'player_name' AS player_name, -- buscar dentro de los json el valor dado, en este caso player_name
MAX(CAST(e.properties->>'pts' AS INTEGER))
FROM vertices v
JOIN edges e
ON e.subject_identifier = v.identifier
AND e.subject_type = v.TYPE
GROUP BY 1
ORDER BY 2 DESC;
INSERT INTO edges
WITH deduped AS(
SELECT *, ROW_NUMBER() OVER(PARTITION BY player_id, game_id) AS row_num
FROM game_details),
filtered AS (
SELECT *
FROM deduped
WHERE row_num = 1
),
aggregated AS (
SELECT f1.player_id AS subject_player_id,
f2.player_id AS object_player_id,
CASE WHEN f1.team_abbreviation = f2.team_abbreviation
THEN 'shares_team'::edge_type ELSE 'plays_against'::edge_type END AS edge_type,
MAX(f1.player_name) AS subject_player_name,
MAX(f2.player_name) AS object_player_name,
COUNT(1) AS num_games,
SUM(f1.pts) AS subject_points,
SUM(f2.pts) AS object_points
FROM filtered f1
JOIN filtered f2
ON f1.game_id = f2.game_id
AND f1.player_name <> f2.player_name
WHERE f1.player_id > f2.player_id
GROUP BY f1.player_id,
f2.player_id,
CASE WHEN f1.team_abbreviation = f2.team_abbreviation
THEN 'shares_team'::edge_type ELSE 'plays_against'::edge_type END)
SELECT subject_player_id AS subject_identifier,
'player'::vertex_type AS subject_type,
object_player_id AS object_identifier,
'player'::vertex_type AS object_type,
edge_type AS edge_type,
json_build_object(
'num_games', num_games,
'subject_points', subject_points,
'object_points', object_points
)
FROM aggregated;
SELECT
v.properties->>'player_name' AS player_name,
e.object_identifier,
CAST(v.properties->>'number_of_games' AS REAL) /
CASE WHEN CAST(v.properties->>'total_points' AS REAL) = 0 THEN 1
ELSE CAST(v.properties->>'total_points' AS REAL) END AS pct_point_per_game_career,
e.properties->>'subject_points' AS total_points_scores_vs_object,
e.properties->>'num_games' AS games_played
FROM vertices v
JOIN edges e
ON v.identifier = e.subject_identifier
AND v.type = e.subject_type
WHERE e.object_type = 'player'::vertex_type AND CAST(v.properties->>'number_of_games' AS REAL) /
CASE WHEN CAST(v.properties->>'total_points' AS REAL) = 0 THEN 1
ELSE CAST(v.properties->>'total_points' AS REAL) END IS NOT NULL;