-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfact_data_modeling_lab3.sql
More file actions
62 lines (51 loc) · 1.96 KB
/
fact_data_modeling_lab3.sql
File metadata and controls
62 lines (51 loc) · 1.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
CREATE TABLE array_metrics(
user_id NUMERIC,
month_start DATE,
metric_name TEXT,
metric_array REAL[],
PRIMARY KEY (user_id, month_start, metric_name)
);
-- Borrar todos los registro dentro de una tabla
DELETE FROM array_metrics;
INSERT INTO array_metrics
WITH daily_aggregate AS (
SELECT user_id,
DATE(event_time) AS date,
COUNT(1) AS num_site_hits
FROM events
WHERE DATE(event_time) = DATE('2023-01-04') -- Al mover el dia de en esta fecha, agrega al array un record mas, mas no agregara la fecha o el dia ingresado, esto empezo como (2023-01-01)
AND user_id IS NOT NULL
GROUP BY user_id, DATE(event_time)
), yesterday_array AS (
SELECT *
FROM array_metrics
WHERE month_start = DATE('2023-01-01')
)
SELECT COALESCE(da.user_id, ya.user_id) AS user_id,
COALESCE(ya.month_start, DATE_TRUNC('month', da.date)) AS month_start,
'site_hits' AS metric_name,
CASE WHEN ya.metric_name IS NOT NULL THEN
ya.metric_array || ARRAY[COALESCE(da.num_site_hits, 0)]
WHEN ya.metric_name IS NULL THEN
ARRAY_FILL(0, ARRAY[COALESCE(date - DATE(DATE_TRUNC('month', date)), 0)]) || ARRAY[COALESCE(da.num_site_hits, 0)]
END AS metric_array
FROM daily_aggregate da
FULL OUTER JOIN yesterday_array ya
ON da.user_id = ya.user_id
ON CONFLICT(user_id, month_start, metric_name)
DO
UPDATE SET metric_array = EXCLUDED.metric_array;
SELECT * FROM array_metrics;
SELECT cardinality(metric_array), COUNT(1) FROM array_metrics GROUP BY 1;
WITH agg AS (
SELECT metric_name,
month_start,
ARRAY[SUM(metric_array[1]), SUM(metric_array[2]), SUM(metric_array[3]), SUM(metric_array[4])] AS summed_array
FROM array_metrics
GROUP BY metric_name, month_start
)
SELECT metric_name,
month_start + CAST(CAST(index - 1 AS TEXT) || 'day' AS INTERVAL),
elem AS value
FROM agg
CROSS JOIN UNNEST(agg.summed_array) WITH ORDINALITY AS a(elem, index);