-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgithubdbreduced.sql
More file actions
222 lines (193 loc) · 8.4 KB
/
githubdbreduced.sql
File metadata and controls
222 lines (193 loc) · 8.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
-- Creating Tables & Entities --
CREATE TABLE Accounts(
account_id INTEGER,
account_type TEXT,
PRIMARY KEY(account_id));
CREATE TABLE Users(
user_id INTEGER,
username VARCHAR(39) NOT NULL UNIQUE,
email TEXT NOT NULL UNIQUE,
password TEXT,
bio TEXT,
location TEXT,
status CHECK(status IN ('active', 'suspended')),
PRIMARY KEY(user_id),
FOREIGN KEY(user_id) REFERENCES Accounts(account_id));
CREATE TABLE Followers(
user_id INTEGER,
follower_id INTEGER,
PRIMARY KEY(user_id, follower_id),
FOREIGN KEY(user_id) REFERENCES Users(user_id) ON DELETE CASCADE,
FOREIGN KEY(follower_id) REFERENCES Users(user_id) ON DELETE CASCADE);
CREATE TABLE Organizations(
org_id INTEGER,
name VARCHAR(39) NOT NULL UNIQUE,
bio TEXT,
location TEXT,
PRIMARY KEY(org_id),
FOREIGN KEY(org_id) REFERENCES Accounts(account_id));
CREATE TABLE Membership( -- weak entity reliant on user_id and org_id for identification --
membership_id INTEGER,
role TEXT NOT NULL,
joined_at TEXT NOT NULL,
user_id,
org_id,
PRIMARY KEY(membership_id),
FOREIGN KEY(user_id) REFERENCES Users(user_id) ON DELETE CASCADE,
FOREIGN KEY(org_id) REFERENCES Organizations(org_id) ON DELETE CASCADE,
UNIQUE(user_id, org_id)); -- A user can't be part of the same organization more than once
CREATE TABLE Repositories(
repo_id INTEGER,
name TEXT NOT NULL,
description TEXT,
visibility TEXT NOT NULL CHECK(visibility IN ('public', 'private')),
owner_user_id,
owner_org_id,
PRIMARY KEY(repo_id),
FOREIGN KEY(owner_user_id) REFERENCES Users(user_id) ON DELETE CASCADE,
FOREIGN KEY(owner_org_id) REFERENCES Organizations(org_id) ON DELETE CASCADE,
CHECK((owner_user_id IS NULL) != (owner_org_id IS NULL)));
CREATE TABLE Commits(
commit_id TEXT,
author_id,
repo_id,
message TEXT NOT NULL,
timestamp,
PRIMARY KEY(commit_id),
FOREIGN KEY(author_id) REFERENCES Users(user_id) ON DELETE SET NULL,
FOREIGN KEY(repo_id) REFERENCES Repositories(repo_id) ON DELETE CASCADE);
CREATE TABLE Pull_requests(
pr_id INTEGER,
repo_id,
title,
description,
status TEXT CHECK(status IN ('open', 'closed', 'merged')),
creator_id,
PRIMARY KEY(pr_id),
FOREIGN KEY(repo_id) REFERENCES Repositories(repo_id) ON DELETE CASCADE,
FOREIGN KEY(creator_id) REFERENCES Users(user_id) ON DELETE SET NULL);
-- Inserting data values into each table --
-- Accounts (must be inserted first)
INSERT INTO Accounts VALUES (1, 'user');
INSERT INTO Accounts VALUES (2, 'user');
INSERT INTO Accounts VALUES (3, 'user');
INSERT INTO Accounts VALUES (4, 'user');
INSERT INTO Accounts VALUES (5, 'user');
INSERT INTO Accounts VALUES (6, 'organization');
INSERT INTO Accounts VALUES (7, 'organization');
INSERT INTO Accounts VALUES (8, 'organization');
-- Users (user_id matches account_id)
INSERT INTO Users VALUES (1, 'mintz', 'mintz@email.com', 'hashed_pw1', 'AI developer', 'Leiden, NL', 'active');
INSERT INTO Users VALUES (2, 'janek', 'janek@email.com', 'hashed_pw2', 'Full stack dev', 'Amsterdam, NL', 'active');
INSERT INTO Users VALUES (3, 'ceco', 'ceco@email.com', 'hashed_pw3', 'Data scientist', 'Rotterdam, NL', 'active');
INSERT INTO Users VALUES (4, 'travis', 'travis@email.com', 'hashed_pw4', 'DevOps engineer', 'Utrecht, NL', 'active');
INSERT INTO Users VALUES (5, 'mara', 'mara@email.com', 'hashed_pw5', 'Backend dev', 'Den Haag, NL', 'suspended');
-- Followers (user_id matches account_id)
INSERT INTO Followers VALUES (1, 2);
INSERT INTO Followers VALUES (1, 3);
INSERT INTO Followers VALUES (1, 4);
INSERT INTO Followers VALUES (1, 5);
INSERT INTO Followers VALUES (2, 1);
INSERT INTO Followers VALUES (2, 3);
INSERT INTO Followers VALUES (2, 4);
INSERT INTO Followers VALUES (2, 5);
INSERT INTO Followers VALUES (3, 1);
INSERT INTO Followers VALUES (3, 2);
INSERT INTO Followers VALUES (3, 4);
INSERT INTO Followers VALUES (3, 5);
INSERT INTO Followers VALUES (4, 1);
INSERT INTO Followers VALUES (4, 2);
INSERT INTO Followers VALUES (4, 3);
INSERT INTO Followers VALUES (4, 5);
INSERT INTO Followers VALUES (5, 1);
INSERT INTO Followers VALUES (5, 2);
INSERT INTO Followers VALUES (5, 3);
INSERT INTO Followers VALUES (5, 4);
-- Organizations (org_id matches account_id, starting from 6)
INSERT INTO Organizations VALUES (6, 'promptshop', 'AI start up', 'Amsterdam, NL');
INSERT INTO Organizations VALUES (7, 'leiden-uni', 'Leiden University', 'Leiden, NL');
INSERT INTO Organizations VALUES (8, 'opendev', 'Open source org', 'Utrecht, NL');
-- Membership
INSERT INTO Membership VALUES (1, 'admin', '2024-01-01', 1, 6);
INSERT INTO Membership VALUES (2, 'member', '2024-02-01', 2, 6);
INSERT INTO Membership VALUES (3, 'admin', '2024-03-01', 3, 7);
INSERT INTO Membership VALUES (4, 'member', '2024-04-01', 3, 8);
INSERT INTO Membership VALUES (5, 'member', '2024-05-01', 5, 8);
-- Repositories
INSERT INTO Repositories VALUES (1, 'promptshop-api', 'REST API for PromptShop', 'public', 1, NULL);
INSERT INTO Repositories VALUES (2, 'ml-models', 'ML model collection', 'private', 2, NULL);
INSERT INTO Repositories VALUES (3, 'leiden-tools', 'University utility tools', 'public', NULL, 7);
INSERT INTO Repositories VALUES (4, 'opendev-core', 'Core open source library', 'public', NULL, 8);
INSERT INTO Repositories VALUES (5, 'data-pipeline', 'Data pipeline framework', 'private', 3, NULL);
INSERT INTO Repositories VALUES (6, 'portfolio', 'Personal portfolio', 'public', 3, NULL);
-- Commits
INSERT INTO Commits VALUES ('a1b2c3d4', 1, 1, 'Initial commit', '2024-01-10T09:00:00');
INSERT INTO Commits VALUES ('b2c3d4e5', 2, 1, 'Add auth module', '2024-01-15T11:00:00');
INSERT INTO Commits VALUES ('c3d4e5f6', 2, 1, 'Fix auth bug', '2024-01-20T14:00:00');
INSERT INTO Commits VALUES ('d4e5f6g7', 3, 2, 'Add ML base model', '2024-02-10T10:00:00');
INSERT INTO Commits VALUES ('e5f6g7h8', 5, 3, 'University tools init', '2024-03-01T08:00:00');
INSERT INTO Commits VALUES ('f6g7h8i9', 1, 2, 'Improve README', '2024-01-25T16:00:00');
INSERT INTO Commits VALUES ('g7h8i9j0', 4, 3, 'Add ResNet50 model', '2024-04-01T13:00:00');
-- Pull_Requests
INSERT INTO Pull_Requests VALUES (1, 1, 'Add authentication', 'Implement OAuth2 flow', 'merged', 1);
INSERT INTO Pull_Requests VALUES (2, 1, 'Fix auth edge case', NULL, 'open', 2);
INSERT INTO Pull_Requests VALUES (3, 2, 'Add ResNet model', 'Deep learning image classifier', 'merged', 2);
INSERT INTO Pull_Requests VALUES (4, 3, 'Improve data pipeline', NULL, 'open', 3);
INSERT INTO Pull_Requests VALUES (5, 4, 'Fix documentation typo', NULL, 'closed', 4);
-- View all tables --
SELECT * FROM Accounts;
SELECT * FROM Users;
SELECT * FROM Followers;
SELECT * FROM Organizations;
SELECT * FROM Membership;
SELECT * FROM Repositories;
SELECT * FROM Commits;
SELECT * FROM Pull_Requests;
-----=== QUERIES ===-----
--1
SELECT Users.username, Repositories.name AS repository_name FROM Users JOIN Repositories ON Users.user_id = Repositories.owner_user_id;
-- Outputs each user's username that owns a repository and the repositories they own.
-- Actual Output:
-- username, repository_name
-- mintz, promptshop-api
-- janek, ml-models
-- ceco, data-pipeline
-- ceco, portfolio
--2
SELECT org_id, COUNT(user_id) AS member_count FROM Membership GROUP BY org_id HAVING COUNT(user_id) > 1;
-- Outputs IDs of organizations with more than one member. Shows total number of members in each organization as well.
-- Actual Output:
-- org_id, member_count
-- 6, 2
-- 7, 2
--3
SELECT username FROM Users EXCEPT SELECT username FROM Users WHERE user_id = 3;
-- Outputs all usernames except for 'ceco'.
-- Actual Output:
-- username
-- janek
-- mara
-- mintz
-- travis
--4
SELECT repo_id, name FROM Repositories
WHERE (SELECT COUNT(pr_id) FROM Pull_Requests WHERE Pull_Requests.repo_id = Repositories.repo_id AND Pull_Requests.status = 'open') >= 1;
-- Outputs the repositories that have at least one unresolved pull requests.
-- Actual Output:
-- repo_id, name
-- 1, promptshop-api
-- 3, leiden-tools
--5
SELECT account_type, COUNT(account_id) * 100.0 / (SELECT COUNT(*) FROM Accounts)
AS percentage FROM Accounts GROUP BY account_type;
-- Outputs the percentage of accounts that are either a user or an organization.
-- Actual Output:
-- account_type, percentage
-- organization, 37.5
-- user, 62.5
--6
SELECT name, bio FROM Organizations WHERE name LIKE '%AI%' OR bio LIKE '%AI%';
-- Show all "AI-focused" organizations that mention AI in their name or bio
-- Actual Output
-- name, bio
-- promptshop, AI start up