diff --git a/exercise01/Exercise01_SQL_Brush_Up.ipynb b/exercise01/Exercise01_SQL_Brush_Up.ipynb
index 0a23d26..3cf5035 100644
--- a/exercise01/Exercise01_SQL_Brush_Up.ipynb
+++ b/exercise01/Exercise01_SQL_Brush_Up.ipynb
@@ -49,7 +49,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -62,7 +62,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -72,9 +72,38 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://postgres:***@postgres:5432/discogs\n",
+ "1 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ " \n",
+ " | version | \n",
+ "
\n",
+ " \n",
+ " | PostgreSQL 13.4 (Debian 13.4-1.pgdg100+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 8.3.0-6) 8.3.0, 64-bit | \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "[('PostgreSQL 13.4 (Debian 13.4-1.pgdg100+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 8.3.0-6) 8.3.0, 64-bit',)]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"%%sql\n",
"SELECT version();"
@@ -106,9 +135,35 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://postgres:***@postgres:5432/discogs\n",
+ "0 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " | table_name | \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"%%sql \n",
"SELECT table_name\n",
@@ -126,9 +181,39 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://postgres:***@postgres:5432/discogs\n",
+ "0 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " | table_name | \n",
+ " column_name | \n",
+ " data_type | \n",
+ " is_nullable | \n",
+ " ordinal_position | \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"%%sql \n",
"SELECT table_name, column_name, data_type, is_nullable, ordinal_position\n",
@@ -417,7 +502,7 @@
"provenance": []
},
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -431,7 +516,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.2"
+ "version": "3.9.6"
}
},
"nbformat": 4,
diff --git a/pics/ACID.png b/pics/ACID.png
new file mode 100644
index 0000000..6a62faa
Binary files /dev/null and b/pics/ACID.png differ
diff --git a/pics/lecture_review.png b/pics/lecture_review.png
new file mode 100644
index 0000000..51927b4
Binary files /dev/null and b/pics/lecture_review.png differ
diff --git a/pics/query_examle.png b/pics/query_examle.png
new file mode 100644
index 0000000..955edfb
Binary files /dev/null and b/pics/query_examle.png differ
diff --git a/pics/query_plan.png b/pics/query_plan.png
new file mode 100644
index 0000000..ff3182e
Binary files /dev/null and b/pics/query_plan.png differ
diff --git a/pics/rest_scaleup.png b/pics/rest_scaleup.png
new file mode 100644
index 0000000..777cdae
Binary files /dev/null and b/pics/rest_scaleup.png differ
diff --git a/progress.md b/progress.md
new file mode 100644
index 0000000..9ec0a1d
--- /dev/null
+++ b/progress.md
@@ -0,0 +1,105 @@
+# 2023_12_11 introduction
+
+initialise my own exercise repository to correct commit -m
+
+the exercise uses docker to build the environment, but I should do every step by myself,
+or learn docker myself as mentioned in the introduction and then check the meaning of each dockerfile
+"Self-study: Docker for your laptop, Azure for large-scale clusters"
+progress.md (END)
+
+
+
+## exercise
+ environment set up
+
+## docker
+https://docs.google.com/document/d/17HJ9-ljkLxP_HdkZKHjpj4BH67jBDIzxaU0EKciUkS8/edit?pli=1#heading=h.cilq0rusrnzq
+basic usage for this repository: check the readme file
+
+## exercise
+the problem with the 2021 course is that some material seems to be no longer available; try the 2023 course instead
+ environment set up
+
+Basic Usage
+Check out this repository using git. In the folder of a particular exercise (that contains a docker-compose.yml file), run the following command in a terminal:
+docker-compose up
+
+In most if not all weeks, one of the services is a Jupyter notebook server with which you can use the exercise notebooks interactively. You can access the server by accessing http://localhost:8888 in your favorite browser.
+
+# 2023_12_13 lessons learnt: SQL
+this is basic database knowledge (SQL)
+read provided textbook
+
+Data independence:
+ logical data model : table (mainly), doesn't change; others like trees, graphs, cubes
+ physical storage : various
+directly using python and R is not the best option as it doesn't follow data independence
+
+Overall architecture:
+language: SQL
+model : table
+compute: cpu
+Storage: disk
+
+relational table:
+Attribute;Column;Field;Property
+Primary key;Row ID; Name
+Row;Business Object;Item;Entity;Document;Record
+
+
+table as a (mathematical) relation : a subset of the Cartesian product
+of the domains
+A relation R is :
+1. a set of attributes
+2. an extension (set of tuples)
+tuple: more intuitive disvlae
+S: Attribute
+V: values
+
+rules of relational table:
+1: relational integrity: all records have the same attributes
+2: atomic integrity (1st normal form): no sub-tables
+3: domain integrity: each column has the same datatype/domain/schema
+
+relational algebra:
+Set queries: union, intersection, subtraction ...
+Filter queries: selection, projection (selecting attributes)...
+renaming queries:
+joining queries: cartesian product, join (merge by matches)
+grouping, sorting ???
+
+Normal forms:
+it means "best practice"
+to ensure consistency, i.e. to avoid:
+ * update anomaly
+ * delete anomaly
+ * insert anomaly
+1st normal form (tabular) - the key (atomic integrity):
+2nd normal form (not joined) - the whole key : no partial dependency, the whole primary key fully determines the other attributes
+3rd normal form - nothing but the key : no transitive dependency, non-key attributes depend only on the primary key
+__but in big data, we normally drop normal forms__
+
+SQL brush up
+declarative language: say what you want, not how to get it
+pronunciation of "SQL": See-kwel or just S-Q-L
+
+query plan
+
+
+
+transaction : ACID
+Atomicity :???
+consistency
+Isolation
+Durability
+
+__again, in big data, we normally drop normal forms__
+
+performance:
+indices
+OLTP : online transaction processing, write-intensive
+OLAP : online analytical processing, read-intensive
+Mind data shape
+
+Data scale up
+
\ No newline at end of file