diff --git a/exercise01/Exercise01_SQL_Brush_Up.ipynb b/exercise01/Exercise01_SQL_Brush_Up.ipynb index 0a23d26..3cf5035 100644 --- a/exercise01/Exercise01_SQL_Brush_Up.ipynb +++ b/exercise01/Exercise01_SQL_Brush_Up.ipynb @@ -49,7 +49,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -72,9 +72,38 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " * postgresql://postgres:***@postgres:5432/discogs\n", + "1 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
version
PostgreSQL 13.4 (Debian 13.4-1.pgdg100+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 8.3.0-6) 8.3.0, 64-bit
" + ], + "text/plain": [ + "[('PostgreSQL 13.4 (Debian 13.4-1.pgdg100+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 8.3.0-6) 8.3.0, 64-bit',)]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "%%sql\n", "SELECT version();" @@ -106,9 +135,35 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " * postgresql://postgres:***@postgres:5432/discogs\n", + "0 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + "
table_name
" + ], + "text/plain": [ + "[]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "%%sql \n", "SELECT table_name\n", @@ -126,9 +181,39 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " * postgresql://postgres:***@postgres:5432/discogs\n", + "0 rows affected.\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
table_namecolumn_namedata_typeis_nullableordinal_position
" + ], + "text/plain": [ + "[]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "%%sql \n", "SELECT table_name, column_name, data_type, is_nullable, ordinal_position\n", @@ -417,7 +502,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -431,7 +516,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.2" + "version": "3.9.6" } }, "nbformat": 4, diff --git a/pics/ACID.png b/pics/ACID.png new file mode 100644 index 0000000..6a62faa Binary files /dev/null and b/pics/ACID.png differ diff --git a/pics/lecture_review.png b/pics/lecture_review.png new file mode 100644 index 0000000..51927b4 Binary files /dev/null and b/pics/lecture_review.png differ diff --git a/pics/query_examle.png b/pics/query_examle.png new file mode 100644 index 0000000..955edfb Binary files /dev/null and b/pics/query_examle.png differ diff --git a/pics/query_plan.png b/pics/query_plan.png new file mode 100644 index 0000000..ff3182e Binary files /dev/null and b/pics/query_plan.png differ diff --git a/pics/rest_scaleup.png b/pics/rest_scaleup.png new file mode 100644 index 0000000..777cdae Binary files /dev/null and b/pics/rest_scaleup.png differ diff --git a/progress.md b/progress.md new file mode 100644 index 0000000..9ec0a1d --- /dev/null +++ b/progress.md @@ -0,0 +1,105 @@ +# 2023_12_11 introduction + +initialise my own exercise repository to correct commit -m + +the exercise uses docker to build the environment, but I should do everything step by step myself. 
+or learn the docker myself as mentioned in the introduction and then check what's the meaning of each dockerfile +"Self-study: Docker for your laptop, Azure for large-scale clusters" +progress.md (END) + +![overview](pics/lecture_review.png) + +## exercise + environment set up + +## docker +https://docs.google.com/document/d/17HJ9-ljkLxP_HdkZKHjpj4BH67jBDIzxaU0EKciUkS8/edit?pli=1#heading=h.cilq0rusrnzq +basic usage for this repository, check readme file + +## exercise +the problem for course 2021 is that some material seems not available anymore, try course 2023 instead + environment set up + +Basic Usage +Check out this repository using git. In the folder of a particular exercise (that contains a docker-compose.yml file), run the following command in a terminal: +docker-compose up + +In most if not all weeks, one of the services is a Jupyter notebook server with which you can use the exercise notebooks interactively. You can access the server by accessing http://localhost:8888 in your favorite browser. + +# 2023_12_13 lessons learnt: SQL +this is basic database knowledge (SQL) +read provided textbook + +Data independence : + logical data model : table (mainly), doesn't change , others like trees, graph, cube + physical storage : various +directly using Python and R is not the best option as it doesn't follow data independence + +Overall architecture: +language: SQL +model : table +compute: cpu +Storage: disk + +relational table: +Attribute;column;Field;Property +Primary key;Row ID; Name +Row;Business Object;Item;entity;Document;Record + + +table as a (mathematical) relation : a subset of the Cartesian product +of the domains +A relation R is : +1. a set of attributes +2. 
an extension (set of tuples) +tuple: more intuitive disvlae +S: Attribute +V: values + +rules of relational table: +1: relational integrity: all records have the same attributes +2: Atomic integrity (1st normal form): no sub-table +3: domain integrity : each column has the same datatype/domain/schema + +relational algebra: +Set queries: Union, intersection, subtraction ... +Filter queries: selection, projection(selecting attributes)... +renaming queries: +joining queries: cartesian product, join (merge by matches) +grouping, sorting ??? + +Normal forms: +it means "best practice" +to ensure consistency, i.e. to avoid: + * update anomaly + * delete anomaly + * insert anomaly +1st normal form (tabular) - the key (atomic integrity): +2nd normal form (not joined) - the whole key : no partial dependency, the primary key fully determines the other attributes +3rd normal form - nothing but the key : attributes only depend on the primary key +__but in big data, we normally drop normal forms__ + +SQL brush up +declarative language: say what you want, not how +pronunciation of "SQL": See-kwel or just S-Q-L + +query plan +![query plan](pics/query_plan.png) +![one example](pics/query_examle.png) + +transaction : ACID +Atomicity :??? +consistency +Isolation +Durability +![acid](pics/ACID.png) +__again in big data, we normally drop normal forms__ + +performance: +indices +OLTP : online transaction processing, write-intensive +OLAP : online analytical processing, read-intensive +Mind data shape + +Data scale up +![rest of the lecture](pics/rest_scaleup.png) \ No newline at end of file