diff --git a/materials/x18/lab/1/lab00/lab00.ipynb b/materials/x18/lab/1/lab00/lab00.ipynb index 7c18b56..c75742c 100644 --- a/materials/x18/lab/1/lab00/lab00.ipynb +++ b/materials/x18/lab/1/lab00/lab00.ipynb @@ -44,7 +44,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print(\"Hello, World!\")" @@ -60,7 +62,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print(\"\\N{WAVING HAND SIGN}, \\N{EARTH GLOBE ASIA-AUSTRALIA}!\")" @@ -76,7 +80,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print(\"First this line is printed,\")\n", @@ -136,7 +142,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "print(\"This line is missing something.\"" @@ -182,7 +190,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Don't change this cell, just run it\n", @@ -200,7 +210,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check(\"tests/q0.py\")" @@ -216,7 +228,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import glob\n", @@ -242,10 +256,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 1 } - diff --git a/materials/x18/lab/1/lab01/lab01.ipynb b/materials/x18/lab/1/lab01/lab01.ipynb index ea1b5b9..9e4ae9b 100644 --- a/materials/x18/lab/1/lab01/lab01.ipynb +++ b/materials/x18/lab/1/lab01/lab01.ipynb @@ -24,6 +24,7 @@ "cell_type": "code", 
"execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -42,6 +43,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -72,6 +74,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -96,7 +99,7 @@ }, "outputs": [], "source": [ - "2+6*5-6*3**2*2**3/4*7" + "3+6*5-6*3**2*2**3/4*7" ] }, { @@ -107,7 +110,7 @@ }, "outputs": [], "source": [ - "2+(6*5-(6*3))**2*((2**3)/4*7)" + "3+(6*5-(6*3))**2*((2**3)/4*7)" ] }, { @@ -116,13 +119,13 @@ "source": [ "In standard math notation, the first expression is\n", "\n", - "$$2 + 6 \\times 5 - 6 \\times 3^2 \\times \\frac{2^3}{4} \\times 7,$$\n", + "$$3 + 6 \\times 5 - 6 \\times 3^2 \\times \\frac{2^3}{4} \\times 7,$$\n", "\n", "while the second expression is\n", "\n", - "$$2 + (6 \\times 5 - (6 \\times 3))^2 \\times (\\frac{(2^3)}{4} \\times 7).$$\n", + "$$3 + (6 \\times 5 - (6 \\times 3))^2 \\times (\\frac{(2^3)}{4} \\times 7).$$\n", "\n", - "**Question 1.1.1.**
Write a Python expression in this next cell that's equal to $5 \\times (3 \\frac{10}{11}) - 49 \\frac{1}{3} + 2^{.5 \\times 22} - \\frac{7}{33}$. That's five times three and ten elevenths, minus 49 and a third, plus two to the power of half of 22, minus 7 33rds. By \"$3 \\frac{10}{11}$\" we mean $3+\\frac{10}{11}$, not $3 \\times \\frac{10}{11}$.\n", + "**Question 1.1.1.**
Write a Python expression in this next cell that's equal to $5 \\times (3 \\frac{10}{11}) - 49 \\frac{1}{3} + 2^{.5 \\times 22} + \\frac{26}{33}$. That's five times three and ten elevenths, minus 49 and a third, plus two to the power of half of 22, plus 26 33rds. By \"$3 \\frac{10}{11}$\" we mean $3+\\frac{10}{11}$, not $3 \\times \\frac{10}{11}$.\n", "\n", "Replace the ellipses (`...`) with your expression. Try to use parentheses only when necessary.\n", "\n", @@ -133,6 +136,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -156,6 +160,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -176,6 +181,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -202,6 +208,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -218,6 +225,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -237,6 +245,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -261,6 +270,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -294,6 +304,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -322,7 +333,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# These lines load the tests.\n", @@ -344,6 +357,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -381,6 +395,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, 
"outputs": [], @@ -432,6 +447,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -449,6 +465,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -469,6 +486,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -503,6 +521,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -525,6 +544,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -536,6 +556,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -563,6 +584,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -596,6 +618,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -615,6 +638,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -648,6 +672,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -683,6 +708,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -701,6 +727,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -720,6 +747,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -757,6 +785,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -778,6 +807,7 @@ "cell_type": "code", "execution_count": null, "metadata": { 
+ "collapsed": true, "scrolled": true }, "outputs": [], @@ -803,6 +833,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -831,7 +862,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Don't change this cell\n", @@ -849,7 +882,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "school = Table.read_table('school.csv')\n", @@ -869,7 +904,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "top_1970 = ...\n", @@ -879,7 +916,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q51.py')" @@ -896,6 +935,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": false }, "outputs": [], @@ -913,7 +953,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "top_1970_with_2009 = ...\n", @@ -923,7 +965,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q52.py')" @@ -940,6 +984,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": false }, "outputs": [], @@ -972,6 +1017,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": false }, "outputs": [], @@ -1008,10 +1054,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 1 } - diff --git a/materials/x18/lab/1/lab02/lab02.ipynb b/materials/x18/lab/1/lab02/lab02.ipynb index 
2ce444e..d5184af 100644 --- a/materials/x18/lab/1/lab02/lab02.ipynb +++ b/materials/x18/lab/1/lab02/lab02.ipynb @@ -91,8 +91,8 @@ "source": [ "**Question 1.1.**
In the next cell, assign the name `new_year` to the larger number among the following two numbers:\n", "\n", - "1. the absolute value of $2^{5}-2^{11}-2^1$, and \n", - "2. $5 \\times 13 \\times 31 + 2$.\n", + "1. the absolute value of $2^{5}-2^{11}-2^{1}-2^{0}$, and \n", + "2. $5 \\times 13 \\times 31 + 3$.\n", "\n", "Try to use just one statement (one line of code)." ] @@ -1483,145 +1483,6 @@ "check('tests/q435.py')" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4.4 Example: Growth Rates" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A natural example of how we can use arrays to reduce large amounts of computation is growth rates. \n", - "\n", - "**Question 4.4.1**
Let's say we are investing in stocks, and we initially invest 10.23 dollars into the market. We check back in one year later, and we see that our total money in the market is now 14.32 dollars. What was our annual growth rate?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "annual_growth_rate = ...\n", - "annual_growth_rate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "check('tests/q441.py')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Question 4.4.2**
If we wanted to see multiple people's annual stock growth rates, we could continue the above process per person. However, this can become tedious. \n", - "\n", - "Let's use the power of arrays! Assume that `initials` contains the initial amount of money for 5 different people, and `changed` contains the amount of money after one year for the same corresponding people. Assign `annual_growth_rates` to an array of all of the different growth rates for the 5 people. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "initials = make_array(10.21, 11.32, 15.21, 13.22, 19.10)\n", - "changed = make_array(14.20, 35.44, 10.43, 9.62, 20.10)\n", - "annual_growth_rates = ...\n", - "annual_growth_rates" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "check('tests/q442.py')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Question 4.4.3**
Now, let's use an array arithmetic to deduce the annual growth rate on peoples stocks given the amount of money in their market 10 years from now, found in the variable `ten_years`. Assuming everyone initially started with 10 dollars in their market, calculate the annual growth rate per person over these 10 years and assign this array of values to `annual_rates_over_ten_years`. \n", - "\n", - "*Hint*: If you don't remember this formula, check out the textbook!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "ten_years = make_array(50.32, 1.04, 0.40, 14.50, 11.12)\n", - "annual_rates_over_ten_years = ...\n", - "annual_rates_over_ten_years" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "check('tests/q443.py')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Question 4.4.4**
Lastly, let's use array arithmetic to figure the final amount of money in people's market 10 years from now, assuming they all invested different amounts of money (`invested`) in the same stock, DS8. The annual growth rate for DS8 was .045. Assign `money_in_ten_years` to an array of the money people ended with in the DS8 stock based on how much they initially invested.\n", - "\n", - "*Hint*: If you don't remember this formula, check out the textbook!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "invested = make_array(10,11,15,20,25)\n", - "money_in_ten_years = ...\n", - "money_in_ten_years" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "check('tests/q444.py')" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -1642,9 +1503,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# For your convenience, you can run this cell to run all the tests at once!\n", @@ -1653,6 +1512,15 @@ "if not globals().get('__GOFER_GRADER__', False):\n", " display(grade_notebook('lab02.ipynb', sorted(glob.glob('tests/q*.py'))))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] } ], "metadata": { @@ -1672,10 +1540,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.2" + "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 1 } - diff --git a/materials/x18/lab/1/lab02/tests/q11.py b/materials/x18/lab/1/lab02/tests/q11.py index 9bbef9e..d639059 100644 --- a/materials/x18/lab/1/lab02/tests/q11.py +++ b/materials/x18/lab/1/lab02/tests/q11.py @@ -7,7 +7,7 @@ { 'code': r""" >>> new_year - 2018 + 2019 """, 'hidden': False, 'locked': False diff --git a/materials/x18/lab/1/lab02/tests/q441.py 
b/materials/x18/lab/1/lab02/tests/q441.py deleted file mode 100644 index c88a80f..0000000 --- a/materials/x18/lab/1/lab02/tests/q441.py +++ /dev/null @@ -1,22 +0,0 @@ -test = { - 'name': '', - 'points': 1, - 'suites': [ - { - 'cases': [ - { - 'code': r""" - >>> round(annual_growth_rate,4) == .3998 - True - """, - 'hidden': False, - 'locked': False - }, - ], - 'scored': True, - 'setup': '', - 'teardown': '', - 'type': 'doctest' - } - ] -} diff --git a/materials/x18/lab/1/lab02/tests/q442.py b/materials/x18/lab/1/lab02/tests/q442.py deleted file mode 100644 index 569aa2b..0000000 --- a/materials/x18/lab/1/lab02/tests/q442.py +++ /dev/null @@ -1,22 +0,0 @@ -test = { - 'name': '', - 'points': 1, - 'suites': [ - { - 'cases': [ - { - 'code': r""" - >>> all(np.round(annual_growth_rates, 3) == np.round((changed/initials)-1, 3)) - True - """, - 'hidden': False, - 'locked': False - }, - ], - 'scored': True, - 'setup': '', - 'teardown': '', - 'type': 'doctest' - } - ] -} diff --git a/materials/x18/lab/1/lab02/tests/q443.py b/materials/x18/lab/1/lab02/tests/q443.py deleted file mode 100644 index d30179e..0000000 --- a/materials/x18/lab/1/lab02/tests/q443.py +++ /dev/null @@ -1,22 +0,0 @@ -test = { - 'name': '', - 'points': 1, - 'suites': [ - { - 'cases': [ - { - 'code': r""" - >>> all(annual_rates_over_ten_years == (ten_years/10)**(1/10) - 1) - True - """, - 'hidden': False, - 'locked': False - }, - ], - 'scored': True, - 'setup': '', - 'teardown': '', - 'type': 'doctest' - } - ] -} diff --git a/materials/x18/lab/1/lab02/tests/q444.py b/materials/x18/lab/1/lab02/tests/q444.py deleted file mode 100644 index e9403b9..0000000 --- a/materials/x18/lab/1/lab02/tests/q444.py +++ /dev/null @@ -1,22 +0,0 @@ -test = { - 'name': '', - 'points': 1, - 'suites': [ - { - 'cases': [ - { - 'code': r""" - >>> all(money_in_ten_years == invested * (1.045)**10) - True - """, - 'hidden': False, - 'locked': False - }, - ], - 'scored': True, - 'setup': '', - 'teardown': '', - 'type': 'doctest' - } - 
] -} diff --git a/materials/x18/lab/1/lab03/lab03.ipynb b/materials/x18/lab/1/lab03/lab03.ipynb index b6c7bc4..db5a783 100644 --- a/materials/x18/lab/1/lab03/lab03.ipynb +++ b/materials/x18/lab/1/lab03/lab03.ipynb @@ -16,7 +16,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import numpy as np\n", @@ -37,13 +39,15 @@ "\n", "In most data science applications, we have data about many entities, but we also have several kinds of data about each entity.\n", "\n", - "For example, in the cell below we have two arrays. The first one contains the world population in each year (as [estimated](http://www.census.gov/population/international/data/worldpop/table_population.php) by the US Census Bureau), and the second contains the years themselves (in order, so the first elements in the population and the years arrays correspond)." + "For example, in the cell below we have two arrays. The first one contains the world population in each year (estimated by the US Census Bureau), and the second contains the years themselves. These elements are in order, so the year and the world population for that year have the same index in their corresponding arrays." 
] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "population_amounts = Table.read_table(\"world_population.csv\").column(\"Population\")\n", @@ -75,7 +79,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "population = Table().with_columns(\n", @@ -104,7 +110,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "top_10_movie_ratings = make_array(9.2, 9.2, 9., 8.9, 8.9, 8.9, 8.9, 8.9, 8.9, 8.8)\n", @@ -129,7 +137,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_1.py')" @@ -150,7 +160,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "imdb = ...\n", @@ -160,7 +172,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_2.py')" @@ -195,7 +209,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Run this cell to recreate the table\n", @@ -216,7 +232,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "my_flower = ...\n", @@ -226,7 +244,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_1.py')" @@ -242,7 +262,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Use the method .with_row(...) 
to create a new table that includes my_flower \n", @@ -261,7 +283,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_2.py')" @@ -280,7 +304,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "imdb.column(\"Rating\")" @@ -300,7 +326,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "highest_rating = ...\n", @@ -310,7 +338,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q4_1.py')" @@ -326,7 +356,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "imdb.sort(\"Rating\")" @@ -342,7 +374,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "imdb.sort(\"Rating\", descending=True)" @@ -369,7 +403,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "imdb_by_year = ...\n", @@ -379,7 +415,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q4_2.py')" @@ -397,7 +435,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "earliest_movie_title = ...\n", @@ -407,7 +447,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q4_3.py')" @@ -426,7 +468,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, 
"outputs": [], "source": [ "forties = imdb.where('Decade', are.equal_to(1940))\n", @@ -449,7 +493,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "average_rating_in_forties = ...\n", @@ -459,7 +505,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q5_1.py')" @@ -484,7 +532,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "ninety_nine = ...\n", @@ -494,7 +544,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q5_2.py')" @@ -528,7 +580,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "really_highly_rated = ...\n", @@ -538,7 +592,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q5_3.py')" @@ -556,7 +612,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "average_20th_century_rating = ...\n", @@ -568,7 +626,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q5_4.py')" @@ -584,7 +644,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "num_movies_in_dataset = imdb.num_rows\n", @@ -603,7 +665,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "proportion_in_20th_century = ...\n", @@ -615,7 +679,9 @@ { "cell_type": "code", "execution_count": 
null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q5_5.py')" @@ -638,6 +704,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -649,7 +716,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q5_6.py')" @@ -665,7 +734,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "year_population_crossed_6_billion = ...\n", @@ -675,7 +746,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q5_7.py')" @@ -696,7 +769,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "farmers_markets = ...\n", @@ -706,7 +781,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q6_1.py')" @@ -726,7 +803,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "num_farmers_markets_columns = ...\n", @@ -736,7 +815,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q6_2.py')" @@ -760,7 +841,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "farmers_markets_locations = ...\n", @@ -770,7 +853,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q6_3.py')" @@ -793,6 +878,7 @@ "cell_type": "code", "execution_count": null, 
"metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -805,7 +891,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q6_4.py')" @@ -825,7 +913,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "farmers_markets_without_fmid = ...\n", @@ -835,7 +925,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q6_5.py')" @@ -858,7 +950,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "northern_markets = ...\n", @@ -868,7 +962,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q6_6.py')" @@ -884,7 +980,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "berkeley_markets = ...\n", @@ -894,7 +992,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q6_7.py')" @@ -933,7 +1033,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# For your convenience, you can run this cell to run all the tests at once!\n", @@ -961,10 +1063,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 1 } - diff --git a/materials/x18/lab/1/lab04/lab04.ipynb b/materials/x18/lab/1/lab04/lab04.ipynb index 4be7dd6..956c7c6 100644 --- a/materials/x18/lab/1/lab04/lab04.ipynb +++ b/materials/x18/lab/1/lab04/lab04.ipynb @@ 
-19,7 +19,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import numpy as np\n", @@ -53,7 +55,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "raw_compensation = Table.read_table('raw_compensation.csv')\n", @@ -70,7 +74,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "np.average(raw_compensation.column(\"Total Pay\"))" @@ -86,7 +92,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "total_pay_type = ...\n", @@ -96,7 +104,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_1.py')" @@ -112,7 +122,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "mark_hurd_pay_string = ...\n", @@ -122,7 +134,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_2.py')" @@ -138,7 +152,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "mark_hurd_pay = ...\n", @@ -148,7 +164,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_3.py')" @@ -171,6 +189,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -183,7 +202,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ 
"check('tests/q1_4.py')" @@ -201,7 +222,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "convert_pay_string_to_number('$42')" @@ -210,7 +233,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "convert_pay_string_to_number(mark_hurd_pay_string)" @@ -219,7 +244,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# We can also compute Safra Catz's pay in the same way:\n", @@ -253,7 +280,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "_ = interact(convert_pay_string_to_number, pay_string='$42')" @@ -339,6 +368,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -355,7 +385,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_1.py')" @@ -375,7 +407,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "a_proportion = 2**(.5) / 2\n", @@ -386,7 +420,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_2.py')" @@ -402,7 +438,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# You should see an error when you run this. 
(If you don't, you might\n", @@ -424,7 +462,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def disemvowel(a_string):\n", @@ -440,7 +480,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Alternatively, you can use interact to call your function\n", @@ -450,7 +492,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_3.py')" @@ -475,7 +519,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def num_non_vowels(a_string):\n", @@ -489,7 +535,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_4.py')" @@ -507,7 +555,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "movies_by_year = Table.read_table(\"movies_by_year.csv\")\n", @@ -529,6 +579,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -546,7 +597,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# interact also allows you to pass in an array for a function argument. It will\n", @@ -557,12 +610,67 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_5.py')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Print is not the same as Return\n", + "The `print_kth_top_movie_year(k)` function prints the total gross movie sales for the year that was provided! 
However, since the function does not return that value, we cannot use it in later code after we call the function. Let's look at an example of a function that prints a value but does not return it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def print_number_five():\n", + " print(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_number_five()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, if we try to use the output of `print_number_five()`, we see that we get an error when we try to add the number 5 to it!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_number_five_output = print_number_five()\n", + "print_number_five_output + 5" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It may seem that `print_number_five()` is returning a value, 5. In reality, it just displays the number 5 to you without giving you the actual value; the call itself evaluates to `None`, which is why adding 5 to it fails. If your function prints a value without returning it and you then try to use that value, you will run into errors, so be careful!" 
+ ] + }, { "cell_type": "markdown", "metadata": {}, @@ -577,7 +685,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "our_name_for_max = max\n", @@ -594,7 +704,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "max(2, 6)" @@ -610,7 +722,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "max" @@ -626,7 +740,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "make_array(max, np.average, are.equal_to)" @@ -642,7 +758,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "some_functions = ...\n", @@ -652,7 +770,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_1.py')" @@ -668,7 +788,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "make_array(max, np.average, are.equal_to).item(0)(4, -2, 7)" @@ -686,7 +808,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "raw_compensation.apply(convert_pay_string_to_number, \"Total Pay\")" @@ -708,7 +832,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "compensation = raw_compensation.with_column(\n", @@ -720,7 +846,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_2.py')" @@ -738,7 +866,9 @@ { "cell_type": "code", 
"execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "average_total_pay = ...\n", @@ -748,7 +878,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_3.py')" @@ -764,7 +896,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "cash_proportion = ...\n", @@ -774,7 +908,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_4.py')" @@ -803,17 +939,31 @@ }, "outputs": [], "source": [ - "# For reference, our solution involved more than just this one line of code\n", - "...\n", + "# Definition to turn percent to number\n", + "def percent_string_to_num(percent_string):\n", + " return ...\n", + "\n", + "# Compensation table where there is a previous year\n", + "having_previous_year = ...\n", "\n", + "# Get the percent changes as numbers instead of strings\n", + "percent_changes = ...\n", + "\n", + "# Calculate the previous years pay\n", + "previous_pay = ...\n", + "\n", + "# Put the previous pay column into the compensation table\n", "with_previous_compensation = ...\n", + "\n", "with_previous_compensation" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_5.py')" @@ -829,7 +979,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "average_pay_2014 = ...\n", @@ -839,7 +991,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_6.py')" @@ -860,7 +1014,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + 
"collapsed": true + }, "outputs": [], "source": [ "..." @@ -876,7 +1032,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "num_ceos_more_than_30_million = ..." @@ -892,7 +1050,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "num_ceos_more_than_30_million_2 = ...\n", @@ -902,7 +1062,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q4_3.py')" @@ -928,7 +1090,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# For your convenience, you can run this cell to run all the tests at once!\n", @@ -956,10 +1120,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 1 } - diff --git a/materials/x18/lab/2/lab01/comparisons.png b/materials/x18/lab/2/lab01/comparisons.png new file mode 100644 index 0000000..4ea6dba Binary files /dev/null and b/materials/x18/lab/2/lab01/comparisons.png differ diff --git a/materials/x18/lab/2/lab01/lab01.ipynb b/materials/x18/lab/2/lab01/lab01.ipynb index 99bdfd1..0b024b6 100644 --- a/materials/x18/lab/2/lab01/lab01.ipynb +++ b/materials/x18/lab/2/lab01/lab01.ipynb @@ -4,9 +4,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Lab 1: Randomization, Iteration, and Probability\n", + "# Lab 1: Randomization\n", "\n", - "Welcome to Lab 1 of Data 8.2x! This week, we will go over conditionals and iteration, and introduce the concepts of randomness and probability. All of this material is covered in [Chapter 9](https://www.inferentialthinking.com/chapters/09/randomness.html) of the textbook. \n", + "Welcome to Lab 1 of Data 8.2x! 
\n", + "\n", + "We will go over [iteration and simulations](https://www.inferentialthinking.com/chapters/10/sampling-and-empirical-distributions.html), as well as introduce the concept of [randomness](https://www.inferentialthinking.com/chapters/09/randomness.html). \n", "\n", "First, set up the tests and imports by running the cell below." ] @@ -14,7 +16,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import numpy as np\n", @@ -34,15 +38,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In Python, Boolean values can either be `True` or `False`. We get Boolean values when using comparison operators, among which are `<` (less than), `>` (greater than), and `==` (equal to). For a complete list, refer to [Booleans and Comparison](https://www.inferentialthinking.com/chapters/09/randomness.html#Booleans-and-Comparison) at the start of Chapter 9.\n", + "In Python, Boolean values can either be `True` or `False`. We get Boolean values when using comparison operators such as `<` (less than), `>` (greater than), and `==` (equal to). A list of common comparison operators can be found below!\n", "\n", - "Run the cell below to see an example of a comparison operator in action. Three is indeed larger than one plus one." 
+ "" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "3 > 1 + 1" @@ -58,7 +64,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "result = 10 / 2 == 5\n", @@ -75,7 +83,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "make_array(1, 5, 7, 8, 3, -1) > 3" @@ -93,7 +103,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "nachos = make_array('cheese', 'salsa', 'both', 'neither')\n", @@ -112,7 +124,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "ten_nachos = make_array('neither', 'cheese', 'both', 'both', 'cheese', 'salsa', 'both', 'neither', 'cheese', 'both')\n", @@ -123,7 +137,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_1.py')" @@ -176,6 +192,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -191,7 +208,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_2.py')" @@ -208,6 +227,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -229,7 +249,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_3.py')" @@ -248,6 +270,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, 
"outputs": [], @@ -260,7 +283,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_4.py')" @@ -276,113 +301,77 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "number_wow_reactions = ...\n", "number_wow_reactions" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "check('tests/q1_5.py')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Question 1.6**
Change just the comparison operators from `==` to some other operators so that `should_be_true` is `True`." - ] - }, { "cell_type": "code", "execution_count": null, "metadata": { - "for_assignment_type": "student" + "collapsed": true }, "outputs": [], "source": [ - "should_be_true = number_cheese == number_wow_reactions == np.count_nonzero(ten_nachos == 'neither')\n", - "should_be_true" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "check('tests/q1_6.py')" + "check('tests/q1_5.py')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Question 1.7**
Complete the function `both_or_neither`, which takes in a table of nachos with reactions (just like the one from Question 4) and returns `'Wow!'` if there are more nachos with both cheese and salsa, or `'Meh.'` if there are more nachos with neither. If there are an equal number of each, return `'Okay!'`." + "## 2. Simulations and For Loops\n", + "Using a `for` statement, we can perform a task multiple times. This is known as iteration. Here, we'll simulate drawing different suits from a deck of cards. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "for_assignment_type": "student" + "collapsed": true }, "outputs": [], "source": [ - "def both_or_neither(nacho_table):\n", - " reactions = ...\n", - " number_wow_reactions = ...\n", - " number_meh_reactions = ...\n", - " if ...:\n", - " return 'Wow!'\n", - " # next condition should return 'Meh.'\n", - " ...\n", - " # next condition should return 'Okay!'\n", - " ...\n", + "suits = make_array(\"♤\", \"♡\", \"♢\", \"♧\")\n", "\n", - "many_nachos = Table().with_column('Nachos', np.random.choice(nachos, 250))\n", - "many_nachos = many_nachos.with_column('Reactions', many_nachos.apply(nacho_reaction, 'Nachos'))\n", - "result = both_or_neither(many_nachos)\n", - "result" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "check('tests/q1_7.py')" + "draws = make_array()\n", + "\n", + "repetitions = 6\n", + "\n", + "for i in np.arange(repetitions):\n", + " draws = np.append(draws, np.random.choice(suits))\n", + "\n", + "draws" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## 2. Iteration and Sampling\n", - "Using a `for` statement, we can perform a task multiple times. This is known as iteration. Here, we'll simulate drawing different suits from a deck of cards. " + "The unrolled version of this `for` loop can be found below." 
] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ - "suits = make_array(\"♤\", \"♡\", \"♢\", \"♧\")\n", - "\n", "draws = make_array()\n", "\n", - "repetitions = 6\n", - "\n", - "for i in np.arange(repetitions):\n", - " draws = np.append(draws, np.random.choice(suits))\n", + "draws = np.append(draws, np.random.choice(suits))\n", + "draws = np.append(draws, np.random.choice(suits))\n", + "draws = np.append(draws, np.random.choice(suits))\n", + "draws = np.append(draws, np.random.choice(suits))\n", + "draws = np.append(draws, np.random.choice(suits))\n", + "draws = np.append(draws, np.random.choice(suits))\n", "\n", "draws" ] @@ -393,7 +382,7 @@ "source": [ "In the example above, the `for` loop appends a random draw to the `draws` array for every number in `np.arange(repetitions)`. \n", "\n", - "A nice way to think about what we did above, was we had a deck of 4 cards of different suits. We randomly drew one card, saw the suit, kept track of it in `draws`, and put the card back into the deck. We repeated this for a total of 6 times without having to repeat code, thanks to the for loop. We simulated thie experiment using a for loop. \n", + "Here's a nice way to think of what we did above. We had a deck of 4 cards of different suits. We randomly drew one card, saw the suit, kept track of it in `draws`, and put the card back into the deck. We repeated this a total of 6 times without having to repeat code, thanks to the `for` loop. In other words, we simulated this experiment using a `for` loop. \n", "\n", "Another use of iteration is to loop through a set of values. 
For instance, we can print out all of the colors of the rainbow.\n" ] @@ -401,7 +390,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "rainbow = make_array(\"red\", \"orange\", \"yellow\", \"green\", \"blue\", \"indigo\", \"violet\")\n", @@ -420,7 +411,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "for another_name in rainbow:\n", @@ -450,6 +443,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -464,7 +458,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_1.py')" @@ -474,33 +470,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Question 2.2**
What is the average point value of a dart thrown by Clay?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "average_score = ...\n", - "average_score" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "check('tests/q2_2.py')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Question 2.3**
In the following cell, we've loaded the text of _Pride and Prejudice_ by Jane Austen, split it into individual words, and stored these words in an array. Using a `for` loop, assign `longer_than_five` to the number of words in the novel that are more than 5 letters long.\n", + "**Question 2.2**
In the following cell, we've loaded the text of _Pride and Prejudice_ by Jane Austen, split it into individual words, and stored these words in an array. Using a `for` loop, assign `longer_than_five` to the number of words in the novel that are more than 5 letters long.\n", "\n", "*Hint*: You can find the number of letters in a word with the `len` function." ] @@ -509,6 +479,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -517,24 +488,29 @@ "p_and_p_words = np.array(austen_string.split())\n", "\n", "longer_than_five = ...\n", - " \n", + "\n", + "# a for loop would be useful here\n", + "\n", + "\n", "longer_than_five" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ - "check('tests/q2_3.py')" + "check('tests/q2_2.py')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Question 2.4**
Using simulation with 10,000 trials, assign `chance_of_all_different` to an estimate of the chance that if you pick three words from Pride and Prejudice uniformly at random (with replacement), they all have different lengths. \n", + "**Question 2.3**
Using simulation with 10,000 trials, assign `chance_of_all_different` to an estimate of the chance that if you pick three words from Pride and Prejudice uniformly at random (with replacement), they all have different lengths. \n", "\n", "*Hint*: Remember that `!=` only checks for non-equality between two items, not three. However, you can use `!=` more than once in the same line. \n", "\n", @@ -545,6 +521,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -560,47 +537,15 @@ "chance_of_all_different" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "check('tests/q2_4.py')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "** Question 2.5
** Quincy is drafting Basketball Players for his NBA Fantasy League. He chooses 10 times randomly from a list of players, and drafts the player regardless of whether the player has been chosen before (You could have 10 Kevin Durant's on a team!). Count how many times John Wall is chosen in a version of Quincy's draft." - ] - }, { "cell_type": "code", "execution_count": null, "metadata": { - "for_assignment_type": "student" + "collapsed": true }, "outputs": [], "source": [ - "players = [\"John Wall\", \"Steph Curry\", \"Kevin Durant\", \"Jimmy Butler\", \"Russell Westbrook\"]\n", - "draft_picks = ...\n", - "num_wall = ...\n", - "\n", - "for ... in ...:\n", - " ...\n", - "\n", - "num_wall" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "check('tests/q2_5.py')" + "check('tests/q2_3.py')" ] }, { @@ -608,20 +553,22 @@ "metadata": {}, "source": [ "## 3. Finding Probabilities\n", - "After a long day of class, Clay decides to go to Crossroads for dinner. Today's menu has Clay's four favorite foods: enchiladas, hamburgers, pizza, and spaghetti. However, each dish has a 30% chance of running out before Clay can get to Crossroads." + "After a long day of class, Clay decides to go to a food court for dinner. Today's menu has Clay's four favorite foods: enchiladas, hamburgers, pizza, and spaghetti. However, each dish has a 30% chance of running out before Clay can get to the food court." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Question 3.1**
What is the probability that Clay will be able to eat pizza at Crossroads?" + "**Question 3.1**
What is the probability that Clay will be able to eat pizza at the food court?" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "pizza_prob = ..." @@ -630,7 +577,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_1.py')" @@ -640,13 +589,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Question 3.2**
What is the probability that Clay will be able to eat all four of these foods at Crossroads?" + "**Question 3.2**
What is the probability that Clay will be able to eat all four of these foods at the food court?" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "all_prob = ..." @@ -655,7 +606,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_2.py')" @@ -665,13 +618,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Question 3.3**
What is the probability that Crossroads will have run out of something before Clay can get there?" + "**Question 3.3**
What is the probability that the food court will have run out of something before Clay can get there?" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "something_is_out = ..." @@ -680,7 +635,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_3.py')" @@ -690,7 +647,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "To make up for their unpredictable food supply, Crossroads decides to hold a contest for some free Cal Dining swag. There is a bag with two red marbles, two green marbles, and two blue marbles. Clay has to draw three marbles separately. In order to win, all three of these marbles must be of different colors." + "To make up for their unpredictable food supply, the food court decides to hold a contest for some free food. There is a bag with two red marbles, two green marbles, and two blue marbles. Clay has to draw three marbles separately. In order to win, all three of these marbles must be of different colors." ] }, { @@ -703,7 +660,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "winning_prob = ..." 
@@ -712,7 +671,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_4.py')" @@ -728,7 +689,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# For your convenience, you can run this cell to run all the tests at once!\n", @@ -756,10 +719,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 1 } - diff --git a/materials/x18/lab/2/lab01/tests/q1_6.py b/materials/x18/lab/2/lab01/tests/q1_6.py deleted file mode 100644 index 6f05f06..0000000 --- a/materials/x18/lab/2/lab01/tests/q1_6.py +++ /dev/null @@ -1,22 +0,0 @@ -test = { - 'name': '', - 'points': 1, - 'suites': [ - { - 'cases': [ - { - 'code': r""" - >>> should_be_true - True - """, - 'hidden': False, - 'locked': False - }, - ], - 'scored': True, - 'setup': '', - 'teardown': '', - 'type': 'doctest' - } - ] -} diff --git a/materials/x18/lab/2/lab01/tests/q1_7.py b/materials/x18/lab/2/lab01/tests/q1_7.py deleted file mode 100644 index c1dff07..0000000 --- a/materials/x18/lab/2/lab01/tests/q1_7.py +++ /dev/null @@ -1,44 +0,0 @@ -test = { - 'name': '', - 'points': 1, - 'suites': [ - { - 'cases': [ - { - 'code': r""" - >>> (result == 'Wow!') or (result == 'Meh.') or (result == 'Okay!') - True - """, - 'hidden': False, - 'locked': False - }, - { - 'code': r""" - >>> ten_nachos = make_array('neither', 'cheese', 'both', 'both', 'cheese', 'salsa', 'both', 'neither', 'cheese', 'both') - >>> ten_nachos_reactions = Table().with_column('Nachos', ten_nachos) - >>> ten_nachos_reactions = ten_nachos_reactions.with_column('Reactions', ten_nachos_reactions.apply(nacho_reaction, 'Nachos')) - >>> both_or_neither(ten_nachos_reactions) - 'Wow!' 
- """, - 'hidden': False, - 'locked': False - }, - { - 'code': r""" - >>> seven_nachos = make_array('neither', 'cheese', 'both', 'both', 'neither', 'both', 'neither') - >>> seven_nachos_reactions = Table().with_column('Nachos', seven_nachos) - >>> seven_nachos_reactions = seven_nachos_reactions.with_column('Reactions', seven_nachos_reactions.apply(nacho_reaction, 'Nachos')) - >>> both_or_neither(seven_nachos_reactions) - 'Okay!' - """, - 'hidden': False, - 'locked': False - }, - ], - 'scored': True, - 'setup': '', - 'teardown': '', - 'type': 'doctest' - } - ] -} diff --git a/materials/x18/lab/2/lab01/tests/q2_2.py b/materials/x18/lab/2/lab01/tests/q2_2.py index 287fc0d..2d10ec0 100644 --- a/materials/x18/lab/2/lab01/tests/q2_2.py +++ b/materials/x18/lab/2/lab01/tests/q2_2.py @@ -6,7 +6,7 @@ 'cases': [ { 'code': r""" - >>> 1 <= average_score <= 10 + >>> longer_than_five == 35453 True """, 'hidden': False, diff --git a/materials/x18/lab/2/lab01/tests/q2_3.py b/materials/x18/lab/2/lab01/tests/q2_3.py index 2d10ec0..5466085 100644 --- a/materials/x18/lab/2/lab01/tests/q2_3.py +++ b/materials/x18/lab/2/lab01/tests/q2_3.py @@ -6,7 +6,7 @@ 'cases': [ { 'code': r""" - >>> longer_than_five == 35453 + >>> 0.58 <= chance_of_all_different <= 0.68 True """, 'hidden': False, diff --git a/materials/x18/lab/2/lab01/tests/q2_4.py b/materials/x18/lab/2/lab01/tests/q2_4.py deleted file mode 100644 index 5466085..0000000 --- a/materials/x18/lab/2/lab01/tests/q2_4.py +++ /dev/null @@ -1,22 +0,0 @@ -test = { - 'name': '', - 'points': 1, - 'suites': [ - { - 'cases': [ - { - 'code': r""" - >>> 0.58 <= chance_of_all_different <= 0.68 - True - """, - 'hidden': False, - 'locked': False - }, - ], - 'scored': True, - 'setup': '', - 'teardown': '', - 'type': 'doctest' - } - ] -} diff --git a/materials/x18/lab/2/lab01/tests/q2_5.py b/materials/x18/lab/2/lab01/tests/q2_5.py deleted file mode 100644 index 490a27e..0000000 --- a/materials/x18/lab/2/lab01/tests/q2_5.py +++ /dev/null @@ -1,22 +0,0 @@ 
-test = { - 'name': '', - 'points': 1, - 'suites': [ - { - 'cases': [ - { - 'code': r""" - >>> 0 <= num_wall <= 10 - True - """, - 'hidden': False, - 'locked': False - }, - ], - 'scored': True, - 'setup': '', - 'teardown': '', - 'type': 'doctest' - } - ] -} \ No newline at end of file diff --git a/materials/x18/lab/2/lab02/lab02.ipynb b/materials/x18/lab/2/lab02/lab02.ipynb index c61b274..c1f5560 100644 --- a/materials/x18/lab/2/lab02/lab02.ipynb +++ b/materials/x18/lab/2/lab02/lab02.ipynb @@ -14,7 +14,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Run this cell, but please don't change it.\n", @@ -53,6 +55,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -70,7 +73,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_1.py')" @@ -86,7 +91,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "rough_success_chance = ..." 
@@ -95,7 +102,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_2.py')" @@ -114,6 +123,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -132,7 +142,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_3.py')" @@ -150,7 +162,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# We suggest using these bins.\n", @@ -176,7 +190,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "min_estimate = ...\n", @@ -186,7 +202,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_5.py')" @@ -206,7 +224,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def mean_based_estimator(nums):\n", @@ -222,7 +242,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_6.py')" @@ -232,7 +254,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 2. Sampling\n", + "## 2. Sampling Basketball Data\n", "\n", "Run the cell below to load the player and salary data." ] @@ -240,7 +262,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "player_data = Table().read_table(\"player_data.csv\")\n", @@ -277,13 +301,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Question 2.1**.
Complete the `histograms` function, which takes a table with columns `Age` and `Salary` and draws a histogram for each one. Use the min and max functions to pick the bin boundaries so that all data appears for any table passed to your function. Use the same bin widths as before (1 year for `Age` and $1,000,000 for `Salary`)." + "**Question 2.1**.
Complete the `histograms` function, which takes a table with columns `Age` and `Salary` and draws a histogram for each one. Use the min and max functions to pick the bin boundaries so that all data appears for any table passed to your function. Use the same bin widths as before (1 year for `Age` and $1,000,000 for `Salary`).\n", + "\n", + "*Hint*: When creating the bins for the histograms, think critically about what the stop argument should be for `np.arange`. Histograms are inclusive on the left hand side of the interval, but not the right. So, if we have a maximum age of 80, we need an 80-81 bin in order to capture this in the histogram. " ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def histograms(t):\n", @@ -302,7 +330,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_1.py') # Warning: Charts will be displayed while running this test" @@ -323,7 +353,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def compute_statistics(age_and_salary_data):\n", @@ -339,7 +371,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_2.py') # Warning: Charts will be displayed while running this test" @@ -360,7 +394,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "convenience_sample = ...\n", @@ -370,7 +406,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_3.py')" @@ -386,7 +424,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": 
[], "source": [ "convenience_stats = ...\n", @@ -396,7 +436,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_4.py')" @@ -412,7 +454,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def compare_salaries(first, second, first_title, second_title):\n", @@ -441,7 +485,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def load_data(salary_file):\n", @@ -458,7 +504,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Original:\n", @@ -475,6 +523,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -487,9 +536,9 @@ "metadata": {}, "source": [ "### Producing simple random samples\n", - "Often it's useful to take random samples even when we have a larger dataset available. The randomized response technique was one example we saw in lecture. Another is to help us understand how inaccurate other samples are.\n", + "Often it's useful to take random samples even when we have a larger dataset available. One reason is to help us understand how inaccurate other samples are.\n", "\n", - "Tables provide the method `sample()` for producing random samples. Note that its default is to sample with replacement. To see how to call `sample()`, search the documentation on the [resources page](http://data8.org/su17/resources.html) of the course website, or enter `full_data.sample?` into a code cell and press Shift + Enter." + "Tables provide the method `sample()` for producing random samples. Note that its default is to sample with replacement. 
To see how to call `sample()`, search the [datascience documentation](http://data8.org/datascience/), or enter `full_data.sample?` into a code cell and press Shift + Enter." ] }, { @@ -509,6 +558,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -540,6 +590,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -575,7 +626,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# For your convenience, you can run this cell to run all the tests at once!\n", @@ -589,9 +642,9 @@ "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "data8x", + "display_name": "Python 3", "language": "python", - "name": "data8x" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -603,10 +656,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.2" + "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 1 } - diff --git a/materials/x18/lab/2/lab05/lab05.ipynb b/materials/x18/lab/2/lab05/lab05.ipynb index 55767f7..f5061c2 100644 --- a/materials/x18/lab/2/lab05/lab05.ipynb +++ b/materials/x18/lab/2/lab05/lab05.ipynb @@ -8,9 +8,13 @@ "\n", "Welcome to Lab 5!\n", "\n", - "We will attempt to estimate the number `N`, a *population parameter*, that represents the number of elements in a population. We get to observe a uniform random sample of the elements, and for each one we can observe its serial number (from 1 to `N`). All elements are labeled with consecutive serial numbers from 1 to `N`, so `N` is the total number of elements. 
\n", + "The British Royal Air Force wanted to know how many warplanes the Germans had (some number `N`, which is a *population parameter*), and they needed to estimate that quantity knowing only a random sample of the planes' serial numbers (from 1 to `N`). We know that the Germans' warplanes are labeled consecutively from 1 to `N`, so `N` would be the total number of warplanes they have. \n", "\n", - "Given *just* a random sample of elements, we'll estimate `N`, and then we'll use simulation to find a confidence interval around our estimate, all without ever looking at the whole population. This is an example of *statistical inference*.\n", + "We normally investigate the random variation amongst our estimates by simulating a sampling procedure from the population many times and computing estimates from each sample that we generate. In real life, if the RAF had known what the population looked like, they would have known `N` and would not have had any reason to think about random sampling. However, they didn't know what the population looked like, so they couldn't have run the simulations that we normally do. \n", + "\n", + "Simulating a sampling procedure many times was a useful exercise in *understanding random variation* for an estimate, but it's not as useful as a tool for practical data analysis.\n", + "\n", + "Let's flip that sampling idea on its head to make it practical. Given *just* a random sample of serial numbers, we'll estimate `N`, and then we'll use simulation to find out how accurate our estimate probably is, without ever looking at the whole population. This is an example of *statistical inference*.\n", "\n", "As usual, **run the cell below** to prepare the lab and the automatic tests." 
] @@ -78,7 +82,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "observations = Table.read_table(\"serial_numbers.csv\")\n", @@ -97,7 +103,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def plot_serial_numbers(numbers):\n", @@ -139,7 +147,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def mean_based_estimator(nums):\n", @@ -152,7 +162,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_4.py')" @@ -169,7 +181,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "max_estimate = ...\n", @@ -179,7 +193,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_5.py')" @@ -265,7 +281,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# This line is a little magic to make sure that you see the same results\n", @@ -279,7 +297,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_1.py')" @@ -303,7 +323,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "...\n", @@ -338,7 +360,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_3.py')" @@ -355,7 +379,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + 
"collapsed": true + }, "outputs": [], "source": [ "resample_0 = ...\n", @@ -419,7 +445,7 @@ "\n", "**Note**: *The last part of this lab is difficult to check automatically, so it will not be graded. We strongly suggest that you try to complete it. We will release solutions to this lab so that you can compare to them.*\n", "\n", - "Since resampling from a sample is very similar to sampling from a population, the code should look almost the same. That means we can write a function that simulates either sampling from a population or resampling from a sample. If we pass it a population as its argument, it will do the former; if we pass it a sample, it will do the latter.\n", + "Since resampling from a sample looks just like sampling from a population, the code should look almost the same. That means we can write a function that simulates the process of either sampling from a population or resampling from a sample. If we pass in population as its argument, it will do the former; if we pass in a sample, it will do the latter.\n", "\n", "#### Question 3.1\n", "Write a function called `simulate_estimates`. It should take 4 arguments:\n", @@ -428,11 +454,7 @@ "3. A function that computes a statistic of a sample. This argument is a *function* that takes an array of serial numbers as its argument and returns a number.\n", "4. The number of replications to perform.\n", "\n", - "It should simulate many samples with replacement from the given table. (The number of samples is the 4th argument.) For each of those samples, it should compute the statistic on that sample. Then it should return an array containing each of those statistics. 
The code below provides an example use of your function and describes how you can verify that you've written it correctly.\n", - "\n", - "**Hint**: Your implementation should contain the following line, which extracts the \"serial number\" column from some table ``t`` and calls the `statistic` function on it, storing the result in the name `s`.\n", - "\n", - "``s = statistic(t.column(\"serial number\"))``" + "It should simulate many samples with replacement from the given table. (The number of samples is the 4th argument.) For each of those samples, it should compute the statistic on that sample. Then it should return an array containing each of those statistics. The code below provides an example use of your function and describes how you can verify that you've written it correctly." ] }, { @@ -479,7 +501,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "bootstrap_estimates = ...\n", @@ -497,7 +521,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "left_end = ...\n", @@ -543,7 +569,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "population = Table().with_column(\"serial number\", np.arange(1, 150+1))\n", @@ -631,10 +659,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.3" + "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 1 } - diff --git a/materials/x18/lab/2/lab06/lab06.ipynb b/materials/x18/lab/2/lab06/lab06.ipynb index 7d8aa3e..876d1b4 100644 --- a/materials/x18/lab/2/lab06/lab06.ipynb +++ b/materials/x18/lab/2/lab06/lab06.ipynb @@ -463,9 +463,9 @@ "### Background\n", "Medical tests are an important but surprisingly controversial topic. For years, women have been advised to get regular mammograms (tests for breast cancer). 
Today, there is controversy over whether the tests are useful at all.\n", "\n", - "Part of the problem with such tests is that they are not perfectly reliable. Someone without cancer, or with only a benign form of cancer, can see a positive result on a test for cancer. Someone with cancer can receive a negative result. (\"Positive\" means \"pointing toward cancer,\" so in this context it's bad!) Doctors and patients often deal poorly with the first case, called *false positives*. For example, a patient may receive dangerous treatment like chemotherapy or radiation despite having no cancer or, as happens more frequently, having a cancer that would not have impacted her health.\n", + "Part of the problem with such tests is that they are not perfectly reliable. Someone without cancer, or with only a benign form of cancer, can see a positive result on a test for cancer. Someone with cancer can receive a negative result. (\"Positive\" means \"pointing toward cancer,\" so in this context it's bad!) Doctors and patients often deal poorly with the first case, called *false positives*. For example, a patient may receive dangerous treatment like chemotherapy or radiation despite having no cancer or, as happens more frequently, having a cancer that would not have impacted their health.\n", "\n", - "Conditional probability is a good way to think about such situations. For example, you can compute the chance that you have cancer, given the result of a test, by combining information from different probability distributions. You'll see that the chance you have cancer can be far from 100% even if you have a positive test result from a test that is usually accurate." + "Conditional probability is a good way to think about such situations. For example, you can compute the chance that you have cancer **given the results of a diagnostic test** by combining information from different probability distributions. 
You'll see that the chance you have cancer can be far from 100% even if you have a positive test result from a test that is usually accurate." ] }, { @@ -866,7 +866,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.2" + "version": "3.6.1" }, "timetravel": { "allowedContentTypes": [ @@ -879,4 +879,3 @@ "nbformat": 4, "nbformat_minor": 1 } - diff --git a/materials/x18/lab/3/lab01/lab01.ipynb b/materials/x18/lab/3/lab01/lab01.ipynb index d567a8a..4ebc25c 100644 --- a/materials/x18/lab/3/lab01/lab01.ipynb +++ b/materials/x18/lab/3/lab01/lab01.ipynb @@ -8,7 +8,7 @@ "\n", "Welcome to Lab 1 and Data 8.3x!\n", "\n", - "In this week's lab, we will cover two relatively orthogonal concepts. First, we will investigate the variance of sample means, found in [Section 14.5](https://www.inferentialthinking.com/chapters/14/5/Variability_of_the_Sample_Mean) of our textbook. We will also get some hands-on practice with understanding the association between two variables, which you can read more about in [Section 15.1](https://www.inferentialthinking.com/chapters/15/1/Correlation)." + "In this lab we will learn about [the variance of sample means](https://www.inferentialthinking.com/chapters/14/5/variability-of-the-sample-mean.html) as well as ways to understand and quantify [the association between two variables](https://www.inferentialthinking.com/chapters/15/1/correlation.html)." 
] }, { @@ -52,7 +52,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# For the curious: this is how to display a YouTube video in a\n", @@ -82,7 +84,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "faithful = Table.read_table(\"faithful.csv\")\n", @@ -101,7 +105,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "..." @@ -126,7 +132,7 @@ "\n", " (nums - np.mean(nums)) / np.std(nums)\n", "\n", - "...is an array of those numbers in standard units." + "is an array of those numbers in standard units." ] }, { @@ -142,6 +148,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -160,7 +167,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_2.py')" @@ -178,7 +187,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "..." 
@@ -219,7 +230,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_4.py')" @@ -237,7 +250,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "r = ...\n", @@ -247,7 +262,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_5.py')" @@ -269,7 +286,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "salaries = Table.read_table('sf_salaries_2014.csv').select(\"salary\")\n", @@ -279,7 +298,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "salary_mean = np.mean(salaries.column('salary'))\n", @@ -289,7 +310,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "salaries.hist('salary', bins=np.arange(0, 300000+10000*2, 10000))\n", @@ -355,7 +378,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "simulate_sample_mean(salaries, 'salary', 100, 10000) \n", @@ -373,6 +398,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -407,9 +433,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "** Question 2.3 **\n", - "
\n", - "Below, we'll look at what happens when we take a fixed sample, then bootstrap from it with different numbers of resamples. How does the distribution of the resampled means change?" + "** Question 2.3 **" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Assign the variable `bootstrap_sampled_SD` to the integer corresponding to your answer to the following question:\n", + "\n", + "When I increase the number of bootstrap samples that I take, for a fixed sample size, the SD of my sample mean will...\n", + "\n", + "1. Increase\n", + "2. Decrease\n", + "3. Stay about the same\n", + "4. Vary wildly" ] }, { @@ -420,8 +458,7 @@ }, "outputs": [], "source": [ - "simulate_sample_mean(salaries, 'salary', 100, 1000)\n", - "plots.xlim(50000, 100000)" + "bootstrap_sampled_SD = ..." ] }, { @@ -432,8 +469,14 @@ }, "outputs": [], "source": [ - "simulate_sample_mean(salaries, 'salary', 100, 5000)\n", - "plots.xlim(50000, 100000)" + "check('tests/q2_3.py')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below, we'll look at what happens when we take a fixed sample, then bootstrap from it with different numbers of resamples. How does the distribution of the resampled means change?" ] }, { @@ -444,24 +487,10 @@ }, "outputs": [], "source": [ - "simulate_sample_mean(salaries, 'salary', 100, 10000)\n", + "simulate_sample_mean(salaries, 'salary', 100, 500)\n", "plots.xlim(50000, 100000)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Assign the variable `bootstrap_sampled_SD` to the integer corresponding to your answer to the following question:\n", - "\n", - "When I increase the number of bootstrap samples that I take, for a fixed sample size, the SD of my sample mean will...\n", - "\n", - "1. Increase\n", - "2. Decrease\n", - "3. Stay about the same\n", - "4. Vary widly" - ] - }, { "cell_type": "code", "execution_count": null, @@ -470,7 +499,8 @@ }, "outputs": [], "source": [ - "bootstrap_sampled_SD = ..." 
+ "simulate_sample_mean(salaries, 'salary', 100, 1000)\n", + "plots.xlim(50000, 100000)" ] }, { @@ -481,7 +511,7 @@ }, "outputs": [], "source": [ - "simulate_sample_mean(salaries, 'salary', 100, 500)\n", + "simulate_sample_mean(salaries, 'salary', 100, 5000)\n", "plots.xlim(50000, 100000)" ] }, @@ -493,7 +523,15 @@ }, "outputs": [], "source": [ - "check('tests/q2_3.py')" + "simulate_sample_mean(salaries, 'salary', 100, 10000)\n", + "plots.xlim(50000, 100000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "What did you notice about the sample means of the four bootstrapped samples above?" ] }, { @@ -635,7 +673,7 @@ "source": [ "You should notice that the distribution of means gets spiker, and that the distribution of the sample increasingly looks like the distribution of the population as we get to larger sample sizes. \n", "\n", - "Is there a relationship between the sample size and absolute error in standard deviation? Identify this relationship – if you're having trouble, take a look at [Section 14.5](https://www.inferentialthinking.com/chapters/14/5/Variability_of_the_Sample_Mean) in our textbook." + "Is there a relationship between the sample size and absolute error in standard deviation? Identify this relationship – if you're having trouble, take a look at this [section](https://www.inferentialthinking.com/chapters/14/5/variability-of-the-sample-mean.html) in our textbook about the variability of sample means." 
] }, { @@ -688,10 +726,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.2" + "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 1 } - diff --git a/materials/x18/lab/3/lab02/lab02.ipynb b/materials/x18/lab/3/lab02/lab02.ipynb index 114ac62..9742ba3 100644 --- a/materials/x18/lab/3/lab02/lab02.ipynb +++ b/materials/x18/lab/3/lab02/lab02.ipynb @@ -59,7 +59,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "faithful = Table.read_table(\"faithful.csv\")\n", @@ -76,7 +78,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "duration_mean = np.mean(faithful.column(\"duration\"))\n", @@ -101,7 +105,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "r = np.mean(faithful_standard.column(0) * faithful_standard.column(1))\n", @@ -123,7 +129,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def plot_data_and_line(dataset, x, y, point_0, point_1):\n", @@ -159,7 +167,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "slope = ...\n", @@ -180,7 +190,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "intercept = slope*(-duration_mean) + wait_mean\n", @@ -190,7 +202,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q2_1.py')" @@ -210,7 +224,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ 
"two_minute_predicted_waiting_time = ...\n", @@ -230,7 +246,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_1.py')" @@ -247,6 +265,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": false }, "outputs": [], @@ -273,7 +292,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "faithful_predictions = ...\n", @@ -283,7 +304,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_2.py')" @@ -302,7 +325,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "residual = faithful_predictions.column(1) - faithful_predictions.column(2)\n", @@ -313,7 +338,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q3_3.py')" @@ -329,7 +356,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "faithful_residuals.scatter(\"duration\", \"residual\", color=\"r\")" @@ -355,7 +384,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "faithful_residuals.scatter(\"duration\", \"wait\", label=\"actual waiting time\", color=\"blue\")\n", @@ -375,7 +406,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "zero_minute_predicted_waiting_time = ...\n", @@ -390,7 +423,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q4_1.py')" 
@@ -413,7 +448,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "faithful.scatter(\"duration\", \"wait\", label=\"actual waiting time\", color=\"blue\")\n", @@ -458,7 +495,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def reg_coeff(t):\n", @@ -476,7 +515,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q5_1.py')" @@ -528,7 +569,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q5_2.py')" @@ -544,7 +587,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "faithful.scatter(0, 1)\n", @@ -577,7 +622,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q5_3.py')" @@ -593,7 +640,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "faithful.with_column('predicted', faithful.apply(predict_wait, 'duration')).scatter(0)" @@ -659,10 +708,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.2" + "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 1 } - diff --git a/materials/x18/lab/3/lab03/lab03.ipynb b/materials/x18/lab/3/lab03/lab03.ipynb index a6f7006..669974b 100644 --- a/materials/x18/lab/3/lab03/lab03.ipynb +++ b/materials/x18/lab/3/lab03/lab03.ipynb @@ -8,15 +8,17 @@ "\n", "Welcome to Lab 3 of Data 8.3x!\n", "\n", - "Sometimes, the primary purpose of regression analysis is to learn something about the slope or intercept of the best-fitting line. 
When we use a sample of data to estimate the slope or intercept, our estimate is subject to random error, just like our estimates of population means and medians.\n", + "Sometimes, the primary purpose of regression analysis is to learn something about the slope or intercept of the best-fitting line. When we use a sample of data to estimate the slope or intercept, our estimate is subject to random error, just as in the simpler case of the mean of a random sample.\n", "\n", - "In this lab, we'll use linear regression to estimate the age of the universe using pictures of exploding stars. Our estimate will come from a sample of all exploding stars. We'll compute a confidence interval to quantify the error caused by sampling." + "In this lab, we'll use regression to get an accurate estimate for the age of the universe, using pictures of exploding stars. Our estimate will come from a sample of all exploding stars. We'll compute a confidence interval to quantify the error caused by sampling." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Run this cell, but please don't change it.\n", @@ -50,7 +52,7 @@ "### The Actual Big Bang Theory\n", "In the early 20th century, the most popular cosmological theory suggested that the universe had always existed at a fixed size. Today, the Big Bang theory prevails: Our universe started out very small and is still expanding.\n", "\n", - "A consequence of this is Hubble's Law, which says that the expansion of the universe creates the appearance that every celestial object that's reasonably far away from Earth (for example, another galaxy) is moving away from us at a constant speed. 
If we extrapolate that motion backwards to the time when everything in the universe was in the same place, that time is (roughly) the beginning of the universe!\n", + "A consequence of this is Hubble's Law, which states that every celestial object that's reasonably far away from Earth (for example, another galaxy) is moving away from us at a constant speed. If we extrapolate that motion backwards to the time when everything in the universe was in the same place, that time is (roughly) the beginning of the universe!\n", "\n", "Scientists have used this fact, along with measurements of the current *location* and *movement speed* of other celestial objects, to estimate when the universe started.\n", "\n", @@ -62,7 +64,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Just run this cell. (The simulation is actually not\n", @@ -125,7 +129,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Run this cell to see a picture of Mei's locations over time.\n", @@ -154,7 +160,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Just run this cell.\n", @@ -189,7 +197,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Just run this cell.\n", @@ -207,7 +217,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Fill in the start time you infer from the above line.\n", @@ -219,6 +231,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -243,7 +256,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], 
"source": [ "# Just run this cell.\n", @@ -267,7 +282,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Fill this in manually by examining the line above.\n", @@ -280,7 +297,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_2.py')" @@ -318,6 +337,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "for_assignment_type": "student" }, "outputs": [], @@ -341,6 +361,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -352,7 +373,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_4.py')" @@ -378,17 +401,16 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def fit_line(tbl):\n", " # Your code may need more than 1 line below here.\n", - " # Rather than using the regression line formulas, try\n", - " # calling minimize on the mean squared error.\n", - " ...\n", - " slope = ...\n", - " intercept = ...\n", - " return make_array(slope, intercept)\n", + " def mse(..., ...):\n", + " ... \n", + " return ... \n", " \n", "# Here is an example call to your function. 
To test your function,\n", "# figure out the right slope and intercept by hand.\n", @@ -401,7 +423,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_5.py')" @@ -423,6 +447,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": false }, "outputs": [], @@ -466,7 +491,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "bootstrap_ages = make_array()\n", @@ -482,7 +509,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "check('tests/q1_7.py')" @@ -515,7 +544,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# For your convenience, you can run this cell to run all the tests at once!\n", @@ -543,7 +574,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.1" }, "widgets": { "state": { @@ -568,4 +599,3 @@ "nbformat": 4, "nbformat_minor": 1 } -