From 0791d00ebed7f7029b5cf86afe54fb9a9e633830 Mon Sep 17 00:00:00 2001 From: Ricardo Mendes Date: Wed, 19 Apr 2023 20:58:54 +0100 Subject: [PATCH] msg --- your-code/challenge-1.ipynb | 374 ++++++++++++++++++++++++++++++++++-- your-code/challenge-2.ipynb | 70 ++++++- 2 files changed, 418 insertions(+), 26 deletions(-) diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb index c574eba..ed958f1 100644 --- a/your-code/challenge-1.ipynb +++ b/your-code/challenge-1.ipynb @@ -15,13 +15,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import re" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -33,19 +40,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Durante un tiempo no estuvo segura de si su marido era su marido\n" + ] + } + ], "source": [ "str_list = ['Durante', 'un', 'tiempo', 'no', 'estuvo', 'segura', 'de', 'si', 'su', 'marido', 'era', 'su', 'marido']\n", - "# Your code here:\n" + "# Your code here:\n", + "list1 = ' '.join(str_list)\n", + "print(list1)\n" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 4, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Durante un tiempo no estuvo segura de si su marido era su marido.\n" + ] + } + ], "source": [ - "In the cell below, use the list of strings to create a grocery list. Start the list with the string `Grocery list: ` and include a comma and a space between each item except for the last one. Include a period at the end. Only include foods in the list that start with the letter 'b' and ensure all foods are lower case." + "list1 = ' '.join(str_list) + '.'\n", + "print(list1)" ] }, { @@ -53,11 +81,60 @@ "execution_count": null, "metadata": {}, "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the cell below, use the list of strings to create a grocery list. Start the list with the string `Grocery list: ` and include a comma and a space between each item except for the last one. Include a period at the end. Only include foods in the list that start with the letter 'b' and ensure all foods are lower case." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bananas Chocolate bread diapers Ice Cream Brownie Mix broccoli.\n" + ] + } + ], "source": [ "food_list = ['Bananas', 'Chocolate', 'bread', 'diapers', 'Ice Cream', 'Brownie Mix', 'broccoli']\n", - "# Your code here:\n" + "# Your code here:\n", + "Grocery_list = ' '.join(food_list) + '.'\n", + "print(Grocery_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bananas Chocolate bread diapers Ice Cream Brownie Mix broccoli.\n" + ] + } + ], + "source": [ + "Grocery_list = ' '.join(food_list) + '.'\n", + "print(Grocery_list)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -106,9 +183,30 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['Some say the world will end in fire,',\n", + " 'Some say in ice.',\n", + " 'From what I’ve tasted of desire',\n", + " 'I hold with those who favor fire.',\n", + " 'But if it had to perish twice,',\n", + " 'I think I know enough of hate',\n", + " 'To say that for destruction ice',\n", + " 'Is also great',\n", + " 'And would suffice.']" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "poem = \"\"\"Some say the world will end in fire,\n", "Some say in ice.\n", @@ -120,9 +218,118 @@ "Is also great\n", "And would suffice.\"\"\"\n", "\n", - "# Your code here:\n" + "# Your code here:\n", + "\n", + "poem.split(\"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "246" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "poem.count(\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'some': 2, 'say': 3, 'the': 1, 'world': 1, 'will': 1, 'end': 1, 'in': 2, 'fire,': 1, 'ice.': 1, 'from': 1, 'what': 1, 'i’ve': 1, 'tasted': 1, 'of': 2, 'desire': 1, 'i': 3, 'hold': 1, 'with': 1, 'those': 1, 'who': 1, 'favor': 1, 'fire.': 1, 'but': 1, 'if': 1, 'it': 1, 'had': 1, 'to': 2, 'perish': 1, 'twice,': 1, 'think': 1, 'know': 1, 'enough': 1, 'hate': 1, 'that': 1, 'for': 1, 'destruction': 1, 'ice': 1, 'is': 1, 'also': 1, 'great': 1, 'and': 1, 'would': 1, 'suffice.': 1}\n" + ] + } + ], + "source": [ + "words = poem.lower().split()\n", + "\n", + "# Create a dictionary with each unique word as a key and its frequency as a value using a dictionary comprehension\n", + "word_freq = {word: words.count(word) for word in words}\n", + "\n", + "# Print the dictionary of word frequencies\n", + "print(word_freq)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'Some': 2, 'say': 3, 'the': 1, 'world': 1, 'will': 1, 'end': 1, 'in': 2, 'fire,': 1, 'ice.': 1, 'From': 1, 'what': 1, 'I’ve': 1, 'tasted': 1, 'of': 2, 'desire': 1, 'I': 3, 'hold': 1, 'with': 1, 'those': 1, 'who': 1, 'favor': 1, 'fire.': 1, 'But': 1, 'if': 1, 'it': 1, 'had': 1, 'to': 1, 'perish': 1, 'twice,': 1, 'think': 1, 'know': 1, 'enough': 1, 'hate': 1, 'To': 1, 'that': 1, 'for': 1, 'destruction': 1, 'ice': 1, 'Is': 1, 'also': 1, 'great': 1, 'And': 1, 'would': 1, 'suffice.': 1}\n" + ] + } + ], + "source": [ + "words = poem.lower()\n", + "words = poem.split()\n", + "\n", + "word_freq = {word: words.count(word) for word in words}\n", + "\n", + "print(word_freq)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -172,18 +379,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['The', 'Petals']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import re\n", "\n", "poem = \"\"\"The apparition of these faces in the crowd;\n", "Petals on a wet, black bough.\"\"\"\n", "\n", - "# Your code here:\n" + "# Your code here:\n", + "rule = \"[A-Z][a-z]+\"\n", + "re.findall(rule, poem)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -193,15 +420,124 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "TypeError", + "evalue": "expected string or bytes-like object", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[19], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Your code here:\u001b[39;00m\n\u001b[1;32m 5\u001b[0m rule \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[0-9]\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 6\u001b[0m \u001b[43mre\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/lib/python3.10/re.py:200\u001b[0m, in \u001b[0;36msearch\u001b[0;34m(pattern, string, flags)\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msearch\u001b[39m(pattern, string, flags\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m):\n\u001b[1;32m 198\u001b[0m \u001b[38;5;124;03m\"\"\"Scan through string looking for a match to the pattern, returning\u001b[39;00m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;124;03m a Match object, or None if no match was found.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 200\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_compile\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mflags\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstring\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: expected string or bytes-like object" + ] + } + ], "source": [ "data = ['123abc', 'abc123', 'JohnSmith1', 'ABBY4', 'JANE']\n", "\n", - "# Your code here:\n" + "# Your code here:\n", + "\n", + "rule = \"[0-9]\"\n", + "re.search(rule, data)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "data = ['123abc', 'abc123', 'JohnSmith1', 'ABBY4', 'JANE']\n", + "rule = \"[0-9]\"\n", + "re.search(\"rule\", \"data\")\n", + "print(re.search)" ] }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "data = ['123abc', 'abc123', 'JohnSmith1', 'ABBY4', 'JANE']\n", + "\n", + "re.search(\"0-9\", \"data\")\n", + "print(re.search)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "expected string or bytes-like object", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[29], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m data \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m123abc\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mabc123\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mJohnSmith1\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mABBY4\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mJANE\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 3\u001b[0m rule \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124md\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 4\u001b[0m \u001b[43mre\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/lib/python3.10/re.py:200\u001b[0m, in \u001b[0;36msearch\u001b[0;34m(pattern, string, flags)\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msearch\u001b[39m(pattern, string, flags\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m):\n\u001b[1;32m 198\u001b[0m \u001b[38;5;124;03m\"\"\"Scan through string looking for a match to the pattern, returning\u001b[39;00m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;124;03m a Match object, or None if no match was found.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 200\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_compile\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mflags\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstring\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: expected string or bytes-like object" + ] + } + ], + "source": [ + "data = ['123abc', 'abc123', 'JohnSmith1', 'ABBY4', 'JANE']\n", + "\n", + "rule = \"\\d\"\n", + "re.search(rule, data)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -226,7 +562,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -240,7 +576,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.10.9" } }, "nbformat": 4, diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb index 6873bd2..6b727af 100644 --- a/your-code/challenge-2.ipynb +++ b/your-code/challenge-2.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Bag of Words Lab\n", + "### Bag of Words Lab\n", "\n", "## Introduction\n", "\n", @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -88,13 +88,50 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "# Write your code here\n" + "corpus = []\n" ] }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'doc1.txt'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m filename \u001b[38;5;129;01min\u001b[39;00m docs:\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m# Open the file for reading\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m file:\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# Read the contents of the file into a string\u001b[39;00m\n\u001b[1;32m 5\u001b[0m content \u001b[38;5;241m=\u001b[39m file\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# Append the contents to the corpus array\u001b[39;00m\n", + "File \u001b[0;32m~/anaconda3/lib/python3.10/site-packages/IPython/core/interactiveshell.py:282\u001b[0m, in \u001b[0;36m_modified_open\u001b[0;34m(file, *args, **kwargs)\u001b[0m\n\u001b[1;32m 275\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file \u001b[38;5;129;01min\u001b[39;00m {\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m}:\n\u001b[1;32m 276\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 277\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIPython won\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt let you open fd=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m by default \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 278\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mas it is likely to crash IPython. If you know what you are doing, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 279\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124myou can use builtins\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m open.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 280\u001b[0m )\n\u001b[0;32m--> 282\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mio_open\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'doc1.txt'" + ] + } + ], + "source": [ + "##DISCLAIMER, I was trying to find the solution online but was not able so I couldn't continue \n", + "\n", + "for filename in docs:\n", + " # Open the file for reading\n", + " with open(filename, 'r') as file:\n", + " # Read the contents of the file into a string\n", + " content = file.read()\n", + " # Append the contents to the corpus array\n", + " corpus.append(content)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -102,6 +139,23 @@ "Print `corpus`." ] }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[]\n" + ] + } + ], + "source": [ + "print(corpus)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -151,7 +205,9 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "bag_of_words = []" + ] }, { "cell_type": "markdown", @@ -304,7 +360,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -318,7 +374,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.10.9" } }, "nbformat": 4,