From 53e5c96e255ee066de2366eeef40b4536cfde28d Mon Sep 17 00:00:00 2001 From: "Gaby De La Cerda M.S" Date: Sun, 4 Sep 2022 23:18:16 -0500 Subject: [PATCH] Jul-DAPT [Gabriela De La Cerda] Lab Completion --- your-code/challenge-1.ipynb | 280 ++++++++++++++++++++++++++++++++---- your-code/challenge-2.ipynb | 187 ++++++++++++++++++++---- 2 files changed, 415 insertions(+), 52 deletions(-) diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb index 4302084..06af2e0 100644 --- a/your-code/challenge-1.ipynb +++ b/your-code/challenge-1.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -33,12 +33,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Durante un tiempo no estuvo segura de si su marido era su marido.\n" + ] + } + ], "source": [ "str_list = ['Durante', 'un', 'tiempo', 'no', 'estuvo', 'segura', 'de', 'si', 'su', 'marido', 'era', 'su', 'marido']\n", - "# Your code here:\n" + "# Your code here:\n", + "# combine the list into a string with spaces between each word and add a period at the end and print it\n", + "str_list = ' '.join(str_list) + '.'\n", + "print(str_list)\n" ] }, { @@ -50,12 +61,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bananas, chocolate, bread, diapers, ice cream, brownie mix, broccoli.\n" + ] + } + ], "source": [ "food_list = ['Bananas', 'Chocolate', 'bread', 'diapers', 'Ice Cream', 'Brownie Mix', 'broccoli']\n", - "# Your code here:\n" + "# Your code here:\n", + "# create a new list with the same items but all in lowercase, include a comma and a space after each item and a period at the end of the last item and print it\n", + "food_list = [item.lower() + ', ' for item in food_list]\n", + "food_list[-1] = food_list[-1][:-2] + '.'\n", + "food_list = ''.join(food_list)\n", + "print(food_list)\n" ] }, { @@ -69,9 +93,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 68, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The area of the circle with radius: 4.5 is: 63.61725123519331\n" + ] + } + ], "source": [ "import math\n", "\n", @@ -89,8 +121,52 @@ " \n", " # Your code here:\n", " \n", - " \n", - "# Your output string here:" + " return pi * x**2\n", + "\n", + "# Your output string here:\n", + "\n", + "print(string1, radius, string2, area(radius))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "import math\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "string1 = \"The area of the circle with radius:\"\n", + "string2 = \"is:\"\n", + "radius = 4.5" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The area of the circle with radius: 4.5 is: 63.61725123519331\n" + ] + } + ], + "source": [ + "# define a function that takes a radius and returns the area of a circle\n", + "def area(x, pi = math.pi):\n", + " return pi * x**2\n", + "\n", + "print(string1, radius, string2, area(radius))\n" ] }, { @@ -106,9 +182,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Some say the world will end in fire,', 'Some say in ice.', 'From what I’ve tasted of desire', 'I hold with those who favor fire.', 'But if it had to perish twice,', 'I think I know enough of hate', 'To say that for destruction ice', 'Is also great', 'And would suffice.']\n", + "{'some': 2, 'say': 3, 'the': 1, 'world': 1, 'will': 1, 'end': 1, 'in': 2, 'fire': 2, 'ice': 2, 'from': 1, 'what': 1, 'ive': 1, 'tasted': 1, 'of': 2, 'desire': 1, 'i': 3, 'hold': 1, 'with': 1, 'those': 1, 'who': 1, 'favor': 1, 'but': 1, 'if': 1, 'it': 1, 'had': 1, 'to': 2, 'perish': 1, 'twice': 1, 'think': 1, 'know': 1, 'enough': 1, 'hate': 1, 'that': 1, 'for': 1, 'destruction': 1, 'is': 1, 'also': 1, 'great': 1, 'and': 1, 'would': 1, 'suffice': 1}\n" + ] + } + ], "source": [ "poem = \"\"\"Some say the world will end in fire,\n", "Some say in ice.\n", @@ -120,7 +205,18 @@ "Is also great\n", "And would suffice.\"\"\"\n", "\n", - "# Your code here:\n" + "# Your code here:\n", + "# split the poem into a list of strings, one string per line, and print it\n", + "poem = poem.split('\\n')\n", + "print(poem)\n", + "\n", + "# count the frequency of each word and add it to a dictionary, strip the punctuation and print the dictionary\n", + "poem = ' '.join(poem)\n", + "poem = re.sub(r'[^\\w\\s]', '', poem)\n", + "poem = poem.lower()\n", + "poem = poem.split()\n", + "poem = {word: poem.count(word) for word in poem}\n", + "print(poem)\n" ] }, { @@ -132,9 +228,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "i was angry with my friend \n", + "i told my wrath my wrath did end\n", + "i was angry with my foe \n", + "i told it not my wrath did grow \n", + "\n", + "and i waterd it in fears\n", + "night morning with my tears \n", + "and i sunned it with smiles\n", + "and with soft deceitful wiles \n", + "\n", + "and it grew both day and night \n", + "till it bore an apple bright \n", + "and my foe beheld it shine\n", + "and he knew that it was mine \n", + "\n", + "and into my garden stole \n", + "when the night had veild the pole \n", + "in the morning glad i see \n", + "my foe outstretched beneath the tree\n" + ] + } + ], "source": [ "blacklist = ['and', 'as', 'an', 'a', 'the', 'in', 'it']\n", "\n", @@ -158,7 +280,56 @@ "In the morning glad I see; \n", "My foe outstretched beneath the tree.\"\"\"\n", "\n", - "# Your code here:\n" + "# Your code here:\n", + "poem = re.sub(r'[^\\w\\s]', '', poem)\n", + "poem = poem.lower()\n", + "print (poem)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "blacklist = ['and', 'as', 'an', 'a', 'the', 'in', 'it']\n", + "poem = \"\"\"I was angry with my friend; \n", + "I told my wrath, my wrath did end.\n", + "I was angry with my foe: \n", + "I told it not, my wrath did grow. \n", + "\n", + "And I waterd it in fears,\n", + "Night & morning with my tears: \n", + "And I sunned it with smiles,\n", + "And with soft deceitful wiles. \n", + "\n", + "And it grew both day and night. \n", + "Till it bore an apple bright. \n", + "And my foe beheld it shine,\n", + "And he knew that it was mine. \n", + "\n", + "And into my garden stole, \n", + "When the night had veild the pole; \n", + "In the morning glad I see; \n", + "My foe outstretched beneath the tree.\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "if any ([x in poem for x in blacklist]): print ('True')\n", + "else: print ('False')" ] }, { @@ -172,14 +343,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 103, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['T', 'P']" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "poem = \"\"\"The apparition of these faces in the crowd;\n", "Petals on a wet, black bough.\"\"\"\n", "\n", - "# Your code here:\n" + "# Your code here:\n", + "import regex as re\n", + "character_pattern = r'[A-Z]'\n", + "re.findall(character_pattern, poem)\n" ] }, { @@ -191,13 +376,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 122, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['123abc abc123 JohnSmith1 ABBY4 ']" + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "data = ['123abc', 'abc123', 'JohnSmith1', 'ABBY4', 'JANE']\n", "\n", - "# Your code here:\n" + "# Your code here:\n", + "data_string = ''\n", + "for i in data:\n", + " data_string = data_string + str(i) + ' '\n", + "\n", + "search_pattern = r'\\b?.+[\\d].+?\\b'\n", + "re.findall(search_pattern, data_string)" ] }, { @@ -213,18 +415,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 142, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['123abc', ' abc123', ' JohnSmith1']" + ] + }, + "execution_count": 142, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "data = ['123abc', 'abc123', 'JohnSmith1', 'ABBY4', 'JANE']\n", - "# Your code here:\n" + "# Your code here:\n", + "data_string_2 = ''\n", + "for i in data:\n", + " data_string_2 = data_string_2 + str(i) + ' '\n", + "\n", + "search_pattern = r'\\b+[a-z]?+.[\\d]?+.[a-z].+?\\b'\n", + "re.findall(search_pattern, data_string_2)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.10.6 64-bit", "language": "python", "name": "python3" }, @@ -238,7 +457,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.2" + "version": "3.10.6" + }, + "vscode": { + "interpreter": { + "hash": "698ac20c9d69fe4a5c9f1fd99cbff2bbdf000b3c9a118898e5a4c0237bee5db6" + } } }, "nbformat": 4, diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb index 87c5656..d84bd6d 100644 --- a/your-code/challenge-2.ipynb +++ b/your-code/challenge-2.ipynb @@ -72,11 +72,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, "metadata": {}, "outputs": [], "source": [ - "docs = ['doc1.txt', 'doc2.txt', 'doc3.txt']" + "docs = ['doc1.txt', 'doc2.txt', 'doc3.txt']\n", + "def read_doc(doc):\n", + " with open(doc, 'r') as f:\n", + " return f.read()\n" ] }, { @@ -88,13 +91,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "corpus = []\n", "\n", - "# Write your code here\n" + "# Write your code here\n", + "for doc in docs:\n", + " corpus.append(read_doc(doc))\n" ] }, { @@ -106,9 +111,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Ironhack is cool.', 'I love Ironhack.', 'I am a student at Ironhack.']\n" + ] + } + ], "source": [ "print(corpus)" ] @@ -136,11 +149,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "# Write your code here" + "# Write your code here\n", + "import string\n", + "import re\n", + "\n", + "def remove_punctuation(corpus):\n", + " corpus = [re.sub(r'[^\\w\\s]','',i) for i in corpus]\n", + " corpus = [i.lower() for i in corpus]\n", + " return corpus\n", + " " ] }, { @@ -152,7 +173,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -172,11 +193,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['ironhack', 'is', 'cool', 'i', 'love', 'am', 'a', 'student', 'at']\n" + ] + } + ], "source": [ - "# Write your code here" + "# Write your code here\n", + "def bag_of_words(corpus):\n", + " corpus = remove_punctuation(corpus)\n", + " corpus = [re.sub(r'[^\\w\\s]','',i) for i in corpus]\n", + " corpus = [i.lower() for i in corpus]\n", + " bag_of_words = []\n", + " for i in corpus:\n", + " for j in i.split():\n", + " if j not in bag_of_words:\n", + " bag_of_words.append(j)\n", + " return bag_of_words\n", + "\n", + "print(bag_of_words(corpus))\n", + "\n" ] }, { @@ -192,11 +234,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['ironhack', 'is', 'cool', 'i', 'love', 'am', 'a', 'student', 'at']\n" + ] + } + ], "source": [ - "print(bag_of_words)" + "print(bag_of_words(corpus))" ] }, { @@ -208,13 +258,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 60, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1, 1, 1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 1, 1, 0, 0, 0, 0], [1, 0, 0, 1, 0, 1, 1, 1, 1]]\n" + ] + } + ], "source": [ "term_freq = []\n", "\n", - "# Write your code here" + "# Write your code here\n", + "def term_freq(corpus):\n", + " corpus = remove_punctuation(corpus)\n", + " corpus = [re.sub(r'[^\\w\\s]','',i) for i in corpus]\n", + " corpus = [i.lower() for i in corpus]\n", + " bag_of_words = []\n", + " for i in corpus:\n", + " for j in i.split():\n", + " if j not in bag_of_words:\n", + " bag_of_words.append(j)\n", + " term_freq = []\n", + " for i in corpus:\n", + " temp = []\n", + " for j in bag_of_words:\n", + " temp.append(i.split().count(j))\n", + " term_freq.append(temp)\n", + " return term_freq\n", + "\n", + "print(term_freq(corpus))\n", + "\n" ] }, { @@ -228,11 +305,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 48, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1, 1, 1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 1, 1, 0, 0, 0, 0], [1, 0, 0, 1, 0, 1, 1, 1, 1]]\n" + ] + } + ], "source": [ - "print(term_freq)" + "print(term_freq(corpus))" ] }, { @@ -278,13 +363,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 95, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(['ironhack', 'cool', 'love', 'student'], [[1, 1, 0, 0], [1, 0, 1, 0], [1, 0, 0, 1]])\n" + ] + } + ], "source": [ "stop_words = ['all', 'six', 'less', 'being', 'indeed', 'over', 'move', 'anyway', 'fifty', 'four', 'not', 'own', 'through', 'yourselves', 'go', 'where', 'mill', 'only', 'find', 'before', 'one', 'whose', 'system', 'how', 'somewhere', 'with', 'thick', 'show', 'had', 'enough', 'should', 'to', 'must', 'whom', 'seeming', 'under', 'ours', 'has', 'might', 'thereafter', 'latterly', 'do', 'them', 'his', 'around', 'than', 'get', 'very', 'de', 'none', 'cannot', 'every', 'whether', 'they', 'front', 'during', 'thus', 'now', 'him', 'nor', 'name', 'several', 'hereafter', 'always', 'who', 'cry', 'whither', 'this', 'someone', 'either', 'each', 'become', 'thereupon', 'sometime', 'side', 'two', 'therein', 'twelve', 'because', 'often', 'ten', 'our', 'eg', 'some', 'back', 'up', 'namely', 'towards', 'are', 'further', 'beyond', 'ourselves', 'yet', 'out', 'even', 'will', 'what', 'still', 'for', 'bottom', 'mine', 'since', 'please', 'forty', 'per', 'its', 'everything', 'behind', 'un', 'above', 'between', 'it', 'neither', 'seemed', 'ever', 'across', 'she', 'somehow', 'be', 'we', 'full', 'never', 'sixty', 'however', 'here', 'otherwise', 'were', 'whereupon', 'nowhere', 'although', 'found', 'alone', 're', 'along', 'fifteen', 'by', 'both', 'about', 'last', 'would', 'anything', 'via', 'many', 'could', 'thence', 'put', 'against', 'keep', 'etc', 'amount', 'became', 'ltd', 'hence', 'onto', 'or', 'con', 'among', 'already', 'co', 'afterwards', 'formerly', 'within', 'seems', 'into', 'others', 'while', 'whatever', 'except', 'down', 'hers', 'everyone', 'done', 'least', 'another', 'whoever', 'moreover', 'couldnt', 'throughout', 'anyhow', 'yourself', 'three', 'from', 'her', 'few', 'together', 'top', 'there', 'due', 'been', 'next', 'anyone', 'eleven', 'much', 'call', 'therefore', 'interest', 'then', 'thru', 'themselves', 'hundred', 'was', 'sincere', 'empty', 'more', 'himself', 'elsewhere', 'mostly', 'on', 'fire', 'am', 'becoming', 'hereby', 'amongst', 'else', 'part', 'everywhere', 'too', 'herself', 'former', 'those', 'he', 'me', 'myself', 'made', 'twenty', 'these', 'bill', 'cant', 'us', 'until', 'besides', 'nevertheless', 'below', 'anywhere', 'nine', 'can', 'of', 'your', 'toward', 'my', 'something', 'and', 'whereafter', 'whenever', 'give', 'almost', 'wherever', 'is', 'describe', 'beforehand', 'herein', 'an', 'as', 'itself', 'at', 'have', 'in', 'seem', 'whence', 'ie', 'any', 'fill', 'again', 'hasnt', 'inc', 'thereby', 'thin', 'no', 'perhaps', 'latter', 'meanwhile', 'when', 'detail', 'same', 'wherein', 'beside', 'also', 'that', 'other', 'take', 'which', 'becomes', 'you', 'if', 'nobody', 'see', 'though', 'may', 'after', 'upon', 'most', 'hereupon', 'eight', 'but', 'serious', 'nothing', 'such', 'why', 'a', 'off', 'whereby', 'third', 'i', 'whole', 'noone', 'sometimes', 'well', 'amoungst', 'yours', 'their', 'rather', 'without', 'so', 'five', 'the', 'first', 'whereas', 'once']\n", "\n", - "# Write your code below\n" + "# Write your code below\n", + "def remove_stop_words(corpus):\n", + " corpus = remove_punctuation(corpus)\n", + " corpus = [re.sub(r'[^\\w\\s]','',i) for i in corpus]\n", + " corpus = [i.lower() for i in corpus]\n", + " bag_of_words = []\n", + " for i in corpus:\n", + " for j in i.split():\n", + " if j not in bag_of_words:\n", + " bag_of_words.append(j)\n", + " for i in stop_words:\n", + " if i in bag_of_words:\n", + " bag_of_words.remove(i)\n", + " term_freq = []\n", + " for i in corpus:\n", + " temp = []\n", + " for j in bag_of_words:\n", + " temp.append(i.split().count(j))\n", + " term_freq.append(temp)\n", + " return bag_of_words, term_freq\n", + "\n", + "print(remove_stop_words(corpus))\n" ] }, { @@ -315,6 +429,31 @@ " [1 1 0 1 0 0 1]]\n", " ```" ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['am' 'at' 'cool' 'ironhack' 'is' 'love' 'student']\n", + "[[0 0 1 1 1 0 0]\n", + " [0 0 0 1 0 1 0]\n", + " [1 1 0 1 0 0 1]]\n" + ] + } + ], + "source": [ + "from sklearn.feature_extraction.text import CountVectorizer\n", + "vectorizer = CountVectorizer()\n", + "X = vectorizer.fit_transform(corpus)\n", + "print(vectorizer.get_feature_names_out())\n", + "print(X.toarray())\n", + "\n" + ] } ], "metadata": {