From 4dca0aa240ea98386480c2b267f9f7e294ea29e9 Mon Sep 17 00:00:00 2001 From: Juliano Nogueira Date: Sat, 28 May 2022 15:41:03 +0100 Subject: [PATCH] lab-strings --- your-code/challenge-1.ipynb | 235 +++++++++++++++++++++++++++++++----- your-code/challenge-2.ipynb | 139 +++++++++++++++++---- 2 files changed, 322 insertions(+), 52 deletions(-) diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb index c574eba..95f8150 100644 --- a/your-code/challenge-1.ipynb +++ b/your-code/challenge-1.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -33,12 +33,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Durante un tiempo no estuvo segura de si su marido era su marido\n" + ] + } + ], "source": [ "str_list = ['Durante', 'un', 'tiempo', 'no', 'estuvo', 'segura', 'de', 'si', 'su', 'marido', 'era', 'su', 'marido']\n", - "# Your code here:\n" + "\n", + "# Your code here:\n", + "\n", + "String_done = \" \".join(str_list)\n", + "print(String_done)" ] }, { @@ -50,12 +62,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Grocery list :Bananas, bread, Brownie Mix, broccoli\n" + ] + } + ], "source": [ - "food_list = ['Bananas', 'Chocolate', 'bread', 'diapers', 'Ice Cream', 'Brownie Mix', 'broccoli']\n", - "# Your code here:\n" + "Grocery_list = ['Bananas', 'Chocolate', 'bread', 'diapers', 'Ice Cream', 'Brownie Mix', 'broccoli']\n", + "\n", + "# Your code here:\n", + "\n", + "Glist_string = \",\".join(Grocery_list)\n", + "dictum = \"[Bb]\\\\w+\\s?\\w+\"\n", + "print(\"Grocery list :\" + \", \".join(re.findall(dictum,Glist_string)))" ] }, { @@ -69,16 +94,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Isert the area:5\n", + "78.53981633974483\n" + ] + } + ], "source": [ "import math\n", "\n", "string1 = \"The area of the circle with radius:\"\n", "string2 = \"is:\"\n", "radius = 4.5\n", - "\n", + "Input = input(\"Isert the area:\")\n", + "x = float(Input)\n", "def area(x, pi = math.pi):\n", " # This function takes a radius and returns the area of a circle. We also pass a default value for pi.\n", " # Input: Float (and default value for pi)\n", @@ -88,9 +123,11 @@ " # Sample Output: 78.53981633\n", " \n", " # Your code here:\n", + " \n", " return pi * (x**2)\n", " \n", - "# Your output string here:\n" + "# Your output string here:\n", + "print(area(x))" ] }, { @@ -106,9 +143,61 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'Some': 2,\n", + " 'say': 3,\n", + " 'the': 1,\n", + " 'world': 1,\n", + " 'will': 1,\n", + " 'end': 1,\n", + " 'in': 2,\n", + " 'fire': 2,\n", + " 'ice': 2,\n", + " 'From': 1,\n", + " 'what': 1,\n", + " 'I': 4,\n", + " 've': 1,\n", + " 'tasted': 1,\n", + " 'of': 2,\n", + " 'desire': 1,\n", + " 'hold': 1,\n", + " 'with': 1,\n", + " 'those': 1,\n", + " 'who': 1,\n", + " 'favor': 1,\n", + " 'But': 1,\n", + " 'if': 1,\n", + " 'it': 1,\n", + " 'had': 1,\n", + " 'to': 1,\n", + " 'perish': 1,\n", + " 'twice': 1,\n", + " 'think': 1,\n", + " 'know': 1,\n", + " 'enough': 1,\n", + " 'hate': 1,\n", + " 'To': 1,\n", + " 'that': 1,\n", + " 'for': 1,\n", + " 'destruction': 1,\n", + " 'Is': 1,\n", + " 'also': 1,\n", + " 'great': 1,\n", + " 'And': 1,\n", + " 'would': 1,\n", + " 'suffice': 1}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "poem = \"\"\"Some say the world will end in fire,\n", "Some say in ice.\n", @@ -120,7 +209,15 @@ "Is also great\n", "And would suffice.\"\"\"\n", "\n", - "# Your code here:\n" + "# Your code here:\n", + "from collections import Counter\n", + "\n", + "dictum = \"\\\\w+\"\n", + "poem_words = re.findall(dictum,poem)\n", + "poem_words\n", + "\n", + "numbers_ofwords = dict(Counter(poem_words))\n", + "numbers_ofwords" ] }, { @@ -132,9 +229,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['I', 'was', 'angry', 'with', 'my', 'friend', 'I', 'told', 'my', 'wrath', 'my', 'wrath', 'did', 'end', 'I', 'was', 'angry', 'with', 'my', 'foe', 'I', 'told', 'it', 'not', 'my', 'wrath', 'did', 'grow', 'And', 'I', 'waterd', 'it', 'in', 'fears', 'Night', 'morning', 'with', 'my', 'tears', 'And', 'I', 'sunned', 'it', 'with', 'smiles', 'And', 'with', 'soft', 'deceitful', 'wiles', 'And', 'it', 'grew', 'both', 'day', 'and', 'night', 'Till', 'it', 'bore', 'an', 'apple', 'bright', 'And', 'my', 'foe', 'beheld', 'it', 'shine', 'And', 'he', 'knew', 'that', 'it', 'was', 'mine', 'And', 'into', 'my', 'garden', 'stole', 'When', 'the', 'night', 'had', 'veild', 'the', 'pole', 'In', 'the', 'morning', 'glad', 'I', 'see', 'My', 'foe', 'outstretched', 'beneath', 'the', 'tree']\n", + "\n", + "\n", + "i was angry with my friend i told my wrath my wrath did end i was angry with my foe i told it not my wrath did grow and i waterd it in fears night morning with my tears and i sunned it with smiles and with soft deceitful wiles and it grew both day and night till it bore an apple bright and my foe beheld it shine and he knew that it was mine and into my garden stole when the night had veild the pole in the morning glad i see my foe outstretched beneath the tree\n" + ] + } + ], "source": [ "blacklist = ['and', 'as', 'an', 'a', 'the', 'in', 'it']\n", "\n", @@ -158,7 +266,23 @@ "In the morning glad I see; \n", "My foe outstretched beneath the tree.\"\"\"\n", "\n", - "# Your code here:\n" + "# Your code here:\n", + "\n", + "dictum = \"\\\\w+\"\n", + "\n", + "poem_words_A= re.findall(dictum,poem)\n", + "poem_words_A\n", + "poem_words_B = ' '.join(map(str, poem_words_A))\n", + "\n", + "for word in (blacklist,poem_words_A):\n", + " if word in blacklist:\n", + " poem_words_A.remove(word)\n", + "\n", + "print(poem_words_A)\n", + "\n", + "print (\"\\n\")\n", + "\n", + "print(poem_words_B.lower())" ] }, { @@ -172,16 +296,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['The', 'Petals']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import re\n", "\n", "poem = \"\"\"The apparition of these faces in the crowd;\n", "Petals on a wet, black bough.\"\"\"\n", "\n", - "# Your code here:\n" + "# Your code here:\n", + "\n", + "dictum = \"[A-Z]\\\\w+\"\n", + "\n", + "re.findall(dictum,poem)" ] }, { @@ -193,13 +332,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ "data = ['123abc', 'abc123', 'JohnSmith1', 'ABBY4', 'JANE']\n", "\n", - "# Your code here:\n" + "# Your code here:\n", + "\n", + "second_data = str(data)\n", + "second_data\n", + "return_elements = re.search(\"\\d+?\\w+?\\d+?\",second_data)\n", + "\n", + "print (return_elements)" ] }, { @@ -215,18 +368,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ "data = ['123abc', 'abc123', 'JohnSmith1', 'ABBY4', 'JANE']\n", - "# Your code here:\n" + "# Your code here:\n", + "\n", + "bonus_data = str(data)\n", + "bonus_data\n", + "atleast_one = re.search(\"(?=.*\\d)(?=.*[a-z])\",bonus_data)\n", + "\n", + "#(?=.*[a-z]) = Look ahead to see if at least one lower case letter exists\n", + "#(?=.*\\d) = Look ahead to see if at least one digit exists\n", + "\n", + "print (atleast_one)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -240,7 +417,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb index 6873bd2..d4a460e 100644 --- a/your-code/challenge-2.ipynb +++ b/your-code/challenge-2.ipynb @@ -72,11 +72,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Ironhack is cool.', 'I love Ironhack.', 'I am a student at Ironhack.']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "docs = ['doc1.txt', 'doc2.txt', 'doc3.txt']" + "# Write your code here\n", + "import pandas as pd\n", + "\n", + "\n", + "dataframe = pd.DataFrame(pd.read_csv(docs[0]))\n", + "\n", + "for item in range(1,len(docs)):\n", + " data = pd.read_csv(docs[item])\n", + " df = pd.DataFrame(data)\n", + " dataframe = pd.concat([dataframe,df],axis=1)\n", + "\n", + "list(dataframe)" ] }, { @@ -88,11 +110,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "list" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Write your code here\n" + "corpus = list(dataframe)\n", + "type(corpus)" ] }, { @@ -104,10 +138,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Ironhack is cool.', 'I love Ironhack.', 'I am a student at Ironhack.']\n" + ] + } + ], + "source": [ + "print(corpus)" + ] }, { "cell_type": "markdown", @@ -132,11 +176,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[' ', ' ', 'I', 'r', 'o', 'n', 'h', 'a', 'c', 'k', ' ', 'i', 's', ' ', 'c', 'o', 'o', 'l', ' ', ' ', ' ', ' ', ' ', 'I', ' ', 'l', 'o', 'v', 'e', ' ', 'I', 'r', 'o', 'n', 'h', 'a', 'c', 'k', ' ', ' ', ' ', ' ', ' ', 'I', ' ', 'a', 'm', ' ', 'a', ' ', 's', 't', 'u', 'd', 'e', 'n', 't', ' ', 'a', 't', ' ', 'I', 'r', 'o', 'n', 'h', 'a', 'c', 'k', ' ', ' ', ' ']\n" + ] + } + ], "source": [ - "# Write your code here" + "# Write your code here\n", + "import re\n", + "\n", + "corpus_A = str(corpus)\n", + "corpus_lower= str((corpus_A.lower()))\n", + "corpus_B = [re.sub(r\"(^[^\\w]+)|([^\\w]+$)\", \" \", x) for x in corpus_A]\n", + "\n", + "print(corpus_B)" ] }, { @@ -148,10 +207,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "bag_of_words = []" + ] }, { "cell_type": "markdown", @@ -166,11 +227,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['k', 'v', 'o', 'l', 'm', 'h', 'u', 'e', 'd', ' ', 'I', 'c', 'i', 's', 'r', 'n', 'a', 't']\n" + ] + } + ], "source": [ - "# Write your code here\n" + "# Write your code here\n", + "\n", + "bag_of_arrays = set(corpus_B)\n", + "bag_of_words = list(bag_of_arrays)\n", + "\n", + "print(bag_of_words)" ] }, { @@ -200,7 +274,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -218,10 +292,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0, 0, 1, 1, 1, 0, 0],\n", + " [0, 0, 0, 1, 0, 1, 0],\n", + " [1, 1, 0, 1, 0, 0, 1]])" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.feature_extraction.text import CountVectorizer\n", + "vectorizer = CountVectorizer()\n", + "corpus \n", + "term_freq = vectorizer.fit_transform(corpus)\n", + "term_freq.toarray() " + ] }, { "cell_type": "markdown", @@ -304,7 +397,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -318,7 +411,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.9.7" } }, "nbformat": 4,