Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
262 changes: 255 additions & 7 deletions regexes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,8 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"sample_text = \"\"\"\n",
Expand All @@ -32,11 +28,263 @@
"And dread avenging Phœbus, son of Jove.”\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"lines = [l.split() for l in sample_text.strip().splitlines()]"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[['“Ye', 'kings', 'and', 'warriors!', 'may', 'your', 'vows', 'be', 'crown’d,'],\n",
" ['And', 'Troy’s', 'proud', 'walls', 'lie', 'level', 'with', 'the', 'ground.'],\n",
" ['May', 'Jove', 'restore', 'you', 'when', 'your', 'toils', 'are', 'o’er'],\n",
" ['Safe', 'to', 'the', 'pleasures', 'of', 'your', 'native', 'shore.'],\n",
" ['But,', 'oh!', 'relieve', 'a', 'wretched', 'parent’s', 'pain,'],\n",
" ['And', 'give', 'Chryseïs', 'to', 'these', 'arms', 'again;'],\n",
" ['If', 'mercy', 'fail,', 'yet', 'let', 'my', 'presents', 'move,'],\n",
" ['And', 'dread', 'avenging', 'Phœbus,', 'son', 'of', 'Jove.”']]"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Explicit-loop equivalent of: [l.split() for l in sample_text.strip().splitlines()]\n",
"words_in_lines = []\n",
"for line in sample_text.strip().splitlines():\n",
"    words_in_lines.append(line.split())\n",
"\n",
"words_in_lines"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[['Ye', 'kings', 'and', 'warriors', 'may', 'your', 'vows', 'be', 'crown', 'd'],\n",
" ['And',\n",
" 'Troy',\n",
" 's',\n",
" 'proud',\n",
" 'walls',\n",
" 'lie',\n",
" 'level',\n",
" 'with',\n",
" 'the',\n",
" 'ground'],\n",
" ['May', 'Jove', 'restore', 'you', 'when', 'your', 'toils', 'are', 'o', 'er'],\n",
" ['Safe', 'to', 'the', 'pleasures', 'of', 'your', 'native', 'shore'],\n",
" ['But', 'oh', 'relieve', 'a', 'wretched', 'parent', 's', 'pain'],\n",
" ['And', 'give', 'Chryseïs', 'to', 'these', 'arms', 'again'],\n",
" ['If', 'mercy', 'fail', 'yet', 'let', 'my', 'presents', 'move'],\n",
" ['And', 'dread', 'avenging', 'Phœbus', 'son', 'of', 'Jove']]"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import re\n",
"\n",
"word_regex = re.compile(r\"\\w+\")\n",
"\n",
"lines = [word_regex.findall(l) for l in sample_text.strip().splitlines()]\n",
"lines\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"n_ground = 0\n",
"for line in lines:\n",
"    for word in line:\n",
"        if word == \"ground\":\n",
"            n_ground += 1\n",
"n_ground"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Homework: Get the counts for _every_ word in the sample text."
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"“ye kings and warriors! may your vows be crown’d,\n",
"and troy’s proud walls lie level with the ground.\n",
"may jove restore you when your toils are o’er\n",
"safe to the pleasures of your native shore.\n",
"but, oh! relieve a wretched parent’s pain,\n",
"and give chryseïs to these arms again;\n",
"if mercy fail, yet let my presents move,\n",
"and dread avenging phœbus, son of jove.”\n",
"\n"
]
}
],
"source": [
"lowered_sample_text = sample_text.lower()\n",
"print(lowered_sample_text)"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['ye', 'kings', 'and', 'warriors', 'may', 'your', 'vows', 'be', 'crown’d', 'and', 'troy’s', 'proud', 'walls', 'lie', 'level', 'with', 'the', 'ground', 'may', 'jove', 'restore', 'you', 'when', 'your', 'toils', 'are', 'o’er', 'safe', 'to', 'the', 'pleasures', 'of', 'your', 'native', 'shore', 'but', 'oh', 'relieve', 'a', 'wretched', 'parent’s', 'pain', 'and', 'give', 'chryseïs', 'to', 'these', 'arms', 'again', 'if', 'mercy', 'fail', 'yet', 'let', 'my', 'presents', 'move', 'and', 'dread', 'avenging', 'phœbus', 'son', 'of', 'jove']\n"
]
}
],
"source": [
"word_regex = re.compile(r\"\\w+(?:['’]\\w+)*\")\n",
"\n",
"words = word_regex.findall(lowered_sample_text)\n",
"print(words)"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ye: 1\n",
"kings: 1\n",
"and: 4\n",
"warriors: 1\n",
"may: 2\n",
"your: 3\n",
"vows: 1\n",
"be: 1\n",
"crown’d: 1\n",
"troy’s: 1\n",
"proud: 1\n",
"walls: 1\n",
"lie: 1\n",
"level: 1\n",
"with: 1\n",
"the: 2\n",
"ground: 1\n",
"jove: 2\n",
"restore: 1\n",
"you: 1\n",
"when: 1\n",
"toils: 1\n",
"are: 1\n",
"o’er: 1\n",
"safe: 1\n",
"to: 2\n",
"pleasures: 1\n",
"of: 2\n",
"native: 1\n",
"shore: 1\n",
"but: 1\n",
"oh: 1\n",
"relieve: 1\n",
"a: 1\n",
"wretched: 1\n",
"parent’s: 1\n",
"pain: 1\n",
"give: 1\n",
"chryseïs: 1\n",
"these: 1\n",
"arms: 1\n",
"again: 1\n",
"if: 1\n",
"mercy: 1\n",
"fail: 1\n",
"yet: 1\n",
"let: 1\n",
"my: 1\n",
"presents: 1\n",
"move: 1\n",
"dread: 1\n",
"avenging: 1\n",
"phœbus: 1\n",
"son: 1\n"
]
}
],
"source": [
"word_counts = {}\n",
"\n",
"for word in words:\n",
"    word_counts[word] = word_counts.get(word, 0) + 1\n",
"\n",
"for word, count in word_counts.items():\n",
"    print(f\"{word}: {count}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.1"
}
},
"nbformat": 4,
Expand Down