From 992bb46784bc86dd7e264ed87e7753f8b7fd1bf2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Julianne=20Wei=C3=9F?= <julianne.weisz@gmail.com>
Date: Fri, 1 May 2026 15:06:03 +0200
Subject: [PATCH 1/3] Add A4 solution

---
 A4/juwei95-Markov.py | 67 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 A4/juwei95-Markov.py

diff --git a/A4/juwei95-Markov.py b/A4/juwei95-Markov.py
new file mode 100644
index 0000000..d61eec5
--- /dev/null
+++ b/A4/juwei95-Markov.py
@@ -0,0 +1,67 @@
+import argparse
+import sys
+import random
+
+
+def parse_args() -> argparse.Namespace:  # Parse sys.argv; calls sys.exit(-1) unless the order is > 0.
+    parser = argparse.ArgumentParser(prog='juwei95-Markov.py', description='Generates random text based on training data')
+    parser.add_argument('-o', '--order', action='store',      metavar='k', default=1, type=int, help='use Markov order k')
+    parser.add_argument('-w', '--words', action='store_true',                 help='use word-based generation instead of character-based generation')
+    parser.add_argument('-s', '--seed',  action='store',      metavar='seed', default=0, help='initialize the random number generator with seed')  # no type=, so a seed given on the command line stays a str
+    parser.add_argument('filename',      action='store',      nargs='?',      help='input file name containing training text - if omitted, input is read from stdin')
+    args = parser.parse_args()
+    if args.order <= 0:  # type=int only guarantees an integer, not a positive one
+        print(f"Invalid order of {args.order}, order must be > 0!", file=sys.stderr)
+        sys.exit(-1)  # sys.exit instead of the site-provided exit(), which is missing under `python -S`
+    return args
+
+def parse_file(args: argparse.Namespace) -> list[str]:
+    """Read the training text and return it as a list of tokens.
+
+    Tokens are whitespace-separated words with --words, else single characters.
+    """
+    infile = open(args.filename) if args.filename else sys.stdin
+    tokens: list[str] = []
+    try:
+        for line in infile:
+            tokens.extend(line.split() if args.words else line)
+    finally:
+        if args.filename:  # close only a file opened here, never stdin
+            infile.close()
+    return tokens
+
+def build_continuation_map(tokens: list[str], args: argparse.Namespace) -> dict[tuple[str], list[str]]:
+    """Map each run of ``args.order`` consecutive tokens to every token that follows it.
+
+    Followers are recorded once per occurrence, so duplicates are kept.
+    """
+    order = args.order
+    table: dict[tuple[str], list[str]] = {}
+    for start in range(len(tokens) - order):
+        table.setdefault(tuple(tokens[start:start + order]), []).append(tokens[start + order])
+    return table
+
+def print_output_token(token: str, args: argparse.Namespace):
+    print(token, end=" " if args.words else "")
+
+def generate_text(continuation_map: dict[tuple[str], list[str]], tokens: list[str], args: argparse.Namespace) -> str:
+    random.seed(args.seed)
+    context = tokens[:args.order]
+    for token in tokens[:args.order]:
+        print_output_token(token, args)
+    while tuple(context) in continuation_map:
+        continuations = continuation_map[tuple(context)]
+        next_token = continuations[random.randrange(len(continuations))]
+        context.pop(0)
+        context.append(next_token)
+        print_output_token(next_token, args)
+    print()
+
+def main():
+    args = parse_args()
+    tokens = parse_file(args)
+    continuation_map = build_continuation_map(tokens, args)
+    generate_text(continuation_map, tokens, args)
+
+if __name__ == "__main__":
+    main()

From de567ab7edb9f725fe636708b905c4935a79d45e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Julianne=20Wei=C3=9F?= <julianne.weisz@gmail.com>
Date: Fri, 1 May 2026 16:08:53 +0200
Subject: [PATCH 2/3] Refactor to generator pattern

---
 A4/juwei95-Markov.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/A4/juwei95-Markov.py b/A4/juwei95-Markov.py
index d61eec5..8de7a60 100644
--- a/A4/juwei95-Markov.py
+++ b/A4/juwei95-Markov.py
@@ -44,24 +44,28 @@ def build_continuation_map(tokens: list[str], args: argparse.Namespace) -> dict[
 def print_output_token(token: str, args: argparse.Namespace):
     print(token, end=" " if args.words else "")
 
-def generate_text(continuation_map: dict[tuple[str], list[str]], tokens: list[str], args: argparse.Namespace) -> str:
+def generate_text(continuation_map: dict[tuple[str], list[str]], tokens: list[str], args: argparse.Namespace):  # generator: yields the output tokens one at a time
     random.seed(args.seed)
     context = tokens[:args.order]
     for token in tokens[:args.order]:
-        print_output_token(token, args)
+        yield token  # the output opens with the first args.order training tokens
     while tuple(context) in continuation_map:
         continuations = continuation_map[tuple(context)]
         next_token = continuations[random.randrange(len(continuations))]
         context.pop(0)
         context.append(next_token)
-        print_output_token(next_token, args)
-    print()
+        yield next_token  # the loop ends only when the current context has no recorded continuation
 
 def main():
     args = parse_args()
     tokens = parse_file(args)
     continuation_map = build_continuation_map(tokens, args)
-    generate_text(continuation_map, tokens, args)
+    try:
+        for token in generate_text(continuation_map, tokens, args):
+            print_output_token(token, args)  # print tokens one by one as the generator yields them
+    except KeyboardInterrupt:  # Ctrl+C ends the run without a traceback
+        pass
+    print()  # finish the output with a newline, also after an interrupt
 
 if __name__ == "__main__":
     main()

From f9ab4ff91fe03dc9465cd41732a27dddfdf4e191 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Julianne=20Wei=C3=9F?= <julianne.weisz@gmail.com>
Date: Fri, 1 May 2026 16:09:43 +0200
Subject: [PATCH 3/3] Add README

---
 A4/README-A4.md | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 A4/README-A4.md

diff --git a/A4/README-A4.md b/A4/README-A4.md
new file mode 100644
index 0000000..b1d84b0
--- /dev/null
+++ b/A4/README-A4.md
@@ -0,0 +1,42 @@
+# A4 - Markov
+
+Generates random text based on training data.
+
+
+## Usage
+
+### Synopsis
+
+```bash
+juwei95-Markov.py [-h] [-o k] [-w] [-s seed] [filename]
+```
+
+### Positional arguments
+
+| Argument   | Description                                                                          |
+| ---------- | ------------------------------------------------------------------------------------ |
+| `filename` | The input file containing the training data, if omitted the input is read from stdin |
+
+### Options
+
+| Opt       | Option        | Description                                                                                                          |
+| --------- | ------------- | -------------------------------------------------------------------------------------------------------------------- |
+| `-h`      | `--help`      | Show a help message and exit                                                                                         |
+| `-o k`    | `--order k`   | Use Markov order k                                                                                                   |
+| `-w`      | `--words`     | Use word-based generation instead of character-based generation                                                      |
+| `-s seed` | `--seed seed` | Initialize the random number generator with seed. Same input with the same seed will always produce the same output. |
+
+
+### Example
+
+```bash
+python3 juwei95-Markov.py erlkoenig.txt -o 5
+```
+
+
+## Details
+
+* The next output token is randomly selected from a list of non-unique possible continuations for the current context. The chance of a continuation being selected is implicitly determined by its number of occurrences in the training data after the given context.
+* ⚠️ ***Warning***: There is no hard limit on the amount of text produced. Chances are that the generator will not terminate by itself. You may have to use `ctrl + c` to kill it.
+* Implemented in Python using only standard library modules.
+* Tested using Python 3.12.12 on Ubuntu 24.04.3 LTS under WSL.