TBIAPBC · juwei95 · May 1, 2026 · May 1, 2026 · May 1, 2026
diff --git a/A4/README-A4.md b/A4/README-A4.md
@@ -0,0 +1,42 @@
+# A4 - Markov
+
+Generates random text based on training data.
+
+
+## Usage
+
+### Synopsis
+
+```bash
+juwei95-Markov.py [-h] [-o k] [-w] [-s seed] [filename]
+```
+
+### Positional arguments
+
+| Argument   | Description                                                                          |
+| ---------- | ------------------------------------------------------------------------------------ |
+| `filename` | The input file containing the training data; if omitted the input is read from stdin |
+
+### Options
+
+| Opt       | Option        | Description                                                                                                          |
+| --------- | ------------- | -------------------------------------------------------------------------------------------------------------------- |
+| `-h`      | `--help`      | Show a help message and exit                                                                                         |
+| `-o k`    | `--order k`   | Use Markov order k                                                                                                   |
+| `-w`      | `--words`     | Use word-based generation instead of character-based generation                                                      |
+| `-s seed` | `--seed seed` | Initialize the random number generator with seed. Same input with the same seed will always produce the same output. |
+
+
+### Example
+
+```bash
+python3 juwei95-Markov.py erlkoenig.txt -o 5
+```
+
+
+## Details
+
+* The next output token is randomly selected from a list of non-unique possible continuations for the current context. The chance of a continuation being selected is implicitly determined by its number of occurrences in the training data after the given context.
+* ⚠️ ***Warning***: There is no hard limit on the amount of text produced. Chances are that the generator will not terminate by itself. You may have to use `ctrl + c` to kill it.
+* Implemented in python using only standard library modules.
+* Tested using python 3.12.12 on Ubuntu 24.04.3 LTS under WSL.
diff --git a/A4/juwei95-Markov.py b/A4/juwei95-Markov.py
@@ -0,0 +1,71 @@
+import argparse
+import sys
+import random
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(prog='juwei95-Markov.py', description='Generates random text based on training data')
+    parser.add_argument('-o', '--order', action='store',      metavar='k', default=1, type=int, help='use Markov order k')
+    parser.add_argument('-w', '--words', action='store_true',                 help='use word-based generation instead of character-based generation')
+    parser.add_argument('-s', '--seed',  action='store',      metavar='seed', default=0, help='initialize the random number generator with seed')
+    parser.add_argument('filename',      action='store',      nargs='?',      help='input file name containing training text - if omitted, input is read from stdin')
+    args = parser.parse_args()
+    if args.order <= 0:
+        print(f"Invalid order of {args.order}, order must be > 0!", file=sys.stderr)
+        exit(-1)
+    return args
+
+def parse_file(args: argparse.Namespace):
+    if args.filename:
+        infile = open(args.filename)
+    else:
+        infile = sys.stdin
+    tokens = []
+    for line in infile:
+        if args.words:
+            tokens.extend(filter(lambda word: word != "", line.split()))
+        else:
+            tokens.extend(list(line))
+    if args.filename:
+        infile.close()
+    return tokens
+
+def build_continuation_map(tokens: list[str], args: argparse.Namespace) -> dict[tuple[str], list[str]]:
+    continuation_map: dict[tuple[str], list[str]] = {}
+    for pos in range(len(tokens) - args.order):
+        key = tuple(tokens[pos:pos + args.order])
+        value = tokens[pos + args.order]
+        if key in continuation_map:
+            continuation_map[key].append(value)
+        else:
+            continuation_map[key] = [value]
+    return continuation_map
+
+def print_output_token(token: str, args: argparse.Namespace):
+    print(token, end=" " if args.words else "")
+
+def generate_text(continuation_map: dict[tuple[str], list[str]], tokens: list[str], args: argparse.Namespace):
+    random.seed(args.seed)
+    context = tokens[:args.order]
+    for token in tokens[:args.order]:
+        yield token
+    while tuple(context) in continuation_map:
+        continuations = continuation_map[tuple(context)]
+        next_token = continuations[random.randrange(len(continuations))]
+        context.pop(0)
+        context.append(next_token)
+        yield next_token
+
+def main():
+    args = parse_args()
+    tokens = parse_file(args)
+    continuation_map = build_continuation_map(tokens, args)
+    try:
+        for token in generate_text(continuation_map, tokens, args):
+            print_output_token(token, args)
+    except KeyboardInterrupt:
+        pass
+    print()
+
+if __name__ == "__main__":
+    main()