-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcodex_extractor_codexmode.py
More file actions
90 lines (82 loc) · 3.17 KB
/
codex_extractor_codexmode.py
File metadata and controls
90 lines (82 loc) · 3.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import os
import json
import sys
import re
def _parse_codex_value(value):
    """Decode the text after ``@key:`` into a Python value.

    Valid JSON (quoted strings, numbers, ``true``/``null``, arrays, objects)
    is decoded as JSON. Anything else falls back to a hand-typed array such
    as ``[tag1, tag2]`` (returned as a list of strings) or, failing that, the
    raw text with surrounding double quotes removed.
    """
    try:
        return json.loads(value)
    except (ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError; RecursionError guards
        # against pathologically nested brackets. Anything else (for example
        # KeyboardInterrupt) must propagate instead of being swallowed.
        pass

    # fallback: handle manually typed arrays like [tag1, tag2]
    if value.startswith("[") and value.endswith("]") and "," in value:
        inner = value[1:-1]
        return [part.strip().strip('"').strip("'") for part in inner.split(",")]
    return value.strip('"')


def parse_codex_blocks(md_text):
    """Extract codex entries from markdown text.

    Each line is stripped and then interpreted as follows:

    * A line starting with ``@`` and containing ``:`` is a ``@key: value``
      field; the value is decoded by ``_parse_codex_value`` and stored under
      ``key`` (without the leading ``@``) in the current entry.
    * A ``@`` line containing ``@category:`` closes the current entry (if it
      has any fields) before its own field is stored, so every
      ``@category:`` line starts a new entry.
    * A line starting with ``#`` sets the current entry's ``"title"`` to the
      heading text, but only when the current entry already has fields.
    * Every other line is ignored.

    Args:
        md_text: Full markdown text to scan.

    Returns:
        A list of dicts, one per entry, in order of appearance. Empty when
        no ``@key: value`` lines are present.
    """
    entries = []
    current_entry = {}
    for raw_line in md_text.splitlines():
        line = raw_line.strip()
        if line.startswith("@"):
            if "@category:" in line and current_entry:
                entries.append(current_entry)
                current_entry = {}
            if ":" in line:
                key, value = line.split(":", 1)
                key = key.strip().lstrip("@")
                current_entry[key] = _parse_codex_value(value.strip())
        elif line.startswith("#") and current_entry:
            current_entry["title"] = line.lstrip("#").strip()

    # Flush the trailing entry, which has no following @category to close it.
    if current_entry:
        entries.append(current_entry)
    return entries
def write_codex_json(chapter_file, entries):
    """Write parsed codex entries to a JSON file beside this script.

    The file is placed in a ``codex_json`` directory next to this module
    (created if missing) and named ``codex_<chapter>.json``, where
    ``<chapter>`` is the chapter file's base name without a trailing ``.md``.
    A confirmation line is printed after the file is written.

    Args:
        chapter_file: Path of the source chapter; only its base name is used.
        entries: JSON-serializable data, dumped with an indent of 2.

    Returns:
        A ``(json_path, log_path)`` tuple. ``log_path`` is the sibling
        ``codex_<chapter>_scan.log`` path; this function does not create it.
    """
    output_dir = os.path.join(os.path.dirname(__file__), "codex_json")
    os.makedirs(output_dir, exist_ok=True)

    base_name = os.path.basename(chapter_file)
    # Strip only a trailing ".md". A blanket replace would also eat ".md"
    # occurring mid-name (e.g. "notes.mdraft.md" -> "notesraft").
    if base_name.endswith(".md"):
        base_name = base_name[: -len(".md")]

    json_filename = os.path.join(output_dir, f"codex_{base_name}.json")
    with open(json_filename, "w", encoding="utf-8") as f:
        json.dump(entries, f, indent=2)
    print(f"[✓] Codex exported to: {json_filename}")
    return json_filename, os.path.join(output_dir, f"codex_{base_name}_scan.log")
def scan_symbols(md_text):
    """Report every quote or bracket character found on each line.

    Lines are numbered from 1. For each line, the symbols are checked in a
    fixed order (quotes, curly, square, round, angle); each symbol present
    yields one report string, however many times it occurs on that line.

    Args:
        md_text: Text to scan.

    Returns:
        A list of strings shaped ``"Line <n>: <symbol name> → <line>"``,
        where ``<line>`` is the line with surrounding whitespace removed.
    """
    labelled_symbols = (
        ('"', 'Double Quote'),
        ("'", 'Single Quote'),
        ('{', 'Curly Brace Open'),
        ('}', 'Curly Brace Close'),
        ('[', 'Square Bracket Open'),
        (']', 'Square Bracket Close'),
        ('(', 'Parenthesis Open'),
        (')', 'Parenthesis Close'),
        ('<', 'Angle Bracket Open'),
        ('>', 'Angle Bracket Close'),
    )

    findings = []
    for line_no, raw_line in enumerate(md_text.splitlines(), start=1):
        shown = raw_line.strip()
        findings.extend(
            f"Line {line_no}: {label} → {shown}"
            for char, label in labelled_symbols
            if char in raw_line
        )
    return findings
if __name__ == "__main__":
    # Command-line entry point: takes exactly one argument, the chapter
    # markdown file. Exports its codex entries as JSON, then prints a symbol
    # scan of the file and saves the same lines to the scan log.
    if len(sys.argv) != 2:
        # Name the script as it was actually invoked rather than a
        # hard-coded file name that can drift from the real one.
        print(f"Usage: python3 {os.path.basename(sys.argv[0])} <chapter_file.md>")
        sys.exit(1)

    chapter_file = sys.argv[1]
    if not os.path.exists(chapter_file):
        print(f"[ERROR] File not found: {chapter_file}")
        sys.exit(1)

    # Read as UTF-8 explicitly so decoding does not depend on the locale.
    with open(chapter_file, "r", encoding="utf-8") as f:
        md_text = f.read()

    # Extract codex blocks
    codex_entries = parse_codex_blocks(md_text)
    json_path, log_path = write_codex_json(chapter_file, codex_entries)

    # Run and save symbol scan
    print("\n=== Symbol Scan ===")
    symbol_results = scan_symbols(md_text)
    # Every scan line contains "→", which locale encodings such as cp1252
    # cannot represent; write the log as UTF-8 to avoid UnicodeEncodeError.
    with open(log_path, "w", encoding="utf-8") as log_file:
        for res in symbol_results:
            print(res)
            log_file.write(res + "\n")