-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patha2ml-reader.lua
More file actions
189 lines (167 loc) · 5.55 KB
/
a2ml-reader.lua
File metadata and controls
189 lines (167 loc) · 5.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
-- SPDX-License-Identifier: MIT
-- Copyright (c) 2026 Jonathan D.A. Jewell
--
-- a2ml-reader.lua — Pandoc custom reader for A2ML (Attested Markup Language)
--
-- Parses .a2ml files into Pandoc's internal AST, enabling conversion
-- to any Pandoc output format (HTML, PDF, DOCX, Markdown, etc.).
--
-- Usage:
-- pandoc -f a2ml-reader.lua input.a2ml -o output.html
-- pandoc -f a2ml-reader.lua input.a2ml -t markdown
--
-- A2ML Syntax:
-- # Heading → Header
-- ## Sub-heading → Header (level 2)
-- - list item → BulletList
-- * list item → BulletList
-- **bold** → Strong
-- *italic* → Emph
-- [text](url) → Link
-- @ref(id) → Link to #id
-- ```lang ... ``` → CodeBlock
-- @directive: ... @end → Div with classes "a2ml-directive" and "a2ml-<directive name>"
-- ;; comment → stripped (Scheme-style comments)
-- Paragraphs → Para (separated by blank lines)
--
-- Media type: application/vnd.a2ml (IANA registration pending)
--- Reader entry point — Pandoc calls this with the file content.
--
-- Walks the input line by line and builds a flat list of blocks. The checks
-- are tried in this order for every line:
--   1. `;;` comment lines are dropped (they do not end a paragraph);
--   2. `#`-prefixed lines become a Header whose level is the number of `#`;
--   3. `@name:` ... `@end` becomes a Div with classes "a2ml-directive" and
--      "a2ml-<name>", holding one Para with the inline-parsed content;
--   4. a ``` fence opens a CodeBlock that runs to the closing ``` line;
--   5. consecutive `-` / `*` items are gathered into one BulletList;
--   6. a blank line ends the current paragraph;
--   7. any other line is appended to the current paragraph.
-- An unterminated directive or code fence simply runs to the end of input.
--
-- @param input           document source; converted with tostring()
-- @param reader_options  accepted for Pandoc's reader interface; not used here
-- @return the result of pandoc.Pandoc(blocks)
function Reader(input, reader_options)
  -- Normalise CRLF and lone CR to LF so a stray "\r" never ends up inside
  -- heading titles, list items, paragraph text or code block content.
  local source = tostring(input):gsub("\r\n?", "\n")

  -- Split into lines
  local lines = {}
  for line in source:gmatch("([^\n]*)\n?") do
    lines[#lines + 1] = line
  end

  local blocks = {}
  local para_buffer = {}

  -- Flush accumulated paragraph text into a single Para block
  local function flush_para()
    if #para_buffer > 0 then
      local text = table.concat(para_buffer, "\n")
      blocks[#blocks + 1] = pandoc.Para(parse_inlines(text))
      para_buffer = {}
    end
  end

  local BULLET_PATTERN = "^%s*[%-*]%s+(.+)$"

  local i = 1
  while i <= #lines do
    local line = lines[i]
    -- Probe each block opener once; the branch order below decides priority.
    local hashes, title = line:match("^(#+)%s+(.+)$")
    local directive = line:match("^@(%w+).-:%s*$")
    local fence_info = line:match("^```(.*)$")

    if line:match("^%s*;;") then
      -- Scheme-style comment: skip without touching the paragraph buffer
      i = i + 1

    elseif hashes then
      -- Heading: level is the number of leading '#'
      flush_para()
      blocks[#blocks + 1] = pandoc.Header(#hashes, parse_inlines(title))
      i = i + 1

    elseif directive then
      -- Directive block: @name: ... @end
      flush_para()
      local content_lines = {}
      i = i + 1
      while i <= #lines and not lines[i]:match("^@end%s*$") do
        content_lines[#content_lines + 1] = lines[i]
        i = i + 1
      end
      i = i + 1 -- skip @end
      local content_inlines = parse_inlines(table.concat(content_lines, "\n"))
      blocks[#blocks + 1] = pandoc.Div(
        { pandoc.Para(content_inlines) },
        pandoc.Attr("", { "a2ml-directive", "a2ml-" .. directive })
      )

    elseif fence_info then
      -- Code block: ```lang ... ```
      flush_para()
      local lang = fence_info:match("^%s*(.-)%s*$") -- trim
      local code_lines = {}
      i = i + 1
      while i <= #lines and not lines[i]:match("^```%s*$") do
        code_lines[#code_lines + 1] = lines[i]
        i = i + 1
      end
      i = i + 1 -- skip closing ```
      local classes = {}
      if lang ~= "" then classes = { lang } end
      blocks[#blocks + 1] = pandoc.CodeBlock(
        table.concat(code_lines, "\n"),
        pandoc.Attr("", classes)
      )

    elseif line:match(BULLET_PATTERN) then
      -- Bullet list: consume every consecutive "- item" / "* item" line
      flush_para()
      local items = {}
      while i <= #lines do
        local item_text = lines[i]:match(BULLET_PATTERN)
        if not item_text then break end
        items[#items + 1] = { pandoc.Plain(parse_inlines(item_text)) }
        i = i + 1
      end
      blocks[#blocks + 1] = pandoc.BulletList(items)

    elseif line:match("^%s*$") then
      -- Blank line: paragraph boundary
      flush_para()
      i = i + 1

    else
      -- Regular text: accumulate into the current paragraph
      para_buffer[#para_buffer + 1] = line
      i = i + 1
    end
  end

  flush_para()
  return pandoc.Pandoc(blocks)
end
-- Try to recognise an inline markup construct that starts exactly at `pos`.
-- Patterns are anchored with '^', which string.match applies at `pos`, so no
-- probe scans the rest of the text. The trailing "()" capture yields the
-- index just past the match.
-- Returns the pandoc element and the next position, or nil when nothing
-- starts at `pos`.
local function match_markup_at(text, pos)
  local content, target, stop

  -- Strong: **text** (an empty "****" is left as literal text)
  content, stop = text:match("^%*%*(.-)%*%*()", pos)
  if content and content ~= "" then
    return pandoc.Strong(pandoc.Inlines(content)), stop
  end

  -- Emphasis: *text* (a bare "**" must not become an empty Emph)
  content, stop = text:match("^%*(.-)%*()", pos)
  if content and content ~= "" then
    return pandoc.Emph(pandoc.Inlines(content)), stop
  end

  -- Link: [text](url)
  content, target, stop = text:match("^%[(.-)%]%((.-)%)()", pos)
  if content then
    return pandoc.Link(pandoc.Inlines(content), target), stop
  end

  -- Reference: @ref(id) links to "#id", with the id as link text
  content, stop = text:match("^@ref%((.-)%)()", pos)
  if content then
    return pandoc.Link(pandoc.Inlines(content), "#" .. content), stop
  end

  -- Inline code: `code`
  content, stop = text:match("^`(.-)`()", pos)
  if content then
    return pandoc.Code(content), stop
  end

  return nil
end

--- Parse inline markup within a text string.
--
-- Recognises **strong**, *emphasis*, [text](url) links, @ref(id) references
-- and `inline code`. Markup is not nested: the text inside a construct is
-- handed to pandoc.Inlines as plain text. A special character that does not
-- open a complete construct is kept as literal text.
--
-- Plain text is converted with pandoc.Inlines(string), so whitespace is not
-- kept inside a single Str element.
--
-- @param text  string to parse
-- @return the result of pandoc.Inlines over the collected inline elements
function parse_inlines(text)
  local inlines = {}
  local len = #text
  local pos = 1

  while pos <= len do
    local element, stop = match_markup_at(text, pos)
    if element then
      inlines[#inlines + 1] = element
      pos = stop
    else
      -- Grab text up to the next special character. Searching from pos + 1
      -- guarantees progress when the character at `pos` is itself special
      -- but does not open a complete construct.
      local next_special = text:find("[%*%[`@]", pos + 1) or (len + 1)
      local chunk = text:sub(pos, next_special - 1)
      for _, piece in ipairs(pandoc.Inlines(chunk)) do
        inlines[#inlines + 1] = piece
      end
      pos = next_special
    end
  end

  return pandoc.Inlines(inlines)
end