Skip to content

Commit 3e618fb

Browse files
author
webdev778
committed
Add a Python compiler
This commit adds a Python compiler that targets a Python version of Interscript. It is still a work in progress: a couple of bugs remain, the CI needs to be fixed, and the Python package needs to be released.
1 parent fa8fbb5 commit 3e618fb

File tree

6 files changed

+351
-5
lines changed

6 files changed

+351
-5
lines changed

Gemfile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ unless ENV["SKIP_JS"]
2626
end
2727
end
2828

29+
unless ENV["SKIP_PYTHON"]
30+
group :pyexec do
31+
gem 'pycall'
32+
end
33+
end
34+
2935
group :rababa do
3036
gem 'rababa', "~> 0.1.1"
3137
end

Rakefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ task :compile, [:compiler, :target] do |t, args|
1414
when "javascript"
1515
require "interscript/compiler/javascript"
1616
[Interscript::Compiler::Javascript, "js"]
17+
when "python"
18+
require "interscript/compiler/python"
19+
[Interscript::Compiler::Python, "py"]
1720
end
1821

1922
FileUtils.mkdir_p(args[:target])
@@ -34,7 +37,7 @@ task :compile, [:compiler, :target] do |t, args|
3437
File.write(args[:target] + "/" + map + "." + ext, code)
3538
end
3639

37-
File.write(args[:target] + "/index.json", maplist.to_json)
40+
File.write(args[:target] + "/index.json", maplist.to_json) if args[:compiler] == "javascript"
3841
end
3942

4043
task :generate_visualization_html do

lib/interscript.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ def transliterate_each(system_code, string, maps={}, &block)
4848
load(system_code, maps).(string, each: true, &block)
4949
end
5050

51-
def transliterate_file(system_code, input_file, output_file, maps={})
51+
def transliterate_file(system_code, input_file, output_file, maps={}, compiler: Interscript::Interpreter)
5252
input = File.read(input_file)
53-
output = transliterate(system_code, input, maps)
53+
output = transliterate(system_code, input, maps, compiler: compiler)
5454

5555
File.open(output_file, 'w') do |f|
5656
f.puts(output)

lib/interscript/command.rb

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,23 @@ class Command < Thor
88
desc '<file>', 'Transliterate text'
99
option :system, aliases: '-s', required: true, desc: 'Transliteration system'
1010
option :output, aliases: '-o', required: false, desc: 'Output file'
11+
option :compiler, aliases: '-c', required: false, desc: 'Compiler (eg. Interscript::Compiler::Python)'
1112
# Was this option really well thought out? The last parameter is a cache, isn't it?
1213
#option :map, aliases: '-m', required: false, default: "{}", desc: 'Transliteration mapping json'
1314

1415
def translit(input)
16+
compiler = if options[:compiler]
17+
compiler = options[:compiler].split("::").last.downcase
18+
require "interscript/compiler/#{compiler}"
19+
Object.const_get(options[:compiler])
20+
else
21+
Interscript::Interpreter
22+
end
23+
1524
if options[:output]
16-
Interscript.transliterate_file(options[:system], input, options[:output]) #, JSON.parse(options[:map]))
25+
Interscript.transliterate_file(options[:system], input, options[:output], compiler: compiler)
1726
else
18-
puts Interscript.transliterate(options[:system], IO.read(input))
27+
puts Interscript.transliterate(options[:system], IO.read(input), compiler: compiler)
1928
end
2029
end
2130

lib/interscript/compiler/python.rb

Lines changed: 326 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,326 @@
1+
require 'pycall'
2+
3+
class Interscript::Compiler::Python < Interscript::Compiler
4+
# Render a Ruby value as Python literal source text.
#
# Strings and Integers are rendered with Ruby's #inspect, Symbols as the
# #inspect of their string form, Hashes and Arrays recursively, nil as
# None and booleans as True/False.
#
# @param val [String, Integer, Symbol, Hash, Array, true, false, nil]
# @return [String] Python source text for the value
# @raise [ArgumentError] if val has no supported rendering
def escape(val)
  case val
  when String, Integer
    # NOTE(review): this relies on Ruby's #inspect output also being a valid
    # Python literal; Ruby-specific escape sequences are not translated.
    val.inspect
  when Symbol
    val.to_s.inspect
  when Hash
    "{" + val.map { |k, v| "#{escape k}:#{escape v}" }.join(",") + "}"
  when Array
    "[" + val.map { |i| escape i }.join(",") + "]"
  when nil
    "None"
  when true
    "True"
  when false
    "False"
  else
    # Raise instead of terminating the whole process, so the caller decides
    # how to report a value that cannot be compiled.
    raise ArgumentError, "Can't escape #{val.inspect} (#{val.class}) as a Python literal"
  end
end
23+
24+
# Escape val for literal use inside a regular expression, using the
# `escape` function of the Python `regex` module. The module is imported
# through PyCall on first use and cached in @pycall_regex.
def re_escape(val)
  regex_module = (@pycall_regex ||= PyCall.import_module("regex"))
  regex_module.escape(val)
end
28+
29+
# Wrap an already-built pattern in the Python expression that compiles it
# with the MULTILINE flag. The pattern is inserted verbatim between double
# quotes.
def new_regexp(str)
  format('re.compile("%s", re.MULTILINE)', str)
end
32+
33+
# Run the given block with the emit indentation increased by four spaces.
#
# The previous level is restored even when the block raises, so a failed
# compilation step cannot leave later output mis-indented.
def indent
  @indent += 4
  begin
    yield
  ensure
    @indent -= 4
  end
end
38+
39+
# Append one line of generated source to @code, prefixed with @indent
# spaces and terminated with a newline.
def emit(code)
  @code.concat(" " * @indent, code, "\n")
end
42+
43+
# Generate Python source for `map`, accumulating it in @code.
#
# Resets all per-compilation state, then emits in order: the imports, one
# load_map call per dependency, the map definition, both flavours (string
# and regexp) of every alias, every stage, and finally the parallel
# replacement tables collected while the stages were compiled.
#
# @param map the map document to compile
# @param debug [Boolean] stored in @debug
def compile(map, debug: false)
  @indent = 0
  @map = map
  @debug = debug
  @parallel_trees = {}
  @parallel_regexps = {}
  @code = ""

  emit "import interscript"
  emit "import regex as re"
  dependency_names = map.dependencies.map(&:full_name)
  dependency_names.each do |dependency_name|
    emit "interscript.load_map(#{escape dependency_name})"
  end

  emit "interscript.stdlib.define_map(#{escape map.name})"

  map.aliases.each do |alias_name, alias_node|
    as_string = compile_item(alias_node.data, map, :str)
    emit "interscript.stdlib.add_map_alias(#{escape map.name}, #{escape alias_name}, #{as_string})"
    # The regexp flavour is a pattern fragment, so it is quoted here.
    as_regexp = "\"" + compile_item(alias_node.data, map, :re) + "\""
    emit "interscript.stdlib.add_map_alias_re(#{escape map.name}, #{escape alias_name}, #{as_regexp})"
  end

  map.stages.each do |_stage_name, stage|
    compile_rule(stage, @map, true)
  end

  # The tables are emitted after the stage functions that reference them.
  @parallel_trees.each do |tree_id, tree|
    emit "_PTREE_#{tree_id} = #{escape tree}"
  end
  @parallel_regexps.each do |regexp_id, (pattern, replacements)|
    emit "_PRE_#{regexp_id} = [\"#{pattern}\", #{escape replacements}]"
  end
end
76+
77+
# Join the patterns of a list of [pattern, replacement] pairs into a single
# alternation in which the i-th pattern is wrapped in a named group `_i`.
#
# @param subs_hash [Enumerable] pairs whose first element is a pattern
# @return [String] the combined pattern
def parallel_regexp_compile(subs_hash)
  named_groups = subs_hash.each_with_index.map do |pair, index|
    "(?P<_#{index}>#{pair[0]})"
  end
  named_groups.join("|")
end
85+
86+
# Emit the Python code for one rule node.
#
# * Stage     - a `_stage_<name>(s)` function wrapping its children, plus
#               its registration with add_map_stage.
# * Parallel  - a tree-based replacement when every child is a plain Sub
#               without context conditions; on any error while building
#               that, a single combined regexp instead.
# * Sub       - one regexp substitution.
# * Funcall   - a call into interscript.functions.
# * Run       - a call to another stage through interscript.transliterate.
#
# Rules marked reverse_run are skipped. @debug is not consulted here.
#
# @param r the rule node to compile
# @param map the map used to resolve aliases and stages
# @param wrapper unused in this method
# @raise [Interscript::SystemConversionError] for an unhandled node class
def compile_rule(r, map = @map, wrapper = false)
  return if r.reverse_run == true

  case r
  when Interscript::Node::Stage
    stage_function = "_stage_#{r.name}"
    emit "def #{stage_function}(s):"
    indent do
      r.children.each { |child| compile_rule(child, map) }
      emit "return s\n"
    end
    emit "interscript.stdlib.add_map_stage(#{escape @map.name}, #{escape r.name}, #{stage_function})"
  when Interscript::Node::Group::Parallel
    begin
      # First choice: a replacement tree, possible only for context-free subs.
      pairs = []
      r.children.each do |rule|
        raise Interscript::SystemConversionError, "Can't parallelize #{rule.class}" unless Interscript::Node::Rule::Sub === rule
        %i[before after not_before not_after].each do |condition|
          raise Interscript::SystemConversionError, "Can't parallelize rules with :#{condition}" if rule.public_send(condition)
        end

        next if rule.reverse_run == true
        pairs << [compile_item(rule.from, map, :par), compile_item(rule.to, map, :parstr)]
      end
      tree_id = pairs.hash.abs
      unless @parallel_trees.key? tree_id
        @parallel_trees[tree_id] = Interscript::Stdlib.parallel_replace_compile_tree(pairs)
      end
      emit "s = interscript.stdlib.parallel_replace_tree(s, _PTREE_#{tree_id})"
    rescue
      # Fallback: one large alternation, built from the children in the order
      # given by deterministic_sort_by_max_length.
      alternatives = []
      Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |rule|
        raise Interscript::SystemConversionError, "Can't parallelize #{rule.class}" unless Interscript::Node::Rule::Sub === rule

        next if rule.reverse_run == true
        alternatives << [build_regexp(rule, map), compile_item(rule.to, map, :parstr)]
      end
      regexp_id = alternatives.hash.abs
      unless @parallel_regexps.key? regexp_id
        combined = parallel_regexp_compile(alternatives)
        @parallel_regexps[regexp_id] = [combined, alternatives.map(&:last)]
      end
      emit "s = interscript.stdlib.parallel_regexp_gsub(s, *_PRE_#{regexp_id})"
    end
  when Interscript::Node::Rule::Sub
    pattern = new_regexp build_regexp(r, map)
    replacement =
      if r.to == :upcase
        'interscript.stdlib.upper'
      elsif r.to == :downcase
        'interscript.stdlib.lower'
      else
        compile_item(r.to, map, :str)
      end
    emit "s = #{pattern}.sub(#{replacement}, s)"
  when Interscript::Node::Rule::Funcall
    emit "s = interscript.functions.#{r.name}(s, #{escape r.kwargs})"
  when Interscript::Node::Rule::Run
    # A stage reference naming a map is resolved through that dependency's
    # document; otherwise through the current map.
    stage =
      if r.stage.map
        map.dep_aliases[r.stage.map].document.imported_stages[r.stage.name]
      else
        map.imported_stages[r.stage.name]
      end
    emit "s = interscript.transliterate(#{escape stage.doc_name}, s, #{escape stage.name})"
  else
    raise Interscript::SystemConversionError, "Can't compile unhandled #{r.class}"
  end
end
160+
161+
# Build the regexp source for a substitution rule: the compiled `from`
# pattern, wrapped in look-behind assertions for `before` / `not_before`
# and look-ahead assertions for `not_after` / `after` when those are set.
#
# @param r the rule supplying from/before/after/not_before/not_after
# @param map the map used to resolve the items
# @return [String] the assembled pattern
def build_regexp(r, map=@map)
  core = compile_item(r.from, map, :re)
  preceded_by = r.before && compile_item(r.before, map, :re)
  followed_by = r.after && compile_item(r.after, map, :re)
  not_preceded_by = r.not_before && compile_item(r.not_before, map, :re)
  not_followed_by = r.not_after && compile_item(r.not_after, map, :re)

  pattern = +""
  pattern << "(?<=#{preceded_by})" if preceded_by
  pattern << "(?<!#{not_preceded_by})" if not_preceded_by
  pattern << core
  pattern << "(?!#{not_followed_by})" if not_followed_by
  pattern << "(?=#{followed_by})" if followed_by
  pattern
end
176+
177+
# Compile one item node for the given target context.
#
# Targets:
# * :str    - a Python expression evaluating to the replacement string
# * :re     - a fragment of regexp source, spliced into a quoted pattern
# * :par    - plain data (strings / arrays) for the parallel tables
# * :parstr - like :par, but only the item's first string is used
#
# @param i the item (converted with Interscript::Node::Item.try_convert)
# @param doc the document used to resolve aliases
# @param target [Symbol, nil] one of the targets above
# @return [String, Array, nil] the compiled form; nil when no branch
#   applies to the item/target combination
# @raise [Interscript::SystemConversionError] when an alias cannot be found
#   or the item cannot be used in the requested context
def compile_item(i, doc=@map, target=nil)
  i = i.first_string if %i[str parstr].include? target
  i = Interscript::Node::Item.try_convert(i)
  # Past this point :parstr behaves exactly like :par.
  target = :par if target == :parstr

  case i
  when Interscript::Node::Item::Alias
    astr = if i.map
      d = doc.dep_aliases[i.map].document
      a = d.imported_aliases[i.name]
      # The alias item exposes its map as `i.map` (used just above), so the
      # message reports that rather than going through `i.stage`.
      raise Interscript::SystemConversionError, "Alias #{i.name} of #{i.map} not found" unless a
      "interscript.stdlib.get_alias_ALIASTYPE(#{escape a.doc_name}, #{escape a.name})"
    elsif Interscript::Stdlib::ALIASES.include?(i.name)
      if target != :re && Interscript::Stdlib.re_only_alias?(i.name)
        raise Interscript::SystemConversionError, "Can't use #{i.name} in a #{target} context"
      end
      "interscript.stdlib.aliases[#{escape i.name}]"
    else
      a = doc.imported_aliases[i.name]
      raise Interscript::SystemConversionError, "Alias #{i.name} not found" unless a

      "interscript.stdlib.get_alias_ALIASTYPE(#{escape a.doc_name}, #{escape a.name})"
    end

    if target == :str
      astr = astr.sub("_ALIASTYPE(", "(")
    elsif target == :re
      # Close the surrounding Python string literal, splice in the alias
      # lookup, and reopen the literal.
      astr = "\"+#{astr.sub("_ALIASTYPE(", "_re(")}+\""
    elsif target == :par
      # NOTE(review): every alias, including map-defined ones, is looked up
      # in the stdlib table in parallel mode - confirm this is intended.
      astr = Interscript::Stdlib::ALIASES[i.name]
    end
  when Interscript::Node::Item::String
    if target == :str
      # Replace \1 with \\1, this is weird, but it works!
      i.data.gsub("\\", "\\\\\\\\").inspect
    elsif target == :par
      i.data
    elsif target == :re
      re_escape(i.data)
    end
  when Interscript::Node::Item::Group
    if target == :par
      # Children may compile to arrays of alternatives; combine them into
      # every possible concatenation.
      i.children.map do |j|
        compile_item(j, doc, target)
      end.reduce([""]) do |j, k|
        Array(j).product(Array(k)).map(&:join)
      end
    elsif target == :str
      i.children.map { |j| compile_item(j, doc, target) }.join("+")
    elsif target == :re
      i.children.map { |j| compile_item(j, doc, target) }.join
    end
  when Interscript::Node::Item::CaptureGroup
    if target != :re
      raise Interscript::SystemConversionError, "Can't use a CaptureGroup in a #{target} context"
    end
    "(" + compile_item(i.data, doc, target) + ")"
  when Interscript::Node::Item::Maybe,
       Interscript::Node::Item::MaybeSome,
       Interscript::Node::Item::Some

    resuffix = { Interscript::Node::Item::Maybe     => "?",
                 Interscript::Node::Item::Some      => "+",
                 Interscript::Node::Item::MaybeSome => "*" }[i.class]

    if target == :par
      raise Interscript::SystemConversionError, "Can't use a Maybe in a #{target} context"
    end
    # A string that is not exactly one character long is wrapped in a
    # non-capturing group so the quantifier applies to all of it.
    if Interscript::Node::Item::String === i.data && i.data.data.length != 1
      "(?:" + compile_item(i.data, doc, target) + ")" + resuffix
    else
      compile_item(i.data, doc, target) + resuffix
    end
  when Interscript::Node::Item::CaptureRef
    if target == :par
      raise Interscript::SystemConversionError, "Can't use CaptureRef in parallel mode"
    elsif target == :re
      "\\\\#{i.id}"
    elsif target == :str
      "\"\\\\#{i.id}\""
    end
  when Interscript::Node::Item::Any
    if target == :str
      raise Interscript::SystemConversionError, "Can't use Any in a string context" # A linter could find this!
    elsif target == :par
      i.data.map(&:data)
    elsif target == :re
      case i.value
      when Array
        data = i.data.map { |j| compile_item(j, doc, target) }
        "(?:" + data.join("|") + ")"
      when String
        "[#{re_escape(i.value)}]"
      when Range
        "[#{re_escape(i.value.first)}-#{re_escape(i.value.last)}]"
      end
    end
  end
end
283+
284+
# Class-level state: which maps have already been loaded, keyed by map
# name, and the imported Python `interscript` module (nil until first load).
@maps_loaded = {}
@ctx = nil
class << self
  attr_accessor :maps_loaded, :ctx
end
290+
291+
# Load this map into the embedded Python runtime, once per map name.
#
# Dependencies that are not yet loaded are loaded first. On first use the
# Python source directory (python/src, four levels above this file's
# directory) is appended to Python's sys.path and the `interscript` module
# is imported and cached on the class. The generated code is written to
# interscript/maps/<map name>.py under that directory and then loaded via
# the Python module's load_map.
def load
  map_name = @map.name
  return if self.class.maps_loaded[map_name]

  @map.dependencies.each do |dependency|
    dependency_name = dependency.full_name
    next if self.class.maps_loaded[dependency_name]

    Interscript.load(dependency_name, compiler: self.class).load
  end

  python_src_path = File.join(__dir__, '..', '..', '..', '..', 'python', 'src')
  unless self.class.ctx
    PyCall.sys.path.append(python_src_path)
    self.class.ctx = PyCall.import_module("interscript")
  end

  File.write("#{python_src_path}/interscript/maps/#{map_name}.py", @code)
  self.class.ctx.load_map(map_name)

  self.class.maps_loaded[map_name] = true
end
313+
314+
# Transliterate `str` with this map by delegating to the Python module's
# transliterate function, loading the map first if necessary.
#
# @param str the text to transliterate
# @param stage the stage to run; passed to Python as a string
def call(str, stage=:main)
  load
  python_module = self.class.ctx
  python_module.transliterate(@map.name, str, stage.to_s)
end
318+
319+
# Return the contents of the $map_debug global, or an empty array when it
# is unset.
def self.read_debug_data
  recorded = $map_debug
  recorded ? recorded : []
end
322+
323+
# Replace the $map_debug global with a fresh empty array.
def self.reset_debug_data
  $map_debug = Array.new
end
326+
end

0 commit comments

Comments
 (0)