-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbuild_opcode_pack.py
More file actions
107 lines (78 loc) · 2.26 KB
/
build_opcode_pack.py
File metadata and controls
107 lines (78 loc) · 2.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import csv
import os
import zipfile
import json
# Target languages: build_zip() writes one "<lang>_map.csv" file per entry.
LANGUAGES = [
    "python",
    "javascript",
    "c",
    "rust",
    "go",
    "lua",
    "sql",
]

# Base mnemonics that expand_opcodes() cycles through, in this order, when
# it builds the numbered opcode names.
BASE_OPCODES = [
    "LOAD", "STORE", "MOVE",
    "ADD", "SUB", "MUL", "DIV",
    "CMP",
    "JMP", "JZ", "JNZ",
    "CALL", "RET",
    "PRINT",
]
def expand_opcodes(n: int = 1000, base_opcodes=None) -> list:
    """Return ``n`` unique opcode names built by cycling through base mnemonics.

    The i-th name is ``f"{base}_{i}"``, where ``base`` is chosen round-robin
    from the base list; the numeric suffix is what makes each name unique.

    Args:
        n: Number of opcode names to generate. Zero or a negative value
            yields an empty list.
        base_opcodes: Optional iterable of base mnemonics to cycle through.
            When omitted, the module-level ``BASE_OPCODES`` list is used.

    Returns:
        A list of ``n`` strings of the form ``"<base>_<index>"``.

    Raises:
        ValueError: If at least one name is requested but the base list is
            empty.
    """
    bases = BASE_OPCODES if base_opcodes is None else list(base_opcodes)
    if n > 0 and not bases:
        # Fail with a clear message instead of a ZeroDivisionError from "%".
        raise ValueError("base_opcodes must not be empty")
    count = len(bases)
    return [f"{bases[i % count]}_{i}" for i in range(n)]
def generate_language_map(lang, opcodes):
    """Write the opcode mapping table for one language to ``<lang>_map.csv``.

    The file is created in the current working directory and overwrites any
    existing file of the same name. It holds a header row followed by one row
    per opcode with the columns ``opcode``, ``variant_simple``,
    ``variant_function``, ``variant_native`` and ``notes``.

    Args:
        lang: Language identifier; used in the file name and in the
            generated variant strings.
        opcodes: Iterable of opcode names. The text before the first ``"_"``
            in each name is treated as its base mnemonic.

    Returns:
        The name of the CSV file that was written.
    """
    filename = f"{lang}_map.csv"
    header = [
        "opcode",
        "variant_simple",
        "variant_function",
        "variant_native",
        "notes",
    ]
    # newline="" leaves line endings to the csv module; the explicit UTF-8
    # encoding keeps the output independent of the platform's locale default.
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(header)
        for op in opcodes:
            # Only the first piece is needed, so stop splitting after one cut.
            base = op.split("_", 1)[0]
            writer.writerow([
                op,
                f"{lang}_simple_{op}()",
                f"{base.lower()}_{lang}()",
                generate_native(lang, base),
                "auto-generated mapping",
            ])
    return filename
def generate_native(lang: str, op: str) -> str:
    """Return the placeholder "native" snippet for ``op`` in language ``lang``.

    For each supported language the placeholder is a comment written in that
    language's comment syntax (for example ``"# python_ADD"`` or
    ``"/* c_ADD */"``). Any other language falls back to the bare
    ``"<lang>_<op>"`` string.

    Args:
        lang: Language identifier, e.g. ``"python"`` or ``"sql"``.
        op: Opcode (base mnemonic) to embed in the placeholder.

    Returns:
        The placeholder string for the given language/opcode pair.
    """
    # Comment-style template per language. JavaScript deliberately uses the
    # short "js" tag. Lua is listed in LANGUAGES, so it gets a "--" comment
    # placeholder like the other languages instead of the bare fallback.
    templates = {
        "python": "# python_{op}",
        "javascript": "// js_{op}",
        "c": "/* c_{op} */",
        "rust": "// rust_{op}",
        "go": "// go_{op}",
        "lua": "-- lua_{op}",
        "sql": "-- sql_{op}",
    }
    template = templates.get(lang)
    if template is None:
        return f"{lang}_{op}"
    return template.format(op=op)
def build_zip():
    """Generate every dataset file and bundle them into ``opcode_pack.zip``.

    Steps, all performed in the current working directory:

    1. Expand the base opcodes into 1000 numbered opcode names.
    2. Write one ``<lang>_map.csv`` per entry in ``LANGUAGES``.
    3. Write ``ir_opcodes.csv``, a single-column list of every opcode.
    4. Write ``meta.json`` describing the languages and opcode count.
    5. Add all of the files above to ``opcode_pack.zip``.

    The intermediate CSV/JSON files are left on disk after the archive is
    written. Prints a confirmation line when done.
    """
    opcodes = expand_opcodes(1000)

    # One mapping CSV per target language.
    files = [generate_language_map(lang, opcodes) for lang in LANGUAGES]

    # IR master file: a header row followed by one opcode per row.
    # Explicit UTF-8 keeps the output independent of the platform locale.
    with open("ir_opcodes.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["opcode"])
        writer.writerows([op] for op in opcodes)
    files.append("ir_opcodes.csv")

    # Meta file describing the contents of the pack.
    meta = {
        "languages": LANGUAGES,
        "opcode_count": len(opcodes),
        "type": "LLM cross-language IR compiler dataset",
    }
    with open("meta.json", "w", encoding="utf-8") as f:
        json.dump(meta, f, indent=2)
    files.append("meta.json")

    # Bundle everything generated above into a single archive.
    with zipfile.ZipFile("opcode_pack.zip", "w") as z:
        for file in files:
            z.write(file)

    print("Built opcode_pack.zip")
# Script entry point: build the pack only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    build_zip()