-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathebasm.py
More file actions
executable file
·351 lines (271 loc) · 11.7 KB
/
ebasm.py
File metadata and controls
executable file
·351 lines (271 loc) · 11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
#!/usr/bin/python3
#
# A quick-and-dirty 65816 disassembler.
#
from array import array
from disassembler import *
#------------------------------------------------------------------------------
# HEURISTIC DISASSEMBLY
#------------------------------------------------------------------------------
def subroutine(src, status):
    '''Decodes instructions from src until a return opcode has been read.

    Instructions are read one after another through instruction(), with the
    status returned by each call fed into the next one. Decoding stops after
    an RTS (0x60) or RTL (0x6B); the return instruction itself is part of the
    result.

    Returns a pair (instructions, status): the list of decoded instructions in
    reading order, and the status handed back by instruction() for the last
    one.'''
    decoded = []
    while True:
        inst, status = instruction(src, status)
        decoded.append(inst)
        # Only a truthy instruction carrying RTS or RTL ends the subroutine.
        if inst and inst.op in (0x60, 0x6B):
            break
    return (decoded, status)
def disassemble(src, base, flags=0):
    '''Disassembles a single subroutine read from src and prints it, one line
    per instruction.

    base is the address of the first byte of src; bits 16-23 seed the bank and
    the low 16 bits seed the offset of the initial status. flags is passed as
    both the third and the fourth Status argument.'''
    pbr = (base >> 16) & 0xFF
    pc = base & 0xFFFF
    entry = Status(pbr, pc, flags, flags)
    listing, _ = subroutine(src, entry)
    for inst in listing:
        print(disassembly(inst))
class Subroutine:
    '''Record of one subroutine found during recursive disassembly.

    Attributes:
      address      -- the address the object was created with
      instructions -- instructions added through append(), in call order
      exitstatus   -- starts out as None; this class never assigns it'''

    def __init__(self, address):
        self.address = address
        self.exitstatus = None
        self.instructions = []

    def append(self, inst):
        '''Adds inst to the end of the instruction list.'''
        self.instructions.append(inst)
def recursive_subroutine(container, address, status, entities):
    '''Disassembles the subroutine at address, following every JSL (0x22) and
    JSR (0x20) it contains, and records the results in entities.

    address is normalised with snesoffset() first. If entities already holds
    an entry for it, that entry is returned unchanged (after a warning when it
    is not a Subroutine). Otherwise a new Subroutine is registered in entities
    and filled with instructions until an RTS (0x60) or RTL (0x6B) is read.

    Only status.m and status.x of the incoming status are used; bank and
    offset of the entry status are taken from address.

    Returns the Subroutine (or the pre-existing entity) for address.'''
    address = snesoffset(address)

    if address in entities:
        existing = entities[address]
        if not isinstance(existing, Subroutine):
            print("Warning: attempted to recurse into non-subroutine")
        return existing

    print('Found new subroutine at ${:06X}'.format(address))
    src = iterfrom(container, fileoffset(address))
    status = Status(bank(address), offset(address), status.m, status.x)

    # Register the subroutine before decoding its body, so that a (mutually)
    # recursive call back to this address finds it instead of starting over.
    sub = Subroutine(address)
    entities[address] = sub

    while True:
        inst, status = instruction(src, status)
        sub.append(inst)

        if inst.op in (0x22, 0x20):
            print(disassembly(inst))
            if inst.op == 0x22:
                # JSL: the operand is used as the call target directly.
                target = inst.operand
            else:
                # JSR: the target is built from the instruction's own bank.
                target = makeadr(inst.status.pbr, inst.operand)
            recursive_subroutine(container, target, status, entities)

        if inst and inst.op in (0x60, 0x6B):
            break

    # NOTE: sub.exitstatus is deliberately left unset (the assignment
    # "sub.exitstatus = status" is disabled). Setting it only here, at the
    # end, would mean that anything reaching this subroutine again through
    # mutual recursion would still see None. Offering the entry status as a
    # best guess does not make much sense either, so for now the exit status
    # of subroutines is simply not used.
    #
    # Additionally, status.pbr and status.pc need care: the final status of
    # this function is the successor of an RTS or RTL, whose pbr and pc are
    # not known statically. Those fields should preferably be marked with an
    # "Unknown" value, so that if they ever propagate into other
    # disassemblies the mistake is obvious rather than showing up as merely
    # wrong program counter values.
    return sub
def recursive_disassemble(container, address, status):
    '''Disassembles a segment of code, recursively following subroutine calls.

    The 'top' level of code is only followed until the first untraceable
    jump, i.e., any indirect jump or RTS/RTL. Direct jumps (JMP 0x4C and
    JML 0x5C) are followed; a jump that would re-enter a target already
    followed with the same m/x flags ends the trace, because from that point
    the decode would repeat forever.

    container -- indexable byte source holding the ROM image
    address   -- address at which the top-level code starts
    status    -- status used to decode the first instruction

    Returns a pair (entities, status): entities maps addresses to the
    Instruction objects of the top-level segment and to the Subroutine objects
    found through calls; status is the last status produced at top level.'''
    src = iterfrom(container, fileoffset(address))
    entities = dict()
    # (target, m, x) states already entered through a direct jump. After a
    # jump the decode state is rebuilt from exactly these three values, so
    # seeing one again means the trace has entered a loop.
    # Assumes status.m and status.x are hashable flag values.
    followed = set()
    while True:
        inst, status = instruction(src, status)
        entities[makeadr(inst.status.pbr, inst.status.pc)] = inst
        # JSL long
        if inst.op == 0x22:
            recursive_subroutine(container, inst.operand, status, entities)
        # JSR short
        elif inst.op == 0x20:
            subadr = makeadr(inst.status.pbr, inst.operand)
            recursive_subroutine(container, subadr, status, entities)
        # Untraceable indirect jumps
        elif inst.op in {0x6C, 0x7C, 0xDC, 0xFC}:
            print("Warning: untraceable jump")
            break
        # Subroutine return
        elif inst.op in {0x60, 0x6B}:
            break
        # Traceable jumps
        elif inst.op in {0x4C, 0x5C}:
            if inst.op == 0x4C:
                newadr = makeadr(inst.status.pbr, inst.operand)
            else:
                newadr = inst.operand
            state = (newadr, status.m, status.x)
            if state in followed:
                print("Warning: jump loop detected")
                break
            followed.add(state)
            src = iterfrom(container, fileoffset(newadr))
            status = Status(bank(newadr), offset(newadr), status.m, status.x)
    return entities, status
def bank(address):
    '''Returns bits 16-23 of address (the bank byte).'''
    return (address >> 16) & 0xFF
def offset(address):
    '''Returns the low 16 bits of address (the offset within its bank).'''
    low_word = 0xFFFF
    return address & low_word
def makeadr(bank, offset):
    '''Combines a bank and an offset into one address: bank is shifted up by
    16 bits and offset is OR-ed into the result.'''
    high = bank << 16
    return high | offset
def fileoffset(adr):
    '''Maps adr to an offset into the ROM file.

    Addresses in the range 0xC00000 up to (but excluding) 0x1000000 are
    treated as hirom virtual addresses and have 0xC00000 subtracted. Any other
    value is assumed to be a file offset already and is returned unchanged.'''
    in_hirom_window = 0xC00000 <= adr < 0x1000000
    return adr - 0xC00000 if in_hirom_window else adr
def snesoffset(adr):
    '''Maps adr to a virtual address.

    Values from 0 through 0x300000 (inclusive) are treated as file offsets and
    have 0xC00000 added. Any other value is assumed to be a virtual address
    already and is returned unchanged.'''
    if not (0 <= adr <= 0x300000):
        return adr
    return adr + 0xC00000
# Banks that testwindow treats as known code banks when it scores the targets
# of JSL (0x22) and JMP long (0x5C) instructions.
# TODO: this should be a parameter
goodbanks = { 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xEE, 0xEF }
def testwindow(container, start, flags=0):
    '''Heuristically scores whether the bytes at file offset start look like
    code, printing an annotated disassembly and the final scores.

    Instructions are decoded from start until an RTS (0x60) or RTL (0x6B) is
    read. Each instruction may raise the not_code score (BRK, COP, TRB/TSB,
    long jumps into banks outside goodbanks) or the is_code score (small
    immediate operands, long jumps into goodbanks, PLD directly before a
    return). When several rules match one instruction, all of them count but
    only the last matching explanation is printed.

    container -- indexable byte source holding the ROM image
    start     -- file offset of the first byte to decode
    flags     -- value passed as both the third and fourth Status argument

    Returns None; all results are printed.'''
    is_code = 0
    not_code = 0
    src = iterfrom(container, start)
    address = start + 0xC00000
    status = Status(bank(address), offset(address), flags, flags)
    instructions = []
    inst = None
    while not inst or inst.op not in (0x6B, 0x60):
        inst, status = instruction(src, status)
        explanation = ''
        # Check for 'bad' codes
        if inst.op == 0x00:
            not_code += 50  # BRK
            explanation = 'BRK encountered'
        if inst.op == 0x02:
            not_code += 10  # COP
            explanation = 'COP encountered'
        if inst.op in {0x22, 0x5C} and bank(inst.operand) not in goodbanks:
            not_code += 100
            explanation = 'JMP/JSL to non-code bank'
        if inst.op in {0x1C, 0x14, 0x0C, 0x04}:
            not_code += 20
            explanation = 'TRB/TSB encountered'
        # Check for 'good' codes
        if 'const' in instruction_set[inst.op] and \
                not status.m and \
                inst.operand < 256:
            is_code += 20
            explanation = 'Likely immediate operand'
        if inst.op in {0x22, 0x5C} and bank(inst.operand) in goodbanks:
            is_code += 10
            explanation = 'JMP/JSL to known code bank'
        # The previous instruction only exists once something has been
        # decoded; a window that starts on RTS/RTL has no predecessor to look
        # at, so the list must be checked before it is indexed.
        if inst.op in {0x6B, 0x60} and instructions \
                and instructions[-1].op == 0x2B:
            # PLD followed by RTS or RTL
            is_code += 20
            explanation = 'Return from subroutine sequence'
        print('{:50}{}'.format(disassembly(inst), explanation))
        instructions.append(inst)
    print('testwindow analysis for ${:06X}'.format(address))
    print('is_code confidence: {}'.format(is_code))
    print('not_code confidence: {}'.format(not_code))
# Closed disassembly
#
# Construct a code graph based on all branches and jumps. NOTE: we also need to
# fix successor status handling in core. In particular, what is the successor
# status of a branch instruction like BEQ? I think part of the confusion here
# stems from the questionable choice to make PBR and PC part of the status;
# those registers are of course not determined by succession from the "last"
# instruction, but by the location of the instruction under consideration.
#
# That it is a poor choice can be seen by its uselessness: in the case of a
# non-branching instruction, the PBR and PC values are trivial; in the case of
# a branching instruction, they are unknowable.
#
# For the time being, we'll fix instruction loading in the core so that PBR and
# PC are always given correct values. While we're at it we'll add seekable
# iterators to the interface; that's needed doing for some time. This nonsense
# about sometimes needing to give a container and sometimes needing to give an
# iterable is just a bit too much trouble.
#
# In fact, that use of "blind" iterators is partly what led to the faulty PBR
# and PC tracking; the instruction() function takes a plain byte iterator, so
# it has no way of knowing what location it's reading from.
#
# Actually, is a position-aware iterator the best way to give instruction() the
# information about code location? That means that instruction() would then be
# responsible for handling address translation. Maybe it's better, actually, to
# still use the successor status method: just be less stupid about it.
#
# Think: if we read a BEQ, there are actually two possible successors; one with
# PC + len(inst), and one with PC + operand(inst). In the case of a closed
# disassembly, we are interested in following both branches; we must simply use
# the correct successor status for each branch. instruction() should return
# both statuses in that case.
#
# So, for the moment, hold off on seekable iterators. Principle of least power.
def closed_disassembly(container, offset):
    '''Placeholder for closed disassembly; not implemented yet.

    Accepts container and offset, does nothing, and returns None.'''
    return None
def closed_node(container, status):
    '''
    Returns a graph node from the given location.

    Unfinished: at present this only decodes instructions starting at the
    address given by status.pbr and status.pc until isreturn() accepts an
    opcode, and then returns None.
    '''
    # iterfrom() indexes the container directly, so the virtual address has to
    # be converted to a file offset first, as every other call site does.
    start = fileoffset(makeadr(status.pbr, status.pc))
    src = iterfrom(container, start)
    inst = None
    while not inst or not isreturn(inst.op):
        # Feed the successor status into the next decode, as the other
        # disassembly loops do, so each instruction is not decoded with the
        # stale entry status.
        inst, status = instruction(src, status)
    # TODO: finish
def loadfile(filename):
    '''Reads the whole file named filename in binary mode.

    Returns its contents as an array of unsigned bytes (typecode 'B'). The
    file is closed before returning, including when reading fails.'''
    with open(filename, mode='rb') as f:
        return array('B', f.read())
def iterfrom(container, offset):
    '''Yields the items of container one at a time, starting at index offset.

    The generator simply ends when the index runs past the end of the
    container. It ends with a plain return: raising StopIteration inside a
    generator body is converted to RuntimeError on Python 3.7+ (PEP 479).'''
    while True:
        try:
            value = container[offset]
        except IndexError:
            return
        offset += 1
        yield value
# Command-line entry point:
#   <romfile> <dis|testwindow|recursive> <hexoffset> [flagstate]
# hexoffset is parsed as hexadecimal; flagstate (default 0) is parsed as a
# decimal integer and used for both the m and the x flag.
if __name__ == '__main__':
    import sys

    if len(sys.argv) < 4:
        print('''Usage:
disasm.py <romfile> <dis|testwindow|recursive> <hexoffset> [flagstate]''')
        # sys.exit rather than the site-provided exit(), which is meant for
        # interactive use and is not guaranteed to exist.
        sys.exit(1)

    filename = sys.argv[1]
    mode = sys.argv[2]
    address = int(sys.argv[3], 16)
    flags = 0
    if len(sys.argv) > 4:
        flags = int(sys.argv[4])

    rom = loadfile(filename)

    if mode == 'dis':
        src = iterfrom(rom, fileoffset(address))
        disassemble(src, snesoffset(address), flags)
    elif mode == 'testwindow':
        testwindow(rom, address, flags)
    elif mode == 'recursive':
        m, x = flags, flags
        address = snesoffset(address)
        status = Status(bank(address), offset(address), m, x)

        # Perform recursive disassembly
        entities, status = recursive_disassemble(rom, address, status)

        # Get instructions as a list, sorted by address. Sorting uses the
        # address key only, so Instruction objects are never compared.
        by_address = sorted(
            ((adr, ent) for adr, ent in entities.items()
             if isinstance(ent, Instruction)),
            key=lambda pair: pair[0])
        print('Main segment:')
        for _, inst in by_address:
            print(disassembly(inst))

        # Sort subroutines by address
        subroutines = sorted(
            (ent for ent in entities.values() if isinstance(ent, Subroutine)),
            key=lambda s: s.address)
        print('Identified {} subroutines:'.format(len(subroutines)))
        for i, s in enumerate(subroutines):
            print('Subroutine {} (${:06X}):'.format(i, s.address))
            for inst in s.instructions:
                print(disassembly(inst))
            print('')
    else:
        # An unrecognised mode used to be ignored silently.
        print('Unknown mode: {}'.format(mode))
        sys.exit(1)
#src = iterfrom(rom, address)
#disassemble(src, address + 0xC00000)