From 5620900db6377737ccfa732ddb5d7130de337d01 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 26 Jun 2022 15:12:17 +1000 Subject: [PATCH 01/46] fix LLIL for 3.0 API --- __init__.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/__init__.py b/__init__.py index dcc06bc..8aa41f8 100644 --- a/__init__.py +++ b/__init__.py @@ -2884,12 +2884,13 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in source.get_dest_il(il, il.pop(4)) ) elif instr in ('jmp', 'bra'): - dest_il = dest.get_address_il(il) + tmpil = LowLevelILFunction(il.arch) + dstil = dest.get_address_il(tmpil) + tmpil.append(dstil) dstlabel = None - # TODO: this looks like it can't ever happen, so remove? - if dest_il in il and il[dest_il].operation == LowLevelILOperation.LLIL_CONST: - dstlabel = il.get_label_for_address(il.arch, il[dest_il].constant) + if tmpil[dstil].operation == LowLevelILOperation.LLIL_CONST: + dstlabel = il.get_label_for_address(il.arch, tmpil[dstil].constant) if dstlabel is not None: il.append( @@ -2897,7 +2898,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in ) else: il.append( - il.jump(dest_il) + il.jump(dest.get_address_il(il)) ) elif instr in ('jsr', 'bsr'): il.append( @@ -2912,7 +2913,9 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in elif instr in ('bhi', 'bls', 'bcc', 'bcs', 'bne', 'beq', 'bvc', 'bvs', 'bpl', 'bmi', 'bge', 'blt', 'bgt', 'ble'): flag_cond = ConditionMapping.get(instr[1:], None) - dest_il = dest.get_address_il(il) + tmpil = LowLevelILFunction(il.arch) + dest_il = dest.get_address_il(tmpil) + tmpil.append(dest_il) cond_il = None if flag_cond is not None: @@ -2922,9 +2925,8 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in il.append(il.unimplemented()) else: t = None - # TODO: this looks like it can't ever happen, so remove? 
- if dest_il in il and il[dest_il].operation == LowLevelILOperation.LLIL_CONST: - t = il.get_label_for_address(il.arch, il[dest_il].constant) + if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST: + t = il.get_label_for_address(il.arch, tmpil[dest_il].constant) indirect = False @@ -2946,7 +2948,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in if indirect: il.mark_label(t) - il.append(il.jump(dest_il)) + il.append(il.jump(dest.get_address_il(il))) if not f_label_found: il.mark_label(f) @@ -2954,7 +2956,9 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in 'dbeq', 'dbvc', 'dbvs', 'dbpl', 'dbmi', 'dbge', 'dblt', 'dbgt', 'dble'): flag_cond = ConditionMapping.get(instr[2:], None) - dest_il = dest.get_address_il(il) + tmpil = LowLevelILFunction(il.arch) + dest_il = dest.get_address_il(tmpil) + tmpil.append(dest_il) cond_il = None if flag_cond is not None: @@ -2968,9 +2972,8 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in il.append(il.unimplemented()) else: branch = None - # TODO: this looks like it can't ever happen, so remove? 
- if dest_il in il and il[dest_il].operation == LowLevelILOperation.LLIL_CONST: - branch = il.get_label_for_address(Architecture['M68000'], il[dest_il].constant) + if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST: + branch = il.get_label_for_address(Architecture['M68000'], tmpil[dest_il].constant) indirect = False @@ -3021,7 +3024,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in if indirect: il.mark_label(branch) - il.append(il.jump(dest_il)) + il.append(il.jump(dest.get_address_il(il))) if not skip_label_found: il.mark_label(skip) From a743fe340faf44b84d673863fed01a5651d1c9d7 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 26 Jun 2022 15:21:50 +1000 Subject: [PATCH 02/46] port to get_address_il2 as a workaround for needing to il.append() everything --- __init__.py | 249 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 161 insertions(+), 88 deletions(-) diff --git a/__init__.py b/__init__.py index 8aa41f8..bc258b8 100644 --- a/__init__.py +++ b/__init__.py @@ -189,9 +189,12 @@ def get_pre_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: def get_post_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: raise NotImplementedError - def get_address_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: + def get_address_il2(self, il: LowLevelILFunction) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: raise NotImplementedError + def get_address_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: + return self.get_address_il2(il)[0] + def get_source_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: raise NotImplementedError @@ -218,8 +221,9 @@ def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None - def get_address_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.unimplemented() + def get_address_il2(self, il) -> 
Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + r = il.unimplemented() + return (r, [r]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: if self.reg == 'ccr': @@ -286,8 +290,9 @@ def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None - def get_address_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.unimplemented() + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + r = il.unimplemented() + return (r, [r]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return (il.reg(1 << self.size, self.reg1), il.reg(1 << self.size, self.reg2)) @@ -335,8 +340,9 @@ def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None - def get_address_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.unimplemented() + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + r = il.unimplemented() + return (r, [r]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return [il.reg(1 << self.size, reg) for reg in self.regs] @@ -367,8 +373,9 @@ def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None - def get_address_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.reg(4, self.reg) + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + r = il.reg(4, self.reg) + return (r, [r]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return il.load(1 << self.size, self.get_address_il(il)) @@ -405,8 +412,11 @@ def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None - def get_address_il(self, il: LowLevelILFunction) -> 
ExpressionIndex: - return (il.reg(4, self.reg1), il.reg(4, self.reg2)) + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + # return (il.reg(4, self.reg1), il.reg(4, self.reg2)) + a = il.reg(4, self.reg1) + b = il.reg(4, self.reg2) + return ((a, b), [a, b]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return (il.load(1 << self.size, il.reg(4, self.reg1)), il.load(1 << self.size, il.reg(4, self.reg2))) @@ -445,8 +455,9 @@ def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: ) ) - def get_address_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.reg(4, self.reg) + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + r = il.reg(4, self.reg) + return (r, [r]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return il.load(1 << self.size, self.get_address_il(il)) @@ -485,8 +496,9 @@ def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None - def get_address_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.reg(4, self.reg) + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + r = il.reg(4, self.reg) + return (r, [r]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return il.load(1 << self.size, self.get_address_il(il)) @@ -527,14 +539,15 @@ def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None - def get_address_il(self, il: LowLevelILFunction) -> ExpressionIndex: + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: if self.reg == 'pc': - return il.const_pointer(4, il.current_address+2+self.offset) + r = il.const_pointer(4, il.current_address+2+self.offset) + return (r, [r]) else: - return il.add(4, - il.reg(4, self.reg), - il.const(2, 
self.offset) - ) + a = il.reg(4, self.reg) + b = il.const(2, self.offset) + c = il.add(4, a, b) + return (c, [a, b, c]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return il.load(1 << self.size, self.get_address_il(il)) @@ -582,17 +595,27 @@ def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None - def get_address_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.add(4, - il.add(4, - il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), - il.const(4, self.offset) - ), - il.mult(4, - il.reg(4 if self.ireg_long else 2, self.ireg), - il.const(1, self.scale) - ) - ) + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + # return il.add(4, + # il.add(4, + # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), + # il.const(4, self.offset) + # ), + # il.mult(4, + # il.reg(4 if self.ireg_long else 2, self.ireg), + # il.const(1, self.scale) + # ) + # ) + a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) + b = il.const(4, self.offset) + e = il.add(4, a, b) + + c = il.reg(4 if self.ireg_long else 2, self.ireg) + d = il.const(1, self.scale) + f = il.mult(4, c, d) + + g = il.add(4, e, f) + return (g, [a, b, c, d, e, f, g]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return il.load(1 << self.size, self.get_address_il(il)) @@ -637,16 +660,25 @@ def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None - def get_address_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.add(4, - il.load(4, - il.add(4, - il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), - il.const(4, self.offset) - ), - ), - il.const(4, self.outer_displacement) - ) + def 
get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + # return il.add(4, + # il.load(4, + # il.add(4, + # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), + # il.const(4, self.offset) + # ), + # ), + # il.const(4, self.outer_displacement) + # ) + a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) + b = il.const(4, self.offset) + c = il.add(4, a, b) + d = il.load(4, c) + + e = il.const(4, self.outer_displacement) + + f = il.add(4, d, e) + return (f, [a, b, c, d, e, f]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return il.load(1 << self.size, self.get_address_il(il)) @@ -701,22 +733,36 @@ def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None - def get_address_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.add(4, - il.load(4, - il.add(4, - il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), - il.const(4, self.offset) - ) - ), - il.add(4, - il.mult(4, - il.reg(4 if self.ireg_long else 2, self.ireg), - il.const(1, self.scale) - ), - il.const(4, self.outer_displacement) - ) - ) + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + # return il.add(4, + # il.load(4, + # il.add(4, + # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), + # il.const(4, self.offset) + # ) + # ), + # il.add(4, + # il.mult(4, + # il.reg(4 if self.ireg_long else 2, self.ireg), + # il.const(1, self.scale) + # ), + # il.const(4, self.outer_displacement) + # ) + # ) + a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) + b = il.const(4, self.offset) + c = il.add(4, a, b) + d = il.load(4, c) + + e = il.reg(4 if self.ireg_long else 2, self.ireg), + f = il.const(1, self.scale) + g = il.mult(4, e, f) + + h = 
il.const(4, self.outer_displacement) + i = il.add(4, g, h) + + j = il.add(4, d, i) + return (j, [a, b, c, d, e, f, g, h, i, j]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return il.load(1 << self.size, self.get_address_il(il)) @@ -771,22 +817,36 @@ def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None - def get_address_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.add(4, - il.load(4, - il.add(4, - il.add(4, - il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), - il.const(4, self.offset) - ), - il.mult(4, - il.reg(4 if self.ireg_long else 2, self.ireg), - il.const(1, self.scale) - ) - ) - ), - il.const(4, self.outer_displacement) - ) + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + # return il.add(4, + # il.load(4, + # il.add(4, + # il.add(4, + # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), + # il.const(4, self.offset) + # ), + # il.mult(4, + # il.reg(4 if self.ireg_long else 2, self.ireg), + # il.const(1, self.scale) + # ) + # ) + # ), + # il.const(4, self.outer_displacement) + # ) + a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) + b = il.const(4, self.offset) + c = il.add(4, a, b) + + d = il.reg(4 if self.ireg_long else 2, self.ireg) + e = il.const(1, self.scale) + f = il.mult(4, d, e) + + g = il.add(4, c, f) + h = il.load(4, g) + + i = il.const(4, self.outer_displacement) + j = il.add(4, h, i) + return (j, [a, b, c, d, e, f, g, h, i, j]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return il.load(1 << self.size, self.get_address_il(il)) @@ -823,10 +883,13 @@ def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None - def get_address_il(self, il: 
LowLevelILFunction) -> ExpressionIndex: - return il.sign_extend(self.address_width, - il.const(1 << self.address_size, self.address) - ) + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + # return il.sign_extend(self.address_width, + # il.const(1 << self.address_size, self.address) + # ) + a = il.const(1 << self.address_size, self.address) + b = il.sign_extend(self.address_width, a) + return (b, [a, b]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return il.load(1 << self.size, self.get_address_il(il)) @@ -858,8 +921,9 @@ def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None - def get_address_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.unimplemented() + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + r = il.unimplemented() + return (r, [r]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return il.const(1 << self.size, self.value) @@ -2885,12 +2949,17 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in ) elif instr in ('jmp', 'bra'): tmpil = LowLevelILFunction(il.arch) - dstil = dest.get_address_il(tmpil) - tmpil.append(dstil) + _dest_il = dest.get_address_il2(tmpil) + dest_il = _dest_il[0] + for i in _dest_il[1]: + tmpil.append(i) dstlabel = None - if tmpil[dstil].operation == LowLevelILOperation.LLIL_CONST: - dstlabel = il.get_label_for_address(il.arch, tmpil[dstil].constant) + try: + if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST: + dstlabel = il.get_label_for_address(il.arch, tmpil[dest_il].constant) + except: + raise if dstlabel is not None: il.append( @@ -2914,8 +2983,10 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in 'bpl', 'bmi', 'bge', 'blt', 'bgt', 'ble'): flag_cond = ConditionMapping.get(instr[1:], None) tmpil = 
LowLevelILFunction(il.arch) - dest_il = dest.get_address_il(tmpil) - tmpil.append(dest_il) + _dest_il = dest.get_address_il2(tmpil) + dest_il = _dest_il[0] + for i in _dest_il[1]: + tmpil.append(i) cond_il = None if flag_cond is not None: @@ -2957,8 +3028,10 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in 'dbgt', 'dble'): flag_cond = ConditionMapping.get(instr[2:], None) tmpil = LowLevelILFunction(il.arch) - dest_il = dest.get_address_il(tmpil) - tmpil.append(dest_il) + _dest_il = dest.get_address_il2(tmpil) + dest_il = _dest_il[0] + for i in _dest_il[1]: + tmpil.append(i) cond_il = None if flag_cond is not None: From b49e06edf28b35214fa0558261dace697d565f9c Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 26 Jun 2022 15:22:59 +1000 Subject: [PATCH 03/46] fix llil-generation. il.mult returns a tuple??? --- __init__.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/__init__.py b/__init__.py index bc258b8..c2637a1 100644 --- a/__init__.py +++ b/__init__.py @@ -734,19 +734,19 @@ def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - # return il.add(4, - # il.load(4, - # il.add(4, - # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), - # il.const(4, self.offset) + # j = il.add(4, d, i) + # d = il.load(4, c) + # c = il.add(4, a, b) + # a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), + # b = il.const(4, self.offset) # ) # ), - # il.add(4, - # il.mult(4, - # il.reg(4 if self.ireg_long else 2, self.ireg), - # il.const(1, self.scale) + # i = il.add(4, g, h) + # g = il.mult(4, e, f) + # e = il.reg(4 if self.ireg_long else 2, self.ireg), + # f = il.const(1, self.scale) # ), - # il.const(4, self.outer_displacement) + # h = il.const(4, self.outer_displacement) # ) # ) a = il.const_pointer(4, 
il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) @@ -756,7 +756,9 @@ def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[Expressio e = il.reg(4 if self.ireg_long else 2, self.ireg), f = il.const(1, self.scale) - g = il.mult(4, e, f) + # print('here1: ', e, ' ', self.ireg_long, ' ', self.ireg) + # FIXME: why 'e' is a tuple with a second element missing??? + g = il.mult(4, e[0], f) h = il.const(4, self.outer_displacement) i = il.add(4, g, h) From 70f235872aec4dc35337926dc0ade2693ac59b5f Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 30 Jan 2022 14:13:05 +1100 Subject: [PATCH 04/46] disable non-base m68k variants --- __init__.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/__init__.py b/__init__.py index c2637a1..9f3b9a4 100644 --- a/__init__.py +++ b/__init__.py @@ -2652,6 +2652,9 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in ) ) elif instr == 'cmp2': + il.append(il.nop()) + return + skip_label_found = True skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) @@ -3700,14 +3703,14 @@ def prompt_create_vector_table(view, addr=None): PluginCommand.register_for_address("Create M68k vector table", "Create M68k vector table", prompt_create_vector_table) M68000.register() -M68008.register() -M68010.register() -M68020.register() -M68030.register() -M68040.register() -M68LC040.register() -M68EC040.register() -M68330.register() -M68340.register() - -BinaryViewType['ELF'].register_arch(4, Endianness.BigEndian, Architecture['M68030']) +# M68008.register() +# M68010.register() +# M68020.register() +# M68030.register() +# M68040.register() +# M68LC040.register() +# M68EC040.register() +# M68330.register() +# M68340.register() + +# BinaryViewType['ELF'].register_arch(4, Endianness.BigEndian, Architecture['M68030']) From ab02a1f9dd81a1fbbb2bb7f7e020123631d4f40e Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: 
Sun, 26 Jun 2022 15:24:19 +1000 Subject: [PATCH 05/46] Revert "disable non-base m68k variants" 70f235872aec4dc35337926dc0ade2693ac59b5f --- __init__.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/__init__.py b/__init__.py index 9f3b9a4..c2637a1 100644 --- a/__init__.py +++ b/__init__.py @@ -2652,9 +2652,6 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in ) ) elif instr == 'cmp2': - il.append(il.nop()) - return - skip_label_found = True skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) @@ -3703,14 +3700,14 @@ def prompt_create_vector_table(view, addr=None): PluginCommand.register_for_address("Create M68k vector table", "Create M68k vector table", prompt_create_vector_table) M68000.register() -# M68008.register() -# M68010.register() -# M68020.register() -# M68030.register() -# M68040.register() -# M68LC040.register() -# M68EC040.register() -# M68330.register() -# M68340.register() - -# BinaryViewType['ELF'].register_arch(4, Endianness.BigEndian, Architecture['M68030']) +M68008.register() +M68010.register() +M68020.register() +M68030.register() +M68040.register() +M68LC040.register() +M68EC040.register() +M68330.register() +M68340.register() + +BinaryViewType['ELF'].register_arch(4, Endianness.BigEndian, Architecture['M68030']) From 76ccf8b94336ebf770cc590acd1ee2d7c30a77b8 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 26 Jun 2022 15:29:48 +1000 Subject: [PATCH 06/46] splitting into several files --- __init__.py | 3698 +-------------------------------------------------- m68k.py | 3697 ++++++++++++++++++++++++++++++++++++++++++++++++++ test.py | 87 ++ 3 files changed, 3785 insertions(+), 3697 deletions(-) create mode 100644 m68k.py create mode 100644 test.py diff --git a/__init__.py b/__init__.py index c2637a1..8f32d9f 100644 --- a/__init__.py +++ b/__init__.py @@ -1,3700 +1,4 @@ -""" - -Copyright (c) 2017 Alex Forencich - -Permission is 
hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
- -""" - -from __future__ import print_function - -import sys - -__module__ = sys.modules[__name__] - -import binaryninja -__logger = binaryninja.Logger(0, __module__.__name__) - -log = __logger.log -log_debug = __logger.log_debug -log_info = __logger.log_info -log_warn = __logger.log_warn -log_error = __logger.log_error -log_alert = __logger.log_alert - -from typing import List, Optional, Tuple - -import struct -import traceback -import os - -from binaryninja.architecture import Architecture -from binaryninja.lowlevelil import LowLevelILLabel, LLIL_TEMP, LowLevelILFunction, ExpressionIndex -from binaryninja.function import RegisterInfo, InstructionInfo, InstructionTextToken -from binaryninja.binaryview import BinaryView -from binaryninja.plugin import PluginCommand -from binaryninja.interaction import AddressField, ChoiceField, get_form_input -from binaryninja.types import Symbol -from binaryninja.enums import (Endianness, BranchType, InstructionTextTokenType, - LowLevelILOperation, LowLevelILFlagCondition, FlagRole, SegmentFlag, - ImplicitRegisterExtend, SymbolType) -from binaryninja import BinaryViewType, lowlevelil - -log_debug(f'm68k Plugin loaded from: {os.path.dirname(__module__.__loader__.path)}') - -# Shift syles -SHIFT_SYLE_ARITHMETIC = 0, -SHIFT_SYLE_LOGICAL = 1, -SHIFT_SYLE_ROTATE_WITH_EXTEND = 2, -SHIFT_SYLE_ROTATE = 3, - -ShiftStyle = [ - 'as', # SHIFT_SYLE_ARITHMETIC - 'ls', # SHIFT_SYLE_LOGICAL - 'rox', # SHIFT_SYLE_ROTATE_WITH_EXTEND - 'ro' # SHIFT_SYLE_ROTATE -] - -BITFIELD_STYLE_TST = 0, -BITFIELD_STYLE_EXTU = 1, -BITFIELD_STYLE_CHG = 2, -BITFIELD_STYLE_EXTS = 3, -BITFIELD_STYLE_CLR = 4, -BITFIELD_STYLE_FFO = 5, -BITFIELD_STYLE_SET = 6, -BITFIELD_STYLE_INS = 7, - -BitfieldStyle = [ - "tst", # BITFIELD_STYLE_TST - "extu", # BITFIELD_STYLE_EXTU - "chg", # BITFIELD_STYLE_CHG - "exts", # BITFIELD_STYLE_EXTS - "clr", # BITFIELD_STYLE_CLR - "ffo", # BITFIELD_STYLE_FFO - "set", # BITFIELD_STYLE_SET - "ins", # BITFIELD_STYLE_INS -] - - -# Condition codes 
-CONDITION_TRUE = 0 -CONDITION_FALSE = 1 -CONDITION_HIGH = 2 -CONDITION_LESS_OR_SAME = 3 -CONDITION_CARRY_CLEAR = 4 -CONDITION_CARRY_SET = 5 -CONDITION_NOT_EQUAL = 6 -CONDITION_EQUAL = 7 -CONDITION_OVERFLOW_CLEAR = 8 -CONDITION_OVERFLOW_SET = 9 -CONDITION_PLUS = 10 -CONDITION_MINUS = 11 -CONDITION_GREATER_OR_EQUAL = 12 -CONDITION_LESS_THAN = 13 -CONDITION_GREATER_THAN = 14 -CONDITION_LESS_OR_EQUAL = 15 - -Condition = [ - 't', # CONDITION_TRUE - 'f', # CONDITION_FALSE - 'hi', # CONDITION_HIGH - 'ls', # CONDITION_LESS_OR_SAME - 'cc', # CONDITION_CARRY_CLEAR - 'cs', # CONDITION_CARRY_SET - 'ne', # CONDITION_NOT_EQUAL - 'eq', # CONDITION_EQUAL - 'vc', # CONDITION_OVERFLOW_CLEAR - 'vs', # CONDITION_OVERFLOW_SET - 'pl', # CONDITION_PLUS - 'mi', # CONDITION_MINUS - 'ge', # CONDITION_GREATER_OR_EQUAL - 'lt', # CONDITION_LESS_THAN - 'gt', # CONDITION_GREATER_THAN - 'le' # CONDITION_LESS_OR_EQUAL -] - -# Registers -REGISTER_D0 = 0 -REGISTER_D1 = 1 -REGISTER_D2 = 2 -REGISTER_D3 = 3 -REGISTER_D4 = 4 -REGISTER_D5 = 5 -REGISTER_D6 = 6 -REGISTER_D7 = 7 -REGISTER_A0 = 8 -REGISTER_A1 = 9 -REGISTER_A2 = 10 -REGISTER_A3 = 11 -REGISTER_A4 = 12 -REGISTER_A5 = 13 -REGISTER_A6 = 14 -REGISTER_A7 = 15 - -Registers = [ - 'd0', # REGISTER_D0 - 'd1', # REGISTER_D1 - 'd2', # REGISTER_D2 - 'd3', # REGISTER_D3 - 'd4', # REGISTER_D4 - 'd5', # REGISTER_D5 - 'd6', # REGISTER_D6 - 'd7', # REGISTER_D7 - 'a0', # REGISTER_A0 - 'a1', # REGISTER_A1 - 'a2', # REGISTER_A2 - 'a3', # REGISTER_A3 - 'a4', # REGISTER_A4 - 'a5', # REGISTER_A5 - 'a6', # REGISTER_A6 - 'sp' # REGISTER_A7 -] - -# Sizes -SIZE_BYTE = 0 -SIZE_WORD = 1 -SIZE_LONG = 2 - -SizeSuffix = [ - '.b', # SIZE_BYTE - '.w', # SIZE_WORD - '.l', # SIZE_LONG -] - -# Operands - -class Operand: - def format(self, addr: int) -> List[InstructionTextToken]: - raise NotImplementedError - - def get_pre_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: - raise NotImplementedError - - def get_post_il(self, il: LowLevelILFunction) -> 
Optional[ExpressionIndex]: - raise NotImplementedError - - def get_address_il2(self, il: LowLevelILFunction) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - raise NotImplementedError - - def get_address_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: - return self.get_address_il2(il)[0] - - def get_source_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: - raise NotImplementedError - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> Optional[ExpressionIndex]: - raise NotImplementedError - -class OpRegisterDirect(Operand): - def __init__(self, size: int, reg: str): - self.size = size - self.reg = reg - - def __repr__(self): - return "OpRegisterDirect(%d, %s)" % (self.size, self.reg) - - def format(self, addr: int) -> List[InstructionTextToken]: - # a0, d0 - return [ - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg) - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - r = il.unimplemented() - return (r, [r]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - if self.reg == 'ccr': - c = il.flag_bit(1, 'c', 0) - v = il.flag_bit(1, 'v', 1) - z = il.flag_bit(1, 'z', 2) - n = il.flag_bit(1, 'n', 3) - x = il.flag_bit(1, 'x', 4) - return il.or_expr(1, il.or_expr(1, il.or_expr(1, il.or_expr(1, c, v), z), n), x) - else: - return il.reg(1 << self.size, self.reg) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - if self.reg == 'ccr': - return il.unimplemented() - - # return il.set_reg(1 << self.size, self.reg, value) - # if self.size == SIZE_BYTE: - # if self.reg[0] == 'a' or self.reg == 'sp': - # return None - # else: - # return il.set_reg(1, self.reg+'.b', value, flags) - # elif self.size == SIZE_WORD: - # return 
il.set_reg(2, self.reg+'.w', value, flags) - # else: - # return il.set_reg(4, self.reg, value, flags) - if self.size == SIZE_BYTE: - if self.reg[0] == 'a' or self.reg == 'sp': - return il.unimplemented() - else: - return il.set_reg(4, self.reg, il.or_expr(4, il.and_expr(4, il.const(4, 0xffffff00), il.reg(4, self.reg)), il.and_expr(4, il.const(4, 0xff), value)), flags) - elif self.size == SIZE_WORD: - if self.reg[0] == 'a' or self.reg == 'sp': - return il.set_reg(4, self.reg, il.sign_extend(4, value), flags) - else: - return il.set_reg(4, self.reg, il.or_expr(4, il.and_expr(4, il.const(4, 0xffff0000), il.reg(4, self.reg)), il.and_expr(4, il.const(4, 0xffff), value)), flags) - else: - if value: - return il.set_reg(4, self.reg, value, flags) - else: - return il.unimplemented() - - -class OpRegisterDirectPair(Operand): - def __init__(self, size: int, reg1: str, reg2: str): - self.size = size - self.reg1 = reg1 - self.reg2 = reg2 - - def __repr__(self): - return "OpRegisterDirectPair(%d, %s, %s)" % (self.size, self.reg1, self.reg2) - - def format(self, addr: int) -> List[InstructionTextToken]: - # d0:d1 - return [ - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg1), - InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ":"), - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg2) - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - r = il.unimplemented() - return (r, [r]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return (il.reg(1 << self.size, self.reg1), il.reg(1 << self.size, self.reg2)) - - def get_dest_il(self, il: LowLevelILFunction, values, flags=0) -> ExpressionIndex: - return (il.set_reg(1 << self.size, self.reg1, values[0], flags), il.set_reg(1 << self.size, 
self.reg2, values[1], flags)) - - -class OpRegisterMovemList(Operand): - def __init__(self, size: int, regs: List[str]): - self.size = size - self.regs = regs - - def __repr__(self): - return "OpRegisterMovemList(%d, %s)" % (self.size, repr(self.regs)) - - def format(self, addr: int) -> List[InstructionTextToken]: - # d0-d7/a0/a2/a4-a7 - if len(self.regs) == 0: - return [] - tokens = [InstructionTextToken(InstructionTextTokenType.RegisterToken, self.regs[0])] - last = self.regs[0] - first = None - for reg in self.regs[1:]: - if Registers[Registers.index(last)+1] == reg and reg != 'a0': - if first is None: - first = last - last = reg - else: - if first is not None: - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "-")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, last)) - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "/")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, reg)) - first = None - last = reg - if first is not None: - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "-")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, last)) - return tokens - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - r = il.unimplemented() - return (r, [r]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return [il.reg(1 << self.size, reg) for reg in self.regs] - - def get_dest_il(self, il: LowLevelILFunction, values, flags=0) -> ExpressionIndex: - return [il.set_reg(1 << self.size, reg, val, flags) for reg, val in zip(self.regs, values)] - - -class OpRegisterIndirect(Operand): - def __init__(self, size: int, reg: str): - self.size = size - self.reg = 
reg - - def __repr__(self): - return "OpRegisterIndirect(%d, %s)" % (self.size, self.reg) - - def format(self, addr: int) -> List[InstructionTextToken]: - # (a0) - return [ - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - r = il.reg(4, self.reg) - return (r, [r]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpRegisterIndirectPair(Operand): - def __init__(self, size: int, reg1: str, reg2: str): - self.size = size - self.reg1 = reg1 - self.reg2 = reg2 - - def __repr__(self): - return "OpRegisterIndirectPair(%d, %s, %s)" % (self.size, self.reg1, self.reg2) - - def format(self, addr: int) -> List[InstructionTextToken]: - # d0:d1 - return [ - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg1), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")"), - InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ":"), - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg2), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") 
- ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - # return (il.reg(4, self.reg1), il.reg(4, self.reg2)) - a = il.reg(4, self.reg1) - b = il.reg(4, self.reg2) - return ((a, b), [a, b]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return (il.load(1 << self.size, il.reg(4, self.reg1)), il.load(1 << self.size, il.reg(4, self.reg2))) - - def get_dest_il(self, il: LowLevelILFunction, values, flags=0) -> ExpressionIndex: - #return (il.store(1 << self.size, il.reg(4, self.reg1), values[0], flags), il.store(1 << self.size, il.reg(4, self.reg2), values[1], flags)) - return (il.store(1 << self.size, il.reg(4, self.reg1), values[0]), il.store(1 << self.size, il.reg(4, self.reg2), values[1])) - - -class OpRegisterIndirectPostincrement(Operand): - def __init__(self, size: int, reg: str): - self.size = size - self.reg = reg - - def __repr__(self): - return "OpRegisterIndirectPostincrement(%d, %s)" % (self.size, self.reg) - - def format(self, addr: int) -> List[InstructionTextToken]: - # (a0)+ - return [ - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")"), - InstructionTextToken(InstructionTextTokenType.TextToken, "+") - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.set_reg(4, - self.reg, - il.add(4, - il.reg(4, self.reg), - il.const(4, 1 << self.size) - ) - ) - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - r = il.reg(4, self.reg) - return (r, [r]) - - def get_source_il(self, il: LowLevelILFunction) 
-> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpRegisterIndirectPredecrement(Operand): - def __init__(self, size: int, reg: str): - self.size = size - self.reg = reg - - def __repr__(self): - return "OpRegisterIndirectPredecrement(%d, %s)" % (self.size, self.reg) - - def format(self, addr: int) -> List[InstructionTextToken]: - # -(a0) - return [ - InstructionTextToken(InstructionTextTokenType.TextToken, "-"), - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.set_reg(4, - self.reg, - il.sub(4, - il.reg(4, self.reg), - il.const(4, 1 << self.size) - ) - ) - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - r = il.reg(4, self.reg) - return (r, [r]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpRegisterIndirectDisplacement(Operand): - def __init__(self, size: int, reg: str, offset: int): - self.size = size - self.reg = reg - self.offset = offset - - def __repr__(self): - return 
"OpRegisterIndirectDisplacement(%d, %s, 0x%x)" % (self.size, self.reg, self.offset) - - def format(self, addr: int) -> List[InstructionTextToken]: - if self.reg == 'pc': - return [ - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, "${:08x}".format(addr+2+self.offset), addr+2+self.offset, 4), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") - ] - else: - # $1234(a0) - return [ - InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:04x}".format(self.offset), self.offset, 2), - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - if self.reg == 'pc': - r = il.const_pointer(4, il.current_address+2+self.offset) - return (r, [r]) - else: - a = il.reg(4, self.reg) - b = il.const(2, self.offset) - c = il.add(4, a, b) - return (c, [a, b, c]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - if self.reg == 'pc': - return il.unimplemented() - else: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpRegisterIndirectIndex(Operand): - def __init__(self, size: int, reg: str, offset: int, ireg: str, ireg_long: int, scale: int): - self.size = size - self.reg = reg - self.offset = offset - self.ireg = 
ireg - self.ireg_long = ireg_long - self.scale = scale - - def __repr__(self): - return "OpRegisterIndirectIndex(%d, %s, 0x%x, %s, %d, %d)" % (self.size, self.reg, self.offset, self.ireg, self.ireg_long, self.scale) - - def format(self, addr: int) -> List[InstructionTextToken]: - # $1234(a0,a1.l*4) - tokens = [] - if self.offset != 0: - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.offset), self.offset)) - tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "(")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg)) - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.ireg)) - tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, ".")) - tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "l" if self.ireg_long else 'w')) - if self.scale != 1: - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "*")) - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "{}".format(self.scale), self.scale)) - tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")")) - return tokens - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - # return il.add(4, - # il.add(4, - # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), - # il.const(4, self.offset) - # ), - # il.mult(4, - # il.reg(4 if self.ireg_long else 2, self.ireg), - # il.const(1, self.scale) - # ) - # ) - a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) - b = il.const(4, self.offset) - e = 
il.add(4, a, b) - - c = il.reg(4 if self.ireg_long else 2, self.ireg) - d = il.const(1, self.scale) - f = il.mult(4, c, d) - - g = il.add(4, e, f) - return (g, [a, b, c, d, e, f, g]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - if self.reg == 'pc': - return il.unimplemented() - else: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpMemoryIndirect(Operand): - def __init__(self, size: int, reg: str, offset: int, outer_displacement: int): - self.size = size - self.reg = reg - self.offset = offset - self.outer_displacement = outer_displacement - - def __repr__(self): - return "OpMemoryIndirect(%d, %s, %d, %d)" % (self.size, self.reg, self.offset, self.outer_displacement) - - def format(self, addr: int) -> List[InstructionTextToken]: - # ([$1234,a0],$1234) - tokens = [] - tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "(")) - tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "[")) - if self.offset != 0: - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.offset), self.offset)) - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg)) - tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, "]")) - if self.outer_displacement != 0: - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.outer_displacement), self.outer_displacement)) - 
tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")")) - return tokens - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - # return il.add(4, - # il.load(4, - # il.add(4, - # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), - # il.const(4, self.offset) - # ), - # ), - # il.const(4, self.outer_displacement) - # ) - a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) - b = il.const(4, self.offset) - c = il.add(4, a, b) - d = il.load(4, c) - - e = il.const(4, self.outer_displacement) - - f = il.add(4, d, e) - return (f, [a, b, c, d, e, f]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - if self.reg == 'pc': - return il.unimplemented() - else: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpMemoryIndirectPostindex(Operand): - def __init__(self, size: int, reg: str, offset: int, ireg: str, ireg_long: bool, scale: int, outer_displacement: int): - self.size = size - self.reg = reg - self.offset = offset - self.ireg = ireg - self.ireg_long = ireg_long - self.scale = scale - self.outer_displacement = outer_displacement - - def __repr__(self): - return "OpMemoryIndirectPostindex(%d, %s, 0x%x, %s, %d, %d, 0x%x)" % (self.size, self.reg, self.offset, self.ireg, self.ireg_long, self.scale, self.outer_displacement) - - def format(self, addr: int) -> List[InstructionTextToken]: - # ([$1234,a0],a1.l*4,$1234) - tokens = [] - 
tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "(")) - tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "[")) - if self.offset != 0: - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.offset), self.offset)) - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg)) - tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, "]")) - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.ireg)) - tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, ".")) - tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "l" if self.ireg_long else 'w')) - if self.scale != 1: - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "*")) - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "{}".format(self.scale), self.scale)) - if self.outer_displacement != 0: - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.outer_displacement), self.outer_displacement)) - tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")")) - return tokens - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - # j = il.add(4, d, i) - # d = il.load(4, c) - # c = il.add(4, a, b) - # a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), - # b = 
il.const(4, self.offset) - # ) - # ), - # i = il.add(4, g, h) - # g = il.mult(4, e, f) - # e = il.reg(4 if self.ireg_long else 2, self.ireg), - # f = il.const(1, self.scale) - # ), - # h = il.const(4, self.outer_displacement) - # ) - # ) - a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) - b = il.const(4, self.offset) - c = il.add(4, a, b) - d = il.load(4, c) - - e = il.reg(4 if self.ireg_long else 2, self.ireg), - f = il.const(1, self.scale) - # print('here1: ', e, ' ', self.ireg_long, ' ', self.ireg) - # FIXME: why 'e' is a tuple with a second element missing??? - g = il.mult(4, e[0], f) - - h = il.const(4, self.outer_displacement) - i = il.add(4, g, h) - - j = il.add(4, d, i) - return (j, [a, b, c, d, e, f, g, h, i, j]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - if self.reg == 'pc': - return il.unimplemented() - else: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpMemoryIndirectPreindex(Operand): - def __init__(self, size: int, reg: str, offset: int, ireg: str, ireg_long: bool, scale: int, outer_displacement: int): - self.size = size - self.reg = reg - self.offset = offset - self.ireg = ireg - self.ireg_long = ireg_long - self.scale = scale - self.outer_displacement = outer_displacement - - def __repr__(self): - return "OpMemoryIndirectPreindex(%d, %s, 0x%x, %s, %d, %d, 0x%x)" % (self.size, self.reg, self.offset, self.ireg, self.ireg_long, self.scale, self.outer_displacement) - - def format(self, addr: int) -> List[InstructionTextToken]: - # ([$1234,a0,a1.l*4],$1234) - tokens = [] - tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "(")) - 
tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "[")) - if self.offset != 0: - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.offset), self.offset)) - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg)) - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.ireg)) - tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, ".")) - tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "l" if self.ireg_long else 'w')) - if self.scale != 1: - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "*")) - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "{}".format(self.scale), self.scale)) - tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, "]")) - if self.outer_displacement != 0: - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.outer_displacement), self.outer_displacement)) - tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")")) - return tokens - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - # return il.add(4, - # il.load(4, - # il.add(4, - # il.add(4, - # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), - # il.const(4, self.offset) - # ), - # il.mult(4, - # il.reg(4 if self.ireg_long else 2, self.ireg), - # 
il.const(1, self.scale) - # ) - # ) - # ), - # il.const(4, self.outer_displacement) - # ) - a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) - b = il.const(4, self.offset) - c = il.add(4, a, b) - - d = il.reg(4 if self.ireg_long else 2, self.ireg) - e = il.const(1, self.scale) - f = il.mult(4, d, e) - - g = il.add(4, c, f) - h = il.load(4, g) - - i = il.const(4, self.outer_displacement) - j = il.add(4, h, i) - return (j, [a, b, c, d, e, f, g, h, i, j]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - if self.reg == 'pc': - return il.unimplemented() - else: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpAbsolute(Operand): - def __init__(self, size, address, address_size, address_width): - self.size = size - self.address = address - self.address_size = address_size - self.address_width = address_width - - def __repr__(self): - return "OpAbsolute(%d, 0x%x, %d, %d)" % (self.size, self.address, self.address_size, self.address_width) - - def format(self, addr: int) -> List[InstructionTextToken]: - # ($1234).w - return [ - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, "${:0{}x}".format(self.address, 1 << self.address_size), self.address, 1 << self.address_size), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")"+SizeSuffix[self.address_size]) - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], 
List[ExpressionIndex]]: - # return il.sign_extend(self.address_width, - # il.const(1 << self.address_size, self.address) - # ) - a = il.const(1 << self.address_size, self.address) - b = il.sign_extend(self.address_width, a) - return (b, [a, b]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpImmediate(Operand): - def __init__(self, size, value): - self.size = size - self.value = value - - def __repr__(self): - return "OpImmediate(%d, 0x%x)" % (self.size, self.value) - - def format(self, addr: int) -> List[InstructionTextToken]: - # #$1234 - return [ - InstructionTextToken(InstructionTextTokenType.TextToken, "#"), - #InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, "${:0{}x}".format(self.value, 1 << self.size), self.value, 1 << self.size) - InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:0{}x}".format(self.value, 1 << self.size), self.value, 1 << self.size) - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - r = il.unimplemented() - return (r, [r]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.const(1 << self.size, self.value) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - return il.unimplemented() - - -# condition mapping to LLIL flag conditions -ConditionMapping = { - 'hi': LowLevelILFlagCondition.LLFC_UGT, - 'ls': LowLevelILFlagCondition.LLFC_ULE, - 'cc': 
LowLevelILFlagCondition.LLFC_UGE, - 'cs': LowLevelILFlagCondition.LLFC_ULT, - 'ne': LowLevelILFlagCondition.LLFC_NE, - 'eq': LowLevelILFlagCondition.LLFC_E, - 'vc': LowLevelILFlagCondition.LLFC_NO, - 'vs': LowLevelILFlagCondition.LLFC_O, - 'pl': LowLevelILFlagCondition.LLFC_POS, - 'mi': LowLevelILFlagCondition.LLFC_NEG, - 'ge': LowLevelILFlagCondition.LLFC_SGE, - 'lt': LowLevelILFlagCondition.LLFC_SLT, - 'gt': LowLevelILFlagCondition.LLFC_SGT, - 'le': LowLevelILFlagCondition.LLFC_SLE, -} - -class M68000(Architecture): - name = "M68000" - address_size = 3 - default_int_size = 4 - max_instr_length = 22 - endianness = Endianness.BigEndian - regs = { - 'd0': RegisterInfo('d0', 4), - 'd1': RegisterInfo('d1', 4), - 'd2': RegisterInfo('d2', 4), - 'd3': RegisterInfo('d3', 4), - 'd4': RegisterInfo('d4', 4), - 'd5': RegisterInfo('d5', 4), - 'd6': RegisterInfo('d6', 4), - 'd7': RegisterInfo('d7', 4), - 'a0': RegisterInfo('a0', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), - 'a1': RegisterInfo('a1', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), - 'a2': RegisterInfo('a2', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), - 'a3': RegisterInfo('a3', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), - 'a4': RegisterInfo('a4', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), - 'a5': RegisterInfo('a5', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), - 'a6': RegisterInfo('a6', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), - 'sp': RegisterInfo('sp', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), - - 'sr': RegisterInfo('sr', 2), - 'ccr': RegisterInfo('sr', 1), - - # control registers - # MC68010/MC68020/MC68030/MC68040/CPU32 - 'sfc': RegisterInfo('sfc', 4), - 'dfc': RegisterInfo('dfc', 4), - 'usp': RegisterInfo('usp', 4), - 'vbr': RegisterInfo('vbr', 4), - # MC68020/MC68030/MC68040 - 'cacr': RegisterInfo('cacr', 4), - 'caar': RegisterInfo('caar', 4), - 'msp': RegisterInfo('msp', 4), - 'isp': RegisterInfo('isp', 
4), - # MC68040/MC68LC040 - 'tc': RegisterInfo('tc', 4), - 'itt0': RegisterInfo('itt0', 4), - 'itt1': RegisterInfo('itt1', 4), - 'dtt0': RegisterInfo('dtt0', 4), - 'dtt1': RegisterInfo('dtt1', 4), - 'mmusr': RegisterInfo('mmusr', 4), - 'urp': RegisterInfo('urp', 4), - 'srp': RegisterInfo('srp', 4), - # MC68EC040 - 'iacr0': RegisterInfo('iacr0', 4), - 'iacr1': RegisterInfo('iacr1', 4), - 'dacr0': RegisterInfo('dacr0', 4), - 'dacr1': RegisterInfo('dacr1', 4), - } - stack_pointer = 'sp' - flags = ['x', 'n', 'z', 'v', 'c'] - flag_write_types = ['*', 'nzvc'] - flags_written_by_flag_write_type = { - '*': ['x', 'n', 'z', 'v', 'c'], - 'nzvc': ['n', 'z', 'v', 'c'], - } - flag_roles = { - 'x': FlagRole.SpecialFlagRole, - 'n': FlagRole.NegativeSignFlagRole, - 'z': FlagRole.ZeroFlagRole, - 'v': FlagRole.OverflowFlagRole, - 'c': FlagRole.CarryFlagRole, - } - flags_required_for_flag_condition = { - LowLevelILFlagCondition.LLFC_UGT: ['c', 'z'], # hi - LowLevelILFlagCondition.LLFC_ULE: ['c', 'z'], # ls - LowLevelILFlagCondition.LLFC_UGE: ['c'], # cs - LowLevelILFlagCondition.LLFC_ULT: ['c'], # cs - LowLevelILFlagCondition.LLFC_NE: ['z'], # ne - LowLevelILFlagCondition.LLFC_E: ['z'], # eq - LowLevelILFlagCondition.LLFC_NO: ['v'], # vc - LowLevelILFlagCondition.LLFC_O: ['v'], # vs - LowLevelILFlagCondition.LLFC_POS: ['n'], # pl - LowLevelILFlagCondition.LLFC_NEG: ['n'], # mi - LowLevelILFlagCondition.LLFC_SGE: ['n', 'v'], # ge - LowLevelILFlagCondition.LLFC_SLT: ['n', 'v'], # lt - LowLevelILFlagCondition.LLFC_SGT: ['n', 'v', 'z'], # gt - LowLevelILFlagCondition.LLFC_SLE: ['n', 'v', 'z'], # le - } - control_registers = { - } - memory_indirect = False - movem_store_decremented = False - - def decode_effective_address(self, mode: int, register: int, data: bytes, size: Optional[int] = None) -> Tuple[Optional[Operand], Optional[int]]: - mode &= 0x07 - register &= 0x07 - - reg = None - - if mode == 0: - # data register direct - return (OpRegisterDirect(size, Registers[register]), 0) - 
elif mode == 1: - # address register direct - return (OpRegisterDirect(size, Registers[register+8]), 0) - elif mode == 2: - # address register indirect - return (OpRegisterIndirect(size, Registers[register+8]), 0) - elif mode == 3: - # address register indirect with postincrement - return (OpRegisterIndirectPostincrement(size, Registers[register+8]), 0) - elif mode == 4: - # address register indirect with predecrement - return (OpRegisterIndirectPredecrement(size, Registers[register+8]), 0) - elif mode == 5: - # address register indirect with displacement - return (OpRegisterIndirectDisplacement(size, Registers[register+8], struct.unpack_from('>h', data, 0)[0]), 2) - elif mode == 6: - # extended addressing mode - reg = Registers[register+8] - elif mode == 7: - if register == 0: - # absolute short - val = struct.unpack_from('>H', data, 0)[0] - if val & 0x8000: - if self.address_size == 4: - val |= 0xffff0000 # extend to 32-bits - else: - val |= 0xff0000 # extend to 24-bits (for 68000) - return (OpAbsolute(size, val, 1, self.address_size), 2) - if register == 1: - # absolute long - return (OpAbsolute(size, struct.unpack_from('>L', data, 0)[0], 2, self.address_size), 4) - elif register == 2: - # program counter indirect with displacement - return (OpRegisterIndirectDisplacement(size, 'pc', struct.unpack_from('>h', data, 0)[0]), 2) - elif register == 3: - # extended addressing mode - reg = 'pc' - elif register == 4: - # immediate - if size == None: - # unspecified length - return (OpImmediate(size, None), None) - elif size == SIZE_BYTE: - # byte - return (OpImmediate(size, struct.unpack_from('>b', data, 1)[0]), 2) - elif size == 1: - # word - return (OpImmediate(size, struct.unpack_from('>h', data, 0)[0]), 2) - elif size == 2: - # long - return (OpImmediate(size, struct.unpack_from('>l', data, 0)[0]), 4) - - if reg is not None: - extra = struct.unpack_from('>H', data, 0)[0] - # index register - xn = Registers[extra >> 12] - # index register size - index_size = (extra 
>> 11) & 1 - # index register scale - scale = 1 << ((extra >> 9) & 3) - length = 2 - - if extra & 0x0100: - # full extension word - bd = 0 - od = 0 - - # base displacement - if not (extra >> 7) & 1: - if (extra >> 4) & 3 == 2: - # word base displacement - bd = struct.unpack_from('>h', data, length)[0] - length += 2 - elif (extra >> 4) & 3 == 3: - # long base displacement - bd = struct.unpack_from('>L', data, length)[0] - length += 4 - - # outer displacement - if extra & 3 == 2: - # word outer displacement - od = struct.unpack_from('>h', data, length)[0] - length += 2 - elif extra & 3 == 3: - # long outer displacement - od = struct.unpack_from('>L', data, length)[0] - length += 4 - - # suppress index register - if extra & 7 == 0: - return (OpRegisterIndirectIndex(size, reg, bd, xn, index_size, scale), length) - elif (extra >> 6) & 1: - return (OpMemoryIndirect(size, reg, bd, od), length) - elif (extra >> 2) & 1: - return (OpMemoryIndirectPostindex(size, reg, bd, xn, index_size, scale, od), length) - else: - return (OpMemoryIndirectPreindex(size, reg, bd, xn, index_size, scale, od), length) - else: - # brief extension word - # 8 bit displacement - d8 = extra & 0xff - if d8 & 0x80: - d8 -= 256 - return (OpRegisterIndirectIndex(size, reg, d8, xn, index_size, scale), length) - - return (None, None) - - def decode_instruction(self, data: bytes, addr: int) -> Tuple[str, int, Optional[int], Optional[Operand], Optional[Operand], Optional[Operand]]: - error_value = ('unimplemented', len(data), None, None, None, None) - if len(data) < 2: - return error_value - - instruction = struct.unpack_from('>H', data)[0] - - msb = instruction >> 8 - operation_code = msb >> 4 - - #print((hex(addr), hex(instruction))) - - instr = None - length = None - size = None - source = None - dest = None - third = None - - if operation_code == 0x0: - # Bit manipulation/MOVEP/Immed late - if instruction & 0xf9c0 == 0x00c0: - # rtm, callm, chk2, cmp2 - if instruction & 0xfff0 == 0x06c0: - instr = 'rtm' 
- dest = OpRegisterDirect(SIZE_LONG, Registers[instruction & 15]) - length = 2 - elif instruction & 0xffc0 == 0x06c0: - instr = 'callm' - source = OpImmediate(SIZE_BYTE, struct.unpack_from('>B', data, 3)[0]) - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[4:], SIZE_BYTE) # check - if extra_dest is None: - return error_value - length = 4+extra_dest - else: - size = (instruction >> 9) & 3 - extra = struct.unpack_from('>H', data, 2)[0] - if extra & 0x0800: - instr = 'chk2' - else: - instr = 'cmp2' - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[4:], SIZE_BYTE) # check - dest = OpRegisterDirect(size, Registers[(instruction >> 12) & 15]) - if extra_source is None: - return error_value - length = 4+extra_source - elif instruction & 0xffc0 in (0x0ac0, 0x0cc0, 0x0ec0): - if instruction & 0xf9ff == 0x08fc: - instr = 'cas2' - size = ((instruction >> 9) & 3) - 1 - extra1 = struct.unpack_from('>H', data, 2)[0] - extra2 = struct.unpack_from('>H', data, 4)[0] - source = OpRegisterDirectPair(size, Registers[extra1 & 7], Registers[extra2 & 7]) - dest = OpRegisterDirectPair(size, Registers[(extra1 >> 6) & 7], Registers[(extra2 >> 6) & 7]) - third = OpRegisterIndirectPair(size, Registers[(extra1 >> 12) & 15], Registers[(extra2 >> 12) & 15]) - length = 6 - else: - instr = 'cas' - size = ((instruction >> 9) & 3) - 1 - extra = struct.unpack_from('>H', data, 2)[0] - source = OpRegisterDirect(size, Registers[extra & 7]) - dest = OpRegisterDirect(size, Registers[(extra >> 6) & 7]) - third, extra_third = self.decode_effective_address(instruction >> 3, instruction, data[4:], size) - if extra_third is None: - return error_value - length = 4+extra_third - elif msb in (0x00, 0x02, 0x04, 0x06, 0x0a, 0x0c): - # ORI, ANDI, SUBI, ADDI, EORI, CMPI - if msb == 0x00: - instr = 'ori' - elif msb == 0x02: - instr = 'andi' - elif msb == 0x04: - instr = 'subi' - elif msb == 0x06: - instr = 'addi' - elif msb == 0x0a: - instr 
= 'eori' - elif msb == 0x0c: - instr = 'cmpi' - size = (instruction >> 6) & 0x03 - source, extra_source = self.decode_effective_address(7, 4, data[2:], size) - if instruction & 0x00ff == 0x003c: - dest = OpRegisterDirect(size, 'ccr') - extra_dest = 0 - elif instruction & 0x00ff == 0x007c: - dest = OpRegisterDirect(size, 'sr') - extra_dest = 0 - else: - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], size) - - if dest is None: - instr = None - else: - length = 2+extra_source+extra_dest - elif msb == 0x08: - # btst, bchg, bclr, bset with constant - if instruction & 0xffc0 == 0x0800: - instr = 'btst' - elif instruction & 0xffc0 == 0x0840: - instr = 'bchg' - elif instruction & 0xffc0 == 0x0880: - instr = 'bclr' - elif instruction & 0xffc0 == 0x08C0: - instr = 'bset' - source = OpImmediate(SIZE_BYTE, struct.unpack_from('>B', data, 3)[0]) - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[4:], SIZE_BYTE) - if isinstance(dest, OpRegisterDirect): - dest.size = SIZE_LONG - if dest is None: - instr = None - else: - length = 4+extra_dest - elif msb & 0xf1 == 0x01: - # movep, btst, bchg, bclr, bset with register - if instruction & 0xf138 == 0x0108: - instr = 'movep' - size = ((instruction >> 6) & 1) + 1 - source, extra_source = self.decode_effective_address(5, instruction, data[2:], SIZE_BYTE) # check - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - length = 2+extra_source - if instruction & 0x0080: - source, dest = dest, source - else: - if instruction & 0xf1c0 == 0x0100: - instr = 'btst' - elif instruction & 0xf1c0 == 0x0140: - instr = 'bchg' - elif instruction & 0xf1c0 == 0x0180: - instr = 'bclr' - elif instruction & 0xf1c0 == 0x01c0: - instr = 'bset' - source = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7]) # check - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], SIZE_BYTE) - if isinstance(dest, 
OpRegisterDirect): - dest.size = SIZE_LONG - if dest is None: - instr = None - else: - length = 2+extra_dest - elif instruction & 0xff00 == 0x0e00: - instr = 'moves' - extra = struct.unpack_from('>H', data, 2)[0] - size = (instruction >> 6) & 3 - dest = OpRegisterDirect(size, Registers[extra >> 12]) - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[4:], size) - if extra & 0x0800: - source, dest = dest, source - if extra_source is None: - return error_value - length = 4+extra_source - elif operation_code in (0x1, 0x2, 0x3): - # move - instr = 'move' - if operation_code == 0x1: - # Move byte - size = SIZE_BYTE - elif operation_code == 0x2: - # Move long - size = SIZE_LONG - elif operation_code == 0x3: - # Move word - size = SIZE_WORD - - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if source is None: - instr = None - else: - dest, extra_dest = self.decode_effective_address(instruction >> 6, instruction >> 9, data[2+extra_source:], size) - if dest is None or isinstance(dest, OpImmediate): - instr = None - else: - if isinstance(dest, OpRegisterDirect) and (dest.reg[0] == 'a' or dest.reg == 'sp'): - instr = 'movea' - length = 2+extra_source+extra_dest - elif operation_code == 0x4: - # Miscellaneous - extra_source = 0 - extra_dest = 0 - size = None - skip_ea = False - if instruction & 0xf100 == 0x4100: - # lea, extb, chk - if instruction & 0xf1c0 == 0x41c0: - if instruction & 0x0038: - instr = 'lea' - dest = OpRegisterDirect(SIZE_LONG, Registers[((instruction >> 9) & 7) + 8]) - else: - instr = 'extb' - size = SIZE_LONG - else: - instr = 'chk' - if instruction & 0x0080: - size = SIZE_WORD - else: - size = SIZE_LONG - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - elif msb == 0x40: - # move from sr, negx - if instruction & 0xffc0 == 0x40c0: - # move from sr - instr = 'move' - size = SIZE_WORD - source = OpRegisterDirect(size, 'sr') - else: - instr = 'negx' 
- size = instruction >> 6 - elif msb == 0x42: - # move to ccr, clr - if instruction & 0xffc0 == 0x42c0: - # move to ccr - instr = 'move' - size = SIZE_WORD - source = OpRegisterDirect(size, 'ccr') - else: - instr = 'clr' - size = instruction >> 6 - elif msb == 0x44: - # move from ccr, neg - if instruction & 0xffc0 == 0x44c0: - # move from ccr - instr = 'move' - size = SIZE_WORD - dest = OpRegisterDirect(size, 'ccr') - else: - instr = 'neg' - size = instruction >> 6 - elif msb == 0x46: - # move from sr, not - if instruction & 0xffc0 == 0x46c0: - # move from sr - instr = 'move' - size = SIZE_WORD - dest = OpRegisterDirect(size, 'sr') - else: - instr = 'not' - size = instruction >> 6 - elif msb in (0x48, 0x4c): - # link, nbcd, movem, ext, swap, bkpt, pea, divs, divu, divsl, divul, muls, mulu - if instruction & 0xfff8 == 0x4808: - instr = 'link' - size = SIZE_LONG - dest, extra_dest = self.decode_effective_address(7, 4, data[2:], size) - elif instruction & 0xffc0 == 0x4800: - instr = 'nbcd' - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], SIZE_BYTE) - skip_ea = True - elif instruction & 0xfb80 == 0x4880: - if instruction & 0x0040: - size = SIZE_LONG - else: - size = SIZE_WORD - if instruction & 0x0038: - instr = 'movem' - extra_source = 2 - extra = struct.unpack_from('>H', data, 2)[0] - reg_list = [] - if instruction & 0x0038 == 0x0020: - for k in range(16): - if extra << k & 0x8000: - reg_list.append(Registers[k]) - else: - for k in range(16): - if extra >> k & 0x0001: - reg_list.append(Registers[k]) - source = OpRegisterMovemList(size, reg_list) - else: - instr = 'ext' - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], size) - skip_ea = True - if instruction & 0x0400: - source, dest = dest, source - elif instruction & 0xfff8 == 0x4840: - instr = 'swap' - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], 
SIZE_LONG) - skip_ea = True - elif instruction & 0xfff8 == 0x4848: - instr = 'bkpt' - source = OpImmediate(SIZE_BYTE, instruction & 7) - skip_ea = True - elif instruction & 0xffc0 == 0x4840: - instr = 'pea' - size = SIZE_LONG - elif msb == 0x4c: - size = SIZE_LONG - extra_dest = 2 - extra = struct.unpack_from('>H', data, 2)[0] - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_dest:], size) - dh = Registers[extra & 7] - dl = Registers[(extra >> 12) & 7] - dest = OpRegisterDirect(size, dl) - if instruction & 0x0040: - if extra & 0x0800: - instr = 'divs' - else: - instr = 'divu' - if extra & 0x0400: - dest = OpRegisterDirectPair(size, dh, dl) - elif dh != dl: - dest = OpRegisterDirectPair(size, dh, dl) - instr += 'l' - else: - if extra & 0x0800: - instr = 'muls' - else: - instr = 'mulu' - if extra & 0x0400: - dest = OpRegisterDirectPair(size, dh, dl) - skip_ea = True - elif msb == 0x4a: - # bgnd, illegal, tas, tst - if instruction == 0x4afa: - instr = 'bgnd' - skip_ea = True - elif instruction == 0x4afc: - instr = 'illegal' - skip_ea = True - elif instruction & 0xffc0 == 0x4ac0: - instr = 'tas' - skip_ea = True - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], SIZE_BYTE) - else: - instr = 'tst' - size = instruction >> 6 - elif msb == 0x4e: - # trap, link, unlk, move, reset, nop, stop, rte, rtd, rts, trapv, rtr, movec, jsr, jmp - if instruction & 0xfff0 == 0x4e40: - instr = 'trap' - length = 2 - source = OpImmediate(SIZE_BYTE, instruction & 15) - skip_ea = True - elif instruction & 0xfff0 == 0x4e50: - if instruction & 0xfff8 == 0x4e50: - instr = 'link' - dest, extra_dest = self.decode_effective_address(7, 4, data[2:], 1) - else: - instr = 'unlk' - source = OpRegisterDirect(SIZE_LONG, Registers[(instruction & 7) + 8]) - skip_ea = True - elif instruction & 0xfff0 == 0x4e60: - instr = 'move' - size = SIZE_LONG - source = OpRegisterDirect(SIZE_LONG, Registers[(instruction & 7) + 8]) 
- dest = OpRegisterDirect(size, 'usp') - if instruction & 0x08: - source, dest = dest, source - skip_ea = True - elif instruction == 0x4e70: - instr = 'reset' - skip_ea = True - elif instruction == 0x4e71: - instr = 'nop' - skip_ea = True - elif instruction == 0x4e72: - instr = 'stop' - source = OpImmediate(SIZE_WORD, struct.unpack_from(">H", data, 2)[0]) - extra_source = 2 - skip_ea = True - elif instruction == 0x4e73: - instr = 'rte' - skip_ea = True - elif instruction == 0x4e74: - instr = 'rtd' - dest, extra_dest = self.decode_effective_address(7, 4, data[2:], SIZE_WORD) - skip_ea = True - elif instruction == 0x4e75: - instr = 'rts' - skip_ea = True - elif instruction == 0x4e76: - instr = 'trapv' - skip_ea = True - elif instruction == 0x4e77: - instr = 'rtr' - skip_ea = True - elif instruction & 0xfffe == 0x4e7A: - instr = 'movec' - size = SIZE_LONG - extended = struct.unpack_from('>H', data, 2)[0] - control_reg = self.control_registers.get(extended & 0x0fff, None) - reg = (extended >> 12) & 15 - if control_reg is None: - instr = None - else: - source = OpRegisterDirect(size, control_reg) - dest = OpRegisterDirect(size, Registers[reg]) - if instruction & 1: - source, dest = dest, source - extra_source = 2 - skip_ea = True - elif instruction & 0xff80 == 0x4e80: - if instruction & 0xffc0 == 0x4e80: - instr = 'jsr' - else: - instr = 'jmp' - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], SIZE_LONG) - skip_ea = True - if instr is not None: - if size is not None: - size &= 3 - if skip_ea: - pass - elif dest is None: - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], size) - else: - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_dest:], size) - if extra_source is None or extra_dest is None: - instr = None - else: - length = 2+extra_source+extra_dest - elif operation_code == 0x5: - # ADDQ/SUBQ/Scc/DBcc/TRAPcc - 
if instruction & 0xf0c0 == 0x50c0: - if instruction & 0xf0f8 == 0x50c8: - instr = 'db'+Condition[(instruction >> 8) & 0xf] - source = OpRegisterDirect(SIZE_WORD, Registers[instruction & 7]) - dest = OpRegisterIndirectDisplacement(SIZE_LONG, 'pc', struct.unpack_from('>h', data, 2)[0]) - length = 4 - elif instruction & 0xf0ff in (0x50fa, 0x50fb, 0x50fc): - instr = 'trap'+Condition[(instruction >> 8) & 0xf] - if instruction & 7 == 2: - length = 4 - source = OpImmediate(SIZE_WORD, struct.unpack_from('>H', data, 2)[0]) - elif instruction & 7 == 3: - length = 6 - source = OpImmediate(SIZE_LONG, struct.unpack_from('>L', data, 2)[0]) - elif instruction & 7 == 4: - length = 2 - else: - instr = 's'+Condition[(instruction >> 8) & 0xf] - size = SIZE_BYTE - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if extra_dest is None: - return error_value - length = 2+extra_dest - else: - if instruction & 0x0100: - instr = 'subq' - else: - instr = 'addq' - val = (instruction >> 9) & 7 - if val == 0: - val = 8 - size = (instruction >> 6) & 3 - source = OpImmediate(SIZE_BYTE, val) - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if extra_dest is None: - return error_value - length = 2+extra_dest - elif operation_code == 0x6: - # Bcc/BSR/BRA - if msb == 0x60: - instr = 'bra' - elif msb == 0x61: - instr = 'bsr' - else: - instr = 'b'+Condition[(instruction >> 8) & 0xf] - val = instruction & 0xff - if val == 0: - val = struct.unpack_from('>h', data, 2)[0] - length = 4 - elif val == 0xff: - val = struct.unpack_from('>L', data, 2)[0] - length = 6 - else: - if val & 0x80: - val -= 256 - length = 2 - dest = OpRegisterIndirectDisplacement(SIZE_LONG, 'pc', val) - elif operation_code == 0x7: - # MOVEQ - instr = 'moveq' - size = SIZE_LONG - val = instruction & 0xff - if val & 0x80: - val |= 0xffffff00 - source = OpImmediate(size, val) - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) 
- length = 2 - elif operation_code == 0x8: - # OR/DIV/SBCD - if instruction & 0xf0c0 == 0x80c0: - if instruction & 0x0100: - instr = 'divs' - else: - instr = 'divu' - size = SIZE_WORD - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if extra_source is None: - return error_value - length = 2+extra_source - elif instruction & 0xf1f0 == 0x8100: - instr = 'sbcd' - length = 2 - dest = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7]) - source = OpRegisterDirect(SIZE_BYTE, Registers[instruction & 7]) - if instruction & 8: - dest = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[((instruction >> 9) & 7) + 8]) - source = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[(instruction & 7) + 8]) - elif instruction & 0xf130 == 0x8100: - if instruction & 0x0040: - instr = 'pack' - if instruction & 8: - dest = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[((instruction >> 9) & 7) + 8]) - source = OpRegisterIndirectPredecrement(SIZE_WORD, Registers[(instruction & 7) + 8]) - else: - dest = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7]) - source = OpRegisterDirect(SIZE_WORD, Registers[instruction & 7]) - else: - instr = 'unpk' - if instruction & 8: - dest = OpRegisterIndirectPredecrement(SIZE_WORD, Registers[((instruction >> 9) & 7) + 8]) - source = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[(instruction & 7) + 8]) - else: - dest = OpRegisterDirect(SIZE_WORD, Registers[(instruction >> 9) & 7]) - source = OpRegisterDirect(SIZE_BYTE, Registers[instruction & 7]) - length = 4 - third = OpImmediate(SIZE_WORD, struct.unpack_from(">H", data, 2)[0]) - else: - instr = 'or' - opmode = (instruction >> 6) & 0x7 - size = (instruction >> 6) & 3 - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if opmode & 
4: - source, dest = dest, source - if extra_source is None: - return error_value - length = 2+extra_source - elif operation_code == 0x9: - # SUB/SUBA/SUBX - instr = 'sub' - opmode = (instruction >> 6) & 0x7 - if opmode in (0x03, 0x07): - instr = 'suba' - if opmode == 0x03: - size = SIZE_WORD - else: - size = SIZE_LONG - dest = OpRegisterDirect(SIZE_LONG, Registers[((instruction >> 9) & 7) + 8]) - else: - size = (instruction >> 6) & 3 - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if instr == 'sub' and opmode & 4: - if isinstance(source, OpRegisterDirect): - instr = 'subx' - if source.reg[0] == 'a' or source.reg == 'sp': - source = OpRegisterIndirectPredecrement(size, source.reg) - dest = OpRegisterIndirectPredecrement(size, dest.reg) - else: - source, dest = dest, source - if extra_source is None: - return error_value - length = 2+extra_source - elif operation_code == 0xa: - # (unassigned, reserved) - pass - elif operation_code == 0xb: - # CMP/EOR - instr = 'cmp' - opmode = (instruction >> 6) & 0x7 - if opmode in (0x03, 0x07): - instr = 'cmpa' - if opmode == 0x03: - size = SIZE_WORD - else: - size = SIZE_LONG - dest = OpRegisterDirect(size, Registers[((instruction >> 9) & 7) + 8]) - else: - size = (instruction >> 6) & 3 - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if instr == 'cmp' and opmode & 4: - if instruction & 0x0038 == 0x0008: - instr = 'cmpm' - source = OpRegisterIndirectPostincrement(size, Registers[instruction & 15]) - dest = OpRegisterIndirectPostincrement(size, Registers[((instruction >> 9) & 7) + 8]) - else: - source, dest = dest, source - instr = 'eor' - if extra_source is None: - return error_value - length = 2+extra_source - elif operation_code == 0xc: - # AND/MUL/ABCD/EXG - if instruction & 0xf0c0 == 
0xc0c0: - if instruction & 0x0100: - instr = 'muls' - else: - instr = 'mulu' - size = SIZE_WORD - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - if extra_source is None: - return error_value - length = 2+extra_source - elif instruction & 0xf130 == 0xc100: - if instruction & 0xf1f0 == 0xc100: - instr = 'abcd' - if instruction & 0x0008: - source = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[(instruction & 7) + 8]) - dest = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[((instruction >> 9) & 7) + 8]) - else: - source = OpRegisterDirect(SIZE_BYTE, Registers[instruction & 7]) - dest = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7]) - else: - instr = 'exg' - size = SIZE_LONG - source = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - dest = OpRegisterDirect(size, Registers[instruction & 7]) - if instruction & 0xf1f8 == 0xc148: - source = OpRegisterIndirectPredecrement(size, Registers[((instruction >> 9) & 7) + 8]) - dest = OpRegisterIndirectPredecrement(size, Registers[(instruction & 7) + 8]) - if instruction & 0xf1f8 == 0xc188: - dest = OpRegisterIndirectPredecrement(size, Registers[(instruction & 7) + 8]) - length = 2 - else: - instr = 'and' - opmode = (instruction >> 6) & 0x7 - size = (instruction >> 6) & 3 - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if opmode & 4: - source, dest = dest, source - if extra_source is None: - return error_value - length = 2+extra_source - elif operation_code == 0xd: - # ADD/ADDA/ADDX - instr = 'add' - opmode = (instruction >> 6) & 0x7 - if opmode in (0x03, 0x07): - instr = 'adda' - if opmode == 0x03: - size = SIZE_WORD - else: - size = SIZE_LONG - dest = OpRegisterDirect(SIZE_LONG, Registers[((instruction >> 9) & 7) + 8]) - else: - size = (instruction >> 
6) & 3 - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if instr == 'add' and opmode & 4: - if isinstance(source, OpRegisterDirect): - instr = 'addx' - if source.reg[0] == 'a' or source.reg == 'sp': - source = OpRegisterIndirectPredecrement(size, source.reg) - dest = OpRegisterIndirectPredecrement(size, dest.reg) - else: - source, dest = dest, source - if extra_source is None: - return error_value - length = 2+extra_source - elif operation_code == 0xe: - # shift/rotate/bit field - if instruction & 0xF8C0 == 0xE0C0: - # shift/rotate - size = SIZE_WORD - direction = (instruction >> 8) & 1 - style = (instruction >> 9) & 3 - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - instr = ShiftStyle[style] - if direction: - instr += 'l' - else: - instr += 'r' - if extra_dest is None: - return error_value - length = 2+extra_dest - elif instruction & 0xF8C0 == 0xE8C0: - # bit field instructions - # TODO - style = (instruction >> 8) & 0x7 - instr = 'bf'+BitfieldStyle[style] - length = 4 - else: - # shift/rotate - size = (instruction >> 6) & 3 - direction = (instruction >> 8) & 1 - style = (instruction >> 3) & 3 - if (instruction >> 5) & 1: - source = OpRegisterDirect(SIZE_LONG, Registers[(instruction >> 9) & 7]) - else: - val = (instruction >> 9) & 7 - if val == 0: - val = 8 - source = OpImmediate(SIZE_BYTE, val) - dest = OpRegisterDirect(size, Registers[instruction & 7]) - instr = ShiftStyle[style] - if direction: - instr += 'l' - else: - instr += 'r' - length = 2 - elif operation_code == 0xf: - if instruction & 0xff20 == 0xf420: - instr = 'cpush' - length = 2 - elif instruction & 0xff80 == 0xff80: - instruction = 'illFF' - length = 2 - # coprocessor instructions - # TODO - if instr is None: - # FIXME uncomment to debug - #log_error('Bad opcode at 0x{:x}'.format(addr)) - return error_value - - #print((instr, 
length, size, source, dest, third)) - return instr, length, size, source, dest, third - - def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: int, size: int, source: Optional[Operand], dest: Optional[Operand], third: Optional[Operand]): - size_bytes = None - if size is not None: - size_bytes = 1 << size - - if instr in ('move', 'moveq'): - if instr == 'move' and isinstance(dest, OpRegisterDirect) and dest.reg in ('ccr', 'sr'): - il.append(il.set_reg(1, LLIL_TEMP(0), source.get_source_il(il))) - il.append(il.set_flag('c', il.test_bit(1, il.reg(1, LLIL_TEMP(0)), il.const(1, 0x01)))) - il.append(il.set_flag('v', il.test_bit(1, il.reg(1, LLIL_TEMP(0)), il.const(1, 0x02)))) - il.append(il.set_flag('z', il.test_bit(1, il.reg(1, LLIL_TEMP(0)), il.const(1, 0x04)))) - il.append(il.set_flag('n', il.test_bit(1, il.reg(1, LLIL_TEMP(0)), il.const(1, 0x08)))) - il.append(il.set_flag('x', il.test_bit(1, il.reg(1, LLIL_TEMP(0)), il.const(1, 0x10)))) - else: - flags = 'nzvc' - if ((isinstance(source, OpRegisterDirect) and source.reg in ('usp', 'ccr', 'sr')) or - (isinstance(dest, OpRegisterDirect) and dest.reg in ('usp', 'ccr', 'sr'))): - # move to/from control registers do not set flags - flags = 0 - il.append( - dest.get_dest_il(il, - source.get_source_il(il), - flags - ) - ) - elif instr in ('movea', 'movec'): - # dest.size = SIZE_LONG - # il.append( - # dest.get_dest_il(il, - # il.sign_extend(4, - # source.get_source_il(il) - # ) - # ) - # ) - il.append( - dest.get_dest_il(il, - source.get_source_il(il) - ) - ) - elif instr == 'clr': - il.append( - dest.get_dest_il(il, - il.const(4, 0), - 'nzvc' - ) - ) - elif instr in ('add', 'addi', 'addq'): - il.append( - dest.get_dest_il(il, - il.add(size_bytes, - dest.get_source_il(il), - source.get_source_il(il), - flags='*' - ) - ) - ) - elif instr == 'adda': - dest.size = SIZE_LONG - il.append( - dest.get_dest_il(il, - il.add(4, - dest.get_source_il(il), - il.sign_extend(4, - source.get_source_il(il) - ) - ) - 
) - ) - elif instr == 'addx': - il.append( - dest.get_dest_il(il, - il.add(size_bytes, - il.add(size_bytes, - dest.get_source_il(il), - source.get_source_il(il), - flags='*' - ), - il.flag('x'), - flags='*' - ) - ) - ) - elif instr in ('sub', 'subi', 'subq'): - il.append( - dest.get_dest_il(il, - il.sub(size_bytes, - dest.get_source_il(il), - source.get_source_il(il), - flags='*' - ) - ) - ) - elif instr == 'suba': - dest.size = SIZE_LONG - il.append( - dest.get_dest_il(il, - il.sub(4, - dest.get_source_il(il), - il.sign_extend(4, - source.get_source_il(il) - ) - ) - ) - ) - elif instr == 'subx': - il.append( - dest.get_dest_il(il, - il.sub(size_bytes, - il.sub(size_bytes, - dest.get_source_il(il), - source.get_source_il(il), - flags='*' - ), - il.flag('x'), - flags='*' - ) - ) - ) - elif instr == 'neg': - il.append( - dest.get_dest_il(il, - il.neg_expr(size_bytes, - dest.get_source_il(il), - flags='*' - ) - ) - ) - elif instr == 'negx': - il.append( - dest.get_dest_il(il, - il.sub(size_bytes, - il.neg_expr(size_bytes, - dest.get_source_il(il), - flags='*' - ), - il.flag('x'), - flags='*' - ) - ) - ) - elif instr == 'abcd': - # TODO - il.append(il.unimplemented()) - elif instr == 'sbcd': - # TODO - il.append(il.unimplemented()) - elif instr == 'nbcd': - # TODO - il.append(il.unimplemented()) - elif instr == 'pack': - il.append( - il.set_reg(2, - LLIL_TEMP(0), - il.add(2, - source.get_source_il(il), - third.get_source_il(il) - ) - ) - ) - il.append( - dest.get_dest_il(il, - il.or_expr(1, - il.and_expr(2, - il.reg(2, LLIL_TEMP(0)), - il.const(2, 0x000F) - ), - il.logical_shift_right(2, - il.and_expr(2, - il.reg(2, LLIL_TEMP(0)), - il.const(2, 0x0F00) - ), - il.const(1, 4) - ) - ) - ) - ) - elif instr == 'unpk': - il.append( - il.set_reg(1, - LLIL_TEMP(0), - source.get_source_il(il) - ) - ) - il.append( - dest.get_dest_il(il, - il.add(2, - il.or_expr(2, - il.and_expr(2, - il.reg(1, LLIL_TEMP(0)), - il.const(1, 0x0F) - ), - il.shift_left(2, - il.and_expr(2, - il.reg(1, 
LLIL_TEMP(0)), - il.const(1, 0xF0) - ), - il.const(1, 4) - ) - ), - third.get_source_il(il) - ) - ) - ) - elif instr in ('muls', 'mulu'): - if isinstance(dest, OpRegisterDirectPair): - il.append( - il.set_reg_split(4, - dest.reg1, - dest.reg2, - il.mult(4, - source.get_source_il(il), - dest.get_source_il(il)[0], - flags='nzvc' - ) - ) - ) - else: - il.append( - il.set_reg(4, - dest.reg, - il.mult(4, - source.get_source_il(il), - dest.get_source_il(il), - flags='nzvc' - ) - ) - ) - elif instr == 'divs': - if size == 1: - dividend_il = dest.get_source_il(il) - divisor_il = source.get_source_il(il) - dest.size = SIZE_LONG - il.append( - dest.get_dest_il(il, - il.or_expr(4, - il.shift_left(4, il.mod_signed(2, dividend_il, divisor_il), il.const(1, 16)), - il.div_signed(2, dividend_il, divisor_il, flags='nzvc') - ) - ) - ) - elif isinstance(dest, OpRegisterDirect): - dividend_il = dest.get_source_il(il) - divisor_il = source.get_source_il(il) - il.append( - dest.get_dest_il(il, - il.div_signed(4, dividend_il, divisor_il, flags='nzvc') - ) - ) - else: - dividend_il = il.or_expr(8, il.shift_left(8, il.reg(4, dest.reg1), il.const(1, 32)), il.reg(4, dest.reg2)) - divisor_il = source.get_source_il(il) - il.append( - il.set_reg(4, - LLIL_TEMP(0), - il.mod_signed(4, dividend_il, divisor_il) - ) - ) - il.append( - il.set_reg(4, - dest.reg2, - il.div_signed(4, dividend_il, divisor_il, flags='nzvc') - ) - ) - il.append( - il.set_reg(4, - dest.reg1, - il.reg(4, LLIL_TEMP(0)) - ) - ) - elif instr == 'divsl': - dividend_il = il.reg(4, dest.reg2) - divisor_il = source.get_source_il(il) - il.append( - il.set_reg(4, - dest.reg1, - il.mod_signed(4, dividend_il, divisor_il) - ) - ) - il.append( - il.set_reg(4, - dest.reg2, - il.div_signed(4, dividend_il, divisor_il, flags='nzvc') - ) - ) - elif instr == 'divu': - if size == 1: - dividend_il = dest.get_source_il(il) - divisor_il = source.get_source_il(il) - dest.size = SIZE_LONG - il.append( - dest.get_dest_il(il, - il.or_expr(4, - 
il.shift_left(4, il.mod_unsigned(2, dividend_il, divisor_il), il.const(1, 16)), - il.div_unsigned(2, dividend_il, divisor_il, flags='nzvc') - ) - ) - ) - elif isinstance(dest, OpRegisterDirect): - dividend_il = dest.get_source_il(il) - divisor_il = source.get_source_il(il) - il.append( - dest.get_dest_il(il, - il.div_unsigned(4, dividend_il, divisor_il, flags='nzvc') - ) - ) - else: - dividend_il = il.or_expr(8, il.shift_left(8, il.reg(4, dest.reg1), il.const(1, 32)), il.reg(4, dest.reg2)) - divisor_il = source.get_source_il(il) - il.append( - il.set_reg(4, - LLIL_TEMP(0), - il.mod_unsigned(4, dividend_il, divisor_il) - ) - ) - il.append( - il.set_reg(4, - dest.reg2, - il.div_unsigned(4, dividend_il, divisor_il, flags='nzvc') - ) - ) - il.append( - il.set_reg(4, - dest.reg1, - il.reg(4, LLIL_TEMP(0)) - ) - ) - elif instr == 'divul': - dividend_il = il.reg(4, dest.reg2) - divisor_il = source.get_source_il(il) - il.append( - il.set_reg(4, - dest.reg1, - il.mod_unsigned(4, dividend_il, divisor_il) - ) - ) - il.append( - il.set_reg(4, - dest.reg2, - il.div_unsigned(4, dividend_il, divisor_il, flags='nzvc') - ) - ) - elif instr == 'cas': - skip_label_found = True - - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) - - if skip is None: - skip = LowLevelILLabel() - skip_label_found = False - - il.append( - il.sub(size_bytes, - third.get_source_il(il), - source.get_source_il(il), - flags='nzvc' - ) - ) - - equal = LowLevelILLabel() - not_equal = LowLevelILLabel() - - il.append( - il.if_expr(il.flag_condition(LowLevelILFlagCondition.LLFC_E), equal, not_equal) - ) - - il.mark_label(equal) - - il.append( - third.get_dest_il(il, - dest.get_source_il(il) - ) - ) - - il.append( - il.goto(skip) - ) - - il.mark_label(not_equal) - - il.append( - source.get_dest_il(il, - third.get_source_il(il) - ) - ) - - if not skip_label_found: - il.mark_label(skip) - elif instr == 'cas2': - skip_label_found = True - - skip = 
il.get_label_for_address(Architecture['M68000'], il.current_address+length) - - if skip is None: - skip = LowLevelILLabel() - skip_label_found = False - - il.append( - il.sub(size_bytes, - third.get_source_il(il)[0], - source.get_source_il(il)[0], - flags='nzvc' - ) - ) - - equal = LowLevelILLabel() - not_equal = LowLevelILLabel() - check2 = LowLevelILLabel() - - il.append( - il.if_expr(il.flag_condition(LowLevelILFlagCondition.LLFC_E), check2, not_equal) - ) - - il.mark_label(check2) - - il.append( - il.sub(size_bytes, - third.get_source_il(il)[1], - source.get_source_il(il)[1], - flags='nzvc' - ) - ) - - il.append( - il.if_expr(il.flag_condition(LowLevelILFlagCondition.LLFC_E), equal, not_equal) - ) - - il.mark_label(equal) - - for it in third.get_dest_il(il, - dest.get_source_il(il) - ): - il.append(it) - - il.append( - il.goto(skip) - ) - - il.mark_label(not_equal) - - for it in source.get_dest_il(il, - third.get_source_il(il) - ): - il.append(it) - - il.append( - il.goto(skip) - ) - - if not skip_label_found: - il.mark_label(skip) - elif instr == 'chk': - skip_label_found = True - - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) - - if skip is None: - skip = LowLevelILLabel() - skip_label_found = False - - trap = LowLevelILLabel() - check = LowLevelILLabel() - - il.append( - il.if_expr( - il.compare_unsigned_less_than(size_bytes, - dest.get_source_il(il), - il.const(size_bytes, 0) - ), - trap, - check - ) - ) - - il.mark_label(check) - - il.append( - il.if_expr( - il.compare_unsigned_greater_than(size_bytes, - dest.get_source_il(il), - source.get_source_il(il) - ), - trap, - skip - ) - ) - - il.mark_label(trap) - - il.append( - il.system_call() - ) - - il.append( - il.goto(skip) - ) - - if not skip_label_found: - il.mark_label(skip) - elif instr == 'chk2': - skip_label_found = True - - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) - - if skip is None: - skip = LowLevelILLabel() - 
skip_label_found = False - - trap = LowLevelILLabel() - check = LowLevelILLabel() - - il.append( - il.set_reg(4, - LLIL_TEMP(0), - source.get_address_il(il) - ) - ) - - il.append( - il.if_expr( - il.compare_unsigned_less_than(size_bytes, - dest.get_source_il(il), - il.load(size_bytes, - il.reg(4, LLIL_TEMP(0)) - ) - ), - trap, - check - ) - ) - - il.mark_label(check) - - il.append( - il.if_expr( - il.compare_unsigned_greater_than(size_bytes, - dest.get_source_il(il), - il.load(size_bytes, - il.add(4, - il.reg(4, LLIL_TEMP(0)), - il.const(4, size_bytes) - ) - ) - ), - trap, - skip - ) - ) - - il.mark_label(trap) - - il.append( - il.system_call() - ) - - il.append( - il.goto(skip) - ) - - if not skip_label_found: - il.mark_label(skip) - elif instr == 'bchg': - bit_number_il = il.mod_unsigned(1, - source.get_source_il(il), - il.const(1, 8 << dest.size) - ) - il.append( - il.set_flag('z', - il.compare_not_equal(4, - il.test_bit(4, - dest.get_source_il(il), - il.shift_left(4, - il.const(4, 1), - bit_number_il - ) - ), - il.const(4, 0) - ) - ) - ) - il.append( - dest.get_dest_il(il, - il.xor_expr(4, - dest.get_source_il(il), - il.shift_left(4, - il.const(4, 1), - bit_number_il - ) - ) - ) - ) - elif instr == 'bclr': - bit_number_il = il.mod_unsigned(1, - source.get_source_il(il), - il.const(1, 8 << dest.size) - ) - il.append( - il.set_flag('z', - il.compare_not_equal(4, - il.test_bit(4, - dest.get_source_il(il), - il.shift_left(4, - il.const(4, 1), - bit_number_il - ) - ), - il.const(4, 0) - ) - ) - ) - il.append( - dest.get_dest_il(il, - il.and_expr(4, - dest.get_source_il(il), - il.not_expr(4, - il.shift_left(4, - il.const(4, 1), - bit_number_il - ) - ) - ) - ) - ) - elif instr == 'bset': - bit_number_il = il.mod_unsigned(1, - source.get_source_il(il), - il.const(1, 8 << dest.size) - ) - il.append( - il.set_flag('z', - il.compare_not_equal(4, - il.test_bit(4, - dest.get_source_il(il), - il.shift_left(4, - il.const(4, 1), - bit_number_il - ) - ), - il.const(4, 0) - ) - 
) - ) - il.append( - dest.get_dest_il(il, - il.or_expr(4, - dest.get_source_il(il), - il.shift_left(4, - il.const(4, 1), - bit_number_il - ) - ) - ) - ) - elif instr == 'btst': - bit_number_il = il.mod_unsigned(1, - source.get_source_il(il), - il.const(1, 8 << dest.size) - ) - il.append( - il.set_flag('z', - il.compare_not_equal(4, - il.test_bit(4, - dest.get_source_il(il), - il.shift_left(4, - il.const(4, 1), - bit_number_il - ) - ), - il.const(4, 0) - ) - ) - ) - elif instr in ('asl', 'lsl'): - source_il = il.const(1, 1) - if source is not None: - source_il = source.get_source_il(il) - il.append( - dest.get_dest_il(il, - il.shift_left(size_bytes, - dest.get_source_il(il), - source_il, - flags='*' - ) - ) - ) - elif instr == 'asr': - source_il = il.const(1, 1) - if source is not None: - source_il = source.get_source_il(il) - il.append( - dest.get_dest_il(il, - il.arith_shift_right(size_bytes, - dest.get_source_il(il), - source_il, - flags='*' - ) - ) - ) - elif instr == 'lsr': - source_il = il.const(1, 1) - if source is not None: - source_il = source.get_source_il(il) - il.append( - dest.get_dest_il(il, - il.logical_shift_right(size_bytes, - dest.get_source_il(il), - source_il, - flags='*' - ) - ) - ) - elif instr == 'rol': - source_il = il.const(1, 1) - if source is not None: - source_il = source.get_source_il(il) - il.append( - dest.get_dest_il(il, - il.rotate_left(size_bytes, - dest.get_source_il(il), - source_il, - flags='*' - ) - ) - ) - elif instr == 'ror': - source_il = il.const(1, 1) - if source is not None: - source_il = source.get_source_il(il) - il.append( - dest.get_dest_il(il, - il.rotate_right(size_bytes, - dest.get_source_il(il), - source_il, - flags='*' - ) - ) - ) - elif instr == 'roxl': - source_il = il.const(1, 1) - if source is not None: - source_il = source.get_source_il(il) - il.append( - dest.get_dest_il(il, - il.rotate_left_carry(size_bytes, - dest.get_source_il(il), - source_il, - il.flag('x'), - flags='*' - ) - ) - ) - elif instr == 
'roxr': - source_il = il.const(1, 1) - if source is not None: - source_il = source.get_source_il(il) - il.append( - dest.get_dest_il(il, - il.rotate_right_carry(size_bytes, - dest.get_source_il(il), - source_il, - il.flag('x'), - flags='*' - ) - ) - ) - elif instr in ('cmp', 'cmpi', 'cmpm'): - il.append( - il.sub(size_bytes, - dest.get_source_il(il), - source.get_source_il(il), - flags='nzvc' - ) - ) - elif instr == 'cmpa': - dest.size = SIZE_LONG - il.append( - il.sub(4, - dest.get_source_il(il), - il.sign_extend(4, - source.get_source_il(il) - ), - flags='nzvc' - ) - ) - elif instr == 'cmp2': - skip_label_found = True - - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) - - if skip is None: - skip = LowLevelILLabel() - skip_label_found = False - - check = LowLevelILLabel() - - il.append( - il.set_reg(4, - LLIL_TEMP(0), - source.get_address_il(il) - ) - ) - - il.append( - il.sub(size_bytes, - dest.get_source_il(il), - il.load(size_bytes, - il.reg(4, LLIL_TEMP(0)) - ), - flags='nzvc' - ) - ) - - il.append( - il.if_expr( - il.flag_condition(LowLevelILFlagCondition.LLFC_ULT), - skip, - check - ) - ) - - il.mark_label(check) - - il.append( - il.sub(size_bytes, - dest.get_source_il(il), - il.load(size_bytes, - il.add(4, - il.reg(4, LLIL_TEMP(0)), - il.const(4, size_bytes) - ) - ), - flags='nzvc' - ) - ) - - il.append( - il.goto(skip) - ) - - if not skip_label_found: - il.mark_label(skip) - elif instr == 'tas': - il.append( - il.set_reg(1, LLIL_TEMP(0), dest.get_source_il(il), flags='nzvc') - ) - il.append( - dest.get_dest_il(il, - il.or_expr(1, - il.reg(1, LLIL_TEMP(0)), - il.const(1, 0x80) - ) - ) - ) - elif instr == 'tst': - il.append( - il.sub(size_bytes, - dest.get_source_il(il), - il.const(4, 0), - flags='nzvc' - ) - ) - elif instr in ('and', 'andi'): - if instr == 'andi' and isinstance(dest, OpRegisterDirect) and dest.reg in ('ccr', 'sr'): - if not source.value & 0x01: il.append(il.set_flag('c', il.const(1, 0))) - if not 
source.value & 0x02: il.append(il.set_flag('v', il.const(1, 0))) - if not source.value & 0x04: il.append(il.set_flag('z', il.const(1, 0))) - if not source.value & 0x08: il.append(il.set_flag('n', il.const(1, 0))) - if not source.value & 0x11: il.append(il.set_flag('x', il.const(1, 0))) - else: - il.append( - dest.get_dest_il(il, - il.and_expr(size_bytes, - dest.get_source_il(il), - source.get_source_il(il), - flags='nzvc' - ) - ) - ) - elif instr in ('or', 'ori'): - if instr == 'ori' and isinstance(dest, OpRegisterDirect) and dest.reg in ('ccr', 'sr'): - if source.value & 0x01: il.append(il.set_flag('c', il.const(1, 1))) - if source.value & 0x02: il.append(il.set_flag('v', il.const(1, 1))) - if source.value & 0x04: il.append(il.set_flag('z', il.const(1, 1))) - if source.value & 0x08: il.append(il.set_flag('n', il.const(1, 1))) - if source.value & 0x11: il.append(il.set_flag('x', il.const(1, 1))) - else: - il.append( - dest.get_dest_il(il, - il.or_expr(size_bytes, - dest.get_source_il(il), - source.get_source_il(il), - flags='nzvc' - ) - ) - ) - elif instr in ('eor', 'eori'): - if instr == 'eori' and isinstance(dest, OpRegisterDirect) and dest.reg in ('ccr', 'sr'): - if source.value & 0x01: il.append(il.set_flag('c', il.xor_expr(1, il.flag('c'), il.const(1, 1)))) - if source.value & 0x02: il.append(il.set_flag('v', il.xor_expr(1, il.flag('v'), il.const(1, 1)))) - if source.value & 0x04: il.append(il.set_flag('z', il.xor_expr(1, il.flag('z'), il.const(1, 1)))) - if source.value & 0x08: il.append(il.set_flag('n', il.xor_expr(1, il.flag('n'), il.const(1, 1)))) - if source.value & 0x11: il.append(il.set_flag('x', il.xor_expr(1, il.flag('x'), il.const(1, 1)))) - else: - il.append( - dest.get_dest_il(il, - il.xor_expr(size_bytes, - dest.get_source_il(il), - source.get_source_il(il), - flags='nzvc' - ) - ) - ) - elif instr == 'not': - il.append( - dest.get_dest_il(il, - il.not_expr(size_bytes, - dest.get_source_il(il), - flags='nzvc' - ) - ) - ) - elif instr == 'swap': - 
il.append( - dest.get_dest_il(il, - il.rotate_right(4, - dest.get_source_il(il), - il.const(1, 16) - ) - ) - ) - elif instr == 'exg': - il.append( - il.set_reg(4, LLIL_TEMP(0), source.get_source_il(il)) - ) - il.append( - source.get_dest_il(il, dest.get_source_il(il)) - ) - il.append( - dest.get_dest_il(il, il.reg(4, LLIL_TEMP(0))) - ) - elif instr == 'ext': - if not dest: - il.append(il.unimplemented()) - elif dest.size == 1: - il.append( - il.set_reg(2, - dest.reg, - il.sign_extend(4, - il.reg(1, dest.reg), - flags='nzvc' - ) - ) - ) - else: - il.append( - il.set_reg(4, - dest.reg, - il.sign_extend(4, - il.reg(2, dest.reg), - flags='nzvc' - ) - ) - ) - elif instr == 'extb': - reg = dest.reg - il.append( - il.set_reg(4, - reg, - il.sign_extend(4, - il.reg(1, reg), - flags='nzvc' - ) - ) - ) - elif instr == 'movem': - if isinstance(source, OpRegisterMovemList): - if isinstance(dest, OpRegisterIndirectPredecrement): - il.append( - il.set_reg(4, LLIL_TEMP(0), dest.get_address_il(il)) - ) - if self.movem_store_decremented: - il.append( - il.set_reg(4, - dest.reg, - il.sub(4, - il.reg(4, LLIL_TEMP(0)), - il.const(4, len(source.regs)*size_bytes) - ) - ) - ) - for k in range(len(source.regs)): - il.append( - il.store(size_bytes, - il.sub(4, - il.reg(4, LLIL_TEMP(0)), - il.const(4, (k+1)*size_bytes) - ), - il.reg(size_bytes, source.regs[len(source.regs)-1-k]) - ) - ) - if not self.movem_store_decremented: - il.append( - il.set_reg(4, - dest.reg, - il.sub(4, - il.reg(4, LLIL_TEMP(0)), - il.const(4, len(source.regs)*size_bytes) - ) - ) - ) - else: - il.append( - il.set_reg(4, LLIL_TEMP(0), dest.get_address_il(il)) - ) - for k in range(len(source.regs)): - il.append( - il.store(size_bytes, - il.add(4, - il.reg(4, LLIL_TEMP(0)), - il.const(4, k*size_bytes) - ), - il.reg(size_bytes, source.regs[k]) - ) - ) - else: - il.append( - il.set_reg(4, LLIL_TEMP(0), source.get_address_il(il)) - ) - for k in range(len(dest.regs)): - il.append( - il.set_reg(size_bytes, - dest.regs[k], - 
il.load(size_bytes, - il.add(4, - il.reg(4, LLIL_TEMP(0)), - il.const(4, k*size_bytes) - ) - ) - ) - ) - if isinstance(source, OpRegisterIndirectPostincrement): - il.append( - il.set_reg(4, - source.reg, - il.add(4, - il.reg(4, LLIL_TEMP(0)), - il.const(4, len(dest.regs)*size_bytes) - ) - ) - ) - elif instr == 'lea': - il.append( - dest.get_dest_il(il, source.get_address_il(il)) - ) - elif instr == 'pea': - il.append( - il.push(4, dest.get_address_il(il)) - ) - elif instr == 'link': - source.size = SIZE_LONG - il.append( - il.push(4, source.get_source_il(il)) - ) - il.append( - source.get_dest_il(il, il.reg(4, "sp")) - ) - il.append( - il.set_reg(4, - "sp", - il.add(4, - il.reg(4, "sp"), - il.sign_extend(4, dest.get_source_il(il)) - ) - ) - ) - elif instr == 'unlk': - il.append( - il.set_reg(4, "sp", source.get_source_il(il)) - ) - il.append( - source.get_dest_il(il, il.pop(4)) - ) - elif instr in ('jmp', 'bra'): - tmpil = LowLevelILFunction(il.arch) - _dest_il = dest.get_address_il2(tmpil) - dest_il = _dest_il[0] - for i in _dest_il[1]: - tmpil.append(i) - - dstlabel = None - try: - if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST: - dstlabel = il.get_label_for_address(il.arch, tmpil[dest_il].constant) - except: - raise - - if dstlabel is not None: - il.append( - il.goto(dstlabel) - ) - else: - il.append( - il.jump(dest.get_address_il(il)) - ) - elif instr in ('jsr', 'bsr'): - il.append( - il.call(dest.get_address_il(il)) - ) - elif instr == 'callm': - # TODO - il.append(il.unimplemented()) - elif instr == 'cpush': - # TODO - il.append(il.unimplemented()) - elif instr in ('bhi', 'bls', 'bcc', 'bcs', 'bne', 'beq', 'bvc', 'bvs', - 'bpl', 'bmi', 'bge', 'blt', 'bgt', 'ble'): - flag_cond = ConditionMapping.get(instr[1:], None) - tmpil = LowLevelILFunction(il.arch) - _dest_il = dest.get_address_il2(tmpil) - dest_il = _dest_il[0] - for i in _dest_il[1]: - tmpil.append(i) - cond_il = None - - if flag_cond is not None: - cond_il = il.flag_condition(flag_cond) 
- - if cond_il is None: - il.append(il.unimplemented()) - else: - t = None - if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST: - t = il.get_label_for_address(il.arch, tmpil[dest_il].constant) - - indirect = False - - if t is None: - t = LowLevelILLabel() - indirect = True - - f_label_found = True - - f = il.get_label_for_address(il.arch, il.current_address+length) - - if f is None: - f = LowLevelILLabel() - f_label_found = False - - il.append( - il.if_expr(cond_il, t, f) - ) - - if indirect: - il.mark_label(t) - il.append(il.jump(dest.get_address_il(il))) - - if not f_label_found: - il.mark_label(f) - elif instr in ('dbt', 'dbf', 'dbhi', 'dbls', 'dbcc', 'dbcs', 'dbne', - 'dbeq', 'dbvc', 'dbvs', 'dbpl', 'dbmi', 'dbge', 'dblt', - 'dbgt', 'dble'): - flag_cond = ConditionMapping.get(instr[2:], None) - tmpil = LowLevelILFunction(il.arch) - _dest_il = dest.get_address_il2(tmpil) - dest_il = _dest_il[0] - for i in _dest_il[1]: - tmpil.append(i) - cond_il = None - - if flag_cond is not None: - cond_il = il.flag_condition(flag_cond) - elif instr == 'dbt': - cond_il = il.const(1, 1) - elif instr == 'dbf': - cond_il = il.const(1, 0) - - if cond_il is None: - il.append(il.unimplemented()) - else: - branch = None - if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST: - branch = il.get_label_for_address(Architecture['M68000'], tmpil[dest_il].constant) - - indirect = False - - if branch is None: - branch = LowLevelILLabel() - indirect = True - - skip_label_found = True - - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) - - if skip is None: - skip = LowLevelILLabel() - skip_label_found = False - - decrement = LowLevelILLabel() - - il.append( - il.if_expr(cond_il, skip, decrement) - ) - - il.mark_label(decrement) - - il.append( - il.set_reg(2, - LLIL_TEMP(0), - il.sub(2, - source.get_source_il(il), - il.const(2, 1) - ) - ) - ) - - il.append( - source.get_dest_il(il, il.reg(2, LLIL_TEMP(0))) - ) - - il.append( - il.if_expr( 
- il.compare_equal(2, - il.reg(2, LLIL_TEMP(0)), - il.const(2, -1) - ), - skip, - branch - ) - ) - - if indirect: - il.mark_label(branch) - il.append(il.jump(dest.get_address_il(il))) - - if not skip_label_found: - il.mark_label(skip) - elif instr in ('st', 'sf', 'shi', 'sls', 'scc', 'scs', 'sne', 'seq', - 'svc', 'svs', 'spl', 'smi', 'sge', 'slt', 'sgt', 'sle'): - flag_cond = ConditionMapping.get(instr[1:], None) - cond_il = None - - if flag_cond is not None: - cond_il = il.flag_condition(flag_cond) - elif instr == 'st': - cond_il = il.const(1, 1) - elif instr == 'sf': - cond_il = il.const(1, 0) - - if cond_il is None: - il.append(il.unimplemented()) - else: - skip_label_found = True - - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) - - if skip is None: - skip = LowLevelILLabel() - skip_label_found = False - - set_dest = LowLevelILLabel() - clear_dest = LowLevelILLabel() - - il.append( - il.if_expr(cond_il, set_dest, clear_dest) - ) - - il.mark_label(set_dest) - - il.append( - dest.get_dest_il(il, il.const(1, 1)) - ) - - il.append( - il.goto(skip) - ) - - il.mark_label(clear_dest) - - il.append( - dest.get_dest_il(il, il.const(1, 0)) - ) - - il.append( - il.goto(skip) - ) - - if not skip_label_found: - il.mark_label(skip) - elif instr == 'rtd': - il.append( - il.set_reg(4, - LLIL_TEMP(0), - il.pop(4) - ) - ) - il.append( - il.set_reg(4, 'sp', - il.add(4, - il.reg(4, 'sp'), - il.sign_extend(4, il.const(2, - dest.value), - 0 - ) - ) - ) - ) - il.append( - il.ret( - il.reg(4, LLIL_TEMP(0)) - ) - ) - elif instr == 'rte': - il.append( - il.set_reg(2, - "sr", - il.pop(2) - ) - ) - il.append( - il.ret( - il.pop(4) - ) - ) - elif instr == 'rtm': - # TODO - il.append(il.unimplemented()) - elif instr == 'rtr': - il.append( - il.set_reg(2, - "ccr", - il.pop(2) - ) - ) - il.append( - il.ret( - il.pop(4) - ) - ) - elif instr == 'rts': - il.append( - il.ret( - il.pop(4) - ) - ) - elif instr in ('trapv', 'trapt', 'trapf', 'traphi', 'trapls', 
'trapcc', - 'trapcs', 'trapne', 'trapeq', 'trapvc', 'trapvs', 'trappl', - 'trapmi', 'trapge', 'traplt', 'trapgt', 'traple'): - flag_cond = ConditionMapping.get(instr[4:], None) - cond_il = None - - if flag_cond is not None: - cond_il = il.flag_condition(flag_cond) - elif instr == 'trapt': - cond_il = il.const(1, 1) - elif instr == 'trapf': - cond_il = il.const(1, 0) - elif instr == 'trapv': - cond_il = il.flag_condition(LowLevelILFlagCondition.LLFC_O) - - if cond_il is None: - il.append(il.unimplemented()) - else: - skip_label_found = True - - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) - - if skip is None: - skip = LowLevelILLabel() - skip_label_found = False - - trap = LowLevelILLabel() - - il.append( - il.if_expr(cond_il, trap, skip) - ) - - il.mark_label(trap) - - il.append( - il.system_call() - ) - - il.append( - il.goto(skip) - ) - - if not skip_label_found: - il.mark_label(skip) - elif instr in ('trap', 'illegal', 'bkpt'): - il.append(il.system_call()) - elif instr in ('bgnd', 'nop', 'reset', 'stop'): - il.append(il.nop()) - else: - il.append(il.unimplemented()) - - def get_instruction_info(self, data: bytes, addr: int) -> Optional[InstructionInfo]: - instr, length, _size, _source, dest, _third = self.decode_instruction(data, addr) - if instr == 'unimplemented': - return None - - result = InstructionInfo() - result.length = length - - if instr in ('rtd', 'rte', 'rtr', 'rts'): - result.add_branch(BranchType.FunctionReturn) - elif instr in ('jmp', 'jsr', - 'bra', 'bsr', 'bhi', 'bls', 'bcc', 'bcs', 'bne', 'beq', - 'bvc', 'bvs', 'bpl', 'bmi', 'bge', 'blt', 'bgt', 'ble', - 'dbt', 'dbf', 'dbhi', 'dbls', 'dbcc', 'dbcs', 'dbne', - 'dbeq', 'dbvc', 'dbvs', 'dbpl', 'dbmi', 'dbge', 'dblt', - 'dbgt', 'dble'): - conditional = False - branch_dest = None - - bt = BranchType.UnresolvedBranch - if instr in ('jmp', 'bra'): - bt = BranchType.UnconditionalBranch - elif instr in ('jsr', 'bsr'): - bt = BranchType.CallDestination - else: - 
conditional = True - - if isinstance(dest, OpAbsolute): - branch_dest = dest.address - elif isinstance(dest, OpRegisterIndirect): - if dest.reg == 'pc': - branch_dest = addr+2 - else: - bt = BranchType.UnresolvedBranch - elif isinstance(dest, OpRegisterIndirectDisplacement): - if dest.reg == 'pc': - branch_dest = addr+2+dest.offset - else: - bt = BranchType.UnresolvedBranch - elif isinstance(dest, OpRegisterIndirectIndex): - bt = BranchType.UnresolvedBranch - - if conditional: - # pylint: disable=unsubscriptable-object - if instr[0:2] == 'db': - result.add_branch(BranchType.TrueBranch, addr+length) - result.add_branch(BranchType.FalseBranch, branch_dest) - else: - result.add_branch(BranchType.TrueBranch, branch_dest) - result.add_branch(BranchType.FalseBranch, addr+length) - else: - if bt == BranchType.IndirectBranch or bt == BranchType.UnresolvedBranch or branch_dest is None: - result.add_branch(bt) - else: - result.add_branch(bt, branch_dest) - - return result - - def get_instruction_text(self, data: bytes, addr: int) -> Optional[Tuple[List['function.InstructionTextToken'], int]]: - instr, length, size, source, dest, third = self.decode_instruction(data, addr) - - if size is not None: - # pylint: disable=invalid-sequence-index - instr += SizeSuffix[size] - - tokens = [InstructionTextToken(InstructionTextTokenType.InstructionToken, "%-10s" % instr)] - - if source is not None: - tokens += source.format(addr) - - if dest is not None: - if source is not None: - tokens += [InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ',')] - tokens += dest.format(addr) - - if third is not None: - if source is not None or dest is not None: - tokens += [InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ',')] - tokens += third.format(addr) - - return tokens, length - - def get_instruction_low_level_il(self, data: bytes, addr: int, il: lowlevelil.LowLevelILFunction) -> Optional[int]: - instr, length, size, source, dest, third = 
self.decode_instruction(data, addr) - - if instr == 'movem': - # movem overrides default predecrement/postincrement IL generation - - self.generate_instruction_il(il, instr, length, size, source, dest, third) - - elif instr is not None: - - # predecrement - if source is not None: - pre_il = source.get_pre_il(il) - if pre_il is not None: - il.append(pre_il) - - if dest is not None: - pre_il = dest.get_pre_il(il) - if pre_il is not None: - il.append(pre_il) - - if third is not None: - pre_il = third.get_pre_il(il) - if pre_il is not None: - il.append(pre_il) - - self.generate_instruction_il(il, instr, length, size, source, dest, third) - - # postincrement - if source is not None: - post_il = source.get_post_il(il) - if post_il is not None: - il.append(post_il) - - if dest is not None: - post_il = dest.get_post_il(il) - if post_il is not None: - il.append(post_il) - - if third is not None: - post_il = third.get_post_il(il) - if post_il is not None: - il.append(post_il) - else: - il.append(il.unimplemented()) - return length - - def is_never_branch_patch_available(self, data: bytes, addr: int = 0) -> bool: - data = bytearray(data) - if data[0] & 0xf0 == 0x60: - # BRA, BSR, Bcc - return True - if data[0] == 0x4e and data[1] & 0x80 == 0x80: - # JMP, JSR - return True - return False - - def is_invert_branch_patch_available(self, data: bytes, addr: int = 0) -> bool: - data = bytearray(data) - if data[0] & 0xf0 == 0x60 and data[0] & 0xfe != 0x60: - # Bcc - return True - return False - - def is_always_branch_patch_available(self, data: bytes, addr: int = 0) -> bool: - data = bytearray(data) - if data[0] & 0xf0 == 0x60 and data[0] & 0xfe != 0x60: - # Bcc - return True - return False - - def is_skip_and_return_zero_patch_available(self, data: bytes, addr: int = 0) -> bool: - return self.skip_and_return_value(data, addr, 0) - - def is_skip_and_return_value_patch_available(self, data: bytes, addr: int = 0) -> bool: - data = bytearray(data) - if data[0] == 0x61: - # BSR - return 
True - if data[0] == 0x4e and data[1] & 0xc0 == 0x80: - # JSR - return True - return False - - def convert_to_nop(self, data: bytes, addr: int = 0) -> Optional[bytes]: - count = int(len(data)/2) - if count*2 != len(data): - return None - return b'\x4e\x71' * count - - def never_branch(self, data, addr): - data = bytearray(data) - if data[0] & 0xf0 == 0x60: - # BRA, BSR, Bcc - return self.convert_to_nop(data, addr) - if data[0] == 0x4e and data[1] & 0x80 == 0x80: - # JMP, JSR - return self.convert_to_nop(data, addr) - return None - - def invert_branch(self, data: bytes, addr: int = 0) -> Optional[bytes]: - data = bytearray(data) - if data[0] & 0xf0 == 0x60 and data[0] & 0xfe != 0x60: - # Bcc - return bytearray([data[0]^1])+data[1:] - return None - - def always_branch(self, data: bytes, addr: int = 0) -> Optional[bytes]: - data = bytearray(data) - if data[0] & 0xf0 == 0x60 and data[0] & 0xfe != 0x60: - # Bcc - return b'\x60'+data[1:] - return None - - def skip_and_return_value(self, data: bytes, addr: int, value: int) -> Optional[bytes]: - count = int(len(data)/2) - if count*2 != len(data): - return None - data = bytearray(data) - ok = False - if data[0] == 0x61: - # BSR - ok = True - if data[0] == 0x4e and data[1] & 0xc0 == 0x80: - # JSR - ok = True - if not ok: - return None - - if value > 0x80000000: - value = value - 0x100000000 - - if value >= -128 and value <= 127 and len(data) >= 2: - value = value & 0xff - return b'\x70'+struct.pack('>b',value)+b'\x4e\x71'*(count-1) - - if len(data) >= 6: - return b'\x20\x3C'+struct.pack('>l', value)+b'\x4e\x71'*(count-3) - - return None - - -class M68008(M68000): - name = "M68008" - - -class M68010(M68000): - name = "M68010" - control_registers = { - 0x000: 'sfc', - 0x001: 'dfc', - 0x800: 'usp', - 0x801: 'vbr', - } - - # add BKPT, MOVE from CCR, MOVEC, MOVES, RTD - - -class M68020(M68010): - name = "M68020" - control_registers = { - 0x000: 'sfc', - 0x001: 'dfc', - 0x800: 'usp', - 0x801: 'vbr', - 0x002: 'cacr', - 0x802: 
'caar', - 0x803: 'msp', - 0x804: 'isp', - } - address_size = 4 - memory_indirect = True - movem_store_decremented = True - - # add BFCHG, BFCLR, BFEXTS, BFEXTU, BFFO, BFINS, BFSET, BFTST, CALLM, CAS, CAS2, CHK2, CMP2, cpBcc, cpDBcc, cpGEN, cpRESTORE, cpSAVE, cpScc, cpTRAPcc - # DIVSL, DIVUL, EXTB, PACK, RTM, TRAPcc, UNPK - # add memory indirect addressing - - -class M68030(M68020): - name = "M68030" - - # remove CALLM, RTM - # add PFLUSH, PFLUSHA, PLOAD, PMOVE, PTEST - - -class M68040(M68030): - name = "M68040" - control_registers = { - 0x000: 'sfc', - 0x001: 'dfc', - 0x800: 'usp', - 0x801: 'vbr', - 0x002: 'cacr', - 0x803: 'msp', - 0x804: 'isp', - 0x003: 'tc', - 0x004: 'itt0', - 0x005: 'itt1', - 0x006: 'dtt0', - 0x007: 'dtt1', - 0x805: 'mmusr', - 0x806: 'urp', - 0x807: 'srp', - } - - # remove cpBcc, cpDBcc, cpGEN, cpRESTORE, cpSAVE, cpScc, cpTRAPcc, PFLUSHA, PLOAD, PMOVE - # add CINV, CPUSH, floating point, MOVE16 - - -class M68LC040(M68040): - name = "M68LC040" - - -class M68EC040(M68040): - name = "M68EC040" - control_registers = { - 0x000: 'sfc', - 0x001: 'dfc', - 0x800: 'usp', - 0x801: 'vbr', - 0x002: 'cacr', - 0x803: 'msp', - 0x804: 'isp', - 0x004: 'iacr0', - 0x005: 'iacr1', - 0x006: 'dacr0', - 0x007: 'dacr1' - } - - -class M68330(M68010): - name = "M68330" - movem_store_decremented = True - # AKA CPU32 - - # add BGND, CHK2, CMP2, DIVSL, DIVUL, EXTB, LPSTOP, TBLS, TBLSN, TBLU, TBLUN, TRAPcc - - -class M68340(M68330): - name = "M68340" - - -def create_vector_table(view, addr, size=256): - vectors = { - 0: 'reset_initial_interrupt_stack_pointer', - 1: 'reset_initial_program_counter', - 2: 'access_fault', - 3: 'address_error', - 4: 'illegal_instruction', - 5: 'integer_divide_by_zero', - 6: 'chk_chk2_instruction', - 7: 'ftrapcc_trapcc_trapv_instruction', - 8: 'privilege_violation', - 9: 'trace', - 10: 'line_1010_emulator', - 11: 'line_1111_emulator', - # 12 unassigned_reserved - 13: 'coprocessor_protocol_violation', - 14: 'format_error', - 15: 
'uninitialized_interrupt', - # 16-23 unassigned_reserved - 24: 'spurious_interrupt', - 25: 'level_1_interrupt_autovector', - 26: 'level_2_interrupt_autovector', - 27: 'level_3_interrupt_autovector', - 28: 'level_4_interrupt_autovector', - 29: 'level_5_interrupt_autovector', - 30: 'level_6_interrupt_autovector', - 31: 'level_7_interrupt_autovector', - 32: 'trap_0_instruction', - 33: 'trap_1_instruction', - 34: 'trap_2_instruction', - 35: 'trap_3_instruction', - 36: 'trap_4_instruction', - 37: 'trap_5_instruction', - 38: 'trap_6_instruction', - 39: 'trap_7_instruction', - 40: 'trap_8_instruction', - 41: 'trap_9_instruction', - 42: 'trap_10_instruction', - 43: 'trap_11_instruction', - 44: 'trap_12_instruction', - 45: 'trap_13_instruction', - 46: 'trap_14_instruction', - 47: 'trap_15_instruction', - 48: 'fp_branch_or_set_on_unordered_condition', - 49: 'fp_inexact_result', - 50: 'fp_divide_by_zero', - 51: 'fp_underflow', - 52: 'fp_operand_error', - 53: 'fp_overflow', - 54: 'fp_signaling_nan', - 55: 'fp_unimplemented_data_type', - 56: 'mmu_configuration_error', - 57: 'mmu_illegal_operation_error', - 58: 'mmu_access_level_violation_error', - # 59-63 unassigned_reserved - } - for k in range(0, 192): - vectors[k+64] = 'user_%d' % k - - t = view.parse_type_string("void *")[0] - - for k in range(size): - name = vectors.get(k, 'unassigned_reserved') - - view.define_user_symbol(Symbol(SymbolType.DataSymbol, addr+4*k, "_vector_%d_%s" % (k, name))) - view.define_user_data_var(addr+4*k, t) - value = struct.unpack(">L", view.read(addr+4*k, 4))[0] - - if k > 0: - view.define_user_symbol(Symbol(SymbolType.FunctionSymbol, value, "vector_%d_%s" % (k, name))) - if value > 0: - view.add_entry_point(value) - - -def prompt_create_vector_table(view, addr=None): - architectures = ['M68000', 'M68008', 'M68010', 'M68020', 'M68030', 'M68040', 'M68LC040', 'M68EC040', 'M68330', 'M68340'] - size_choices = ['Full (256)', 'MMU (59)', 'FP (56)', 'Traps (48)', 'Interrupts (32)'] - size_raw = [256, 59, 
56, 48, 32] - - if addr is None: - addr = 0 - - need_arch = True - if view.platform is not None and view.platform.arch.name in architectures: - # 68k arch already selected - need_arch = False - - address_field = AddressField('Address', view, addr) - arch_field = ChoiceField('Architecture', architectures) - size_field = ChoiceField('Table size', size_choices) - - res = False - - if need_arch: - res = get_form_input([address_field, arch_field, size_field], 'Create M68k vector table') - else: - res = get_form_input([address_field, size_field], 'Create M68k vector table') - - if res: - address = address_field.result - size = size_raw[size_field.result] - - if need_arch: - arch = architectures[arch_field.result] - view.platform = Architecture[arch].standalone_platform - - create_vector_table(view, address, size) - +from .m68k import * #PluginCommand.register("Create M68k vector table", "Create M68k vector table", prompt_create_vector_table) PluginCommand.register_for_address("Create M68k vector table", "Create M68k vector table", prompt_create_vector_table) diff --git a/m68k.py b/m68k.py new file mode 100644 index 0000000..70880f1 --- /dev/null +++ b/m68k.py @@ -0,0 +1,3697 @@ +""" + +Copyright (c) 2017 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +""" + +from __future__ import print_function + +import sys + +__module__ = sys.modules[__name__] + +import binaryninja +__logger = binaryninja.Logger(0, __module__.__name__) + +log = __logger.log +log_debug = __logger.log_debug +log_info = __logger.log_info +log_warn = __logger.log_warn +log_error = __logger.log_error +log_alert = __logger.log_alert + +from typing import List, Optional, Tuple + +import struct +import traceback +import os + +from binaryninja.architecture import Architecture +from binaryninja.lowlevelil import LowLevelILLabel, LLIL_TEMP, LowLevelILFunction, ExpressionIndex +from binaryninja.function import RegisterInfo, InstructionInfo, InstructionTextToken +from binaryninja.binaryview import BinaryView +from binaryninja.plugin import PluginCommand +from binaryninja.interaction import AddressField, ChoiceField, get_form_input +from binaryninja.types import Symbol +from binaryninja.enums import (Endianness, BranchType, InstructionTextTokenType, + LowLevelILOperation, LowLevelILFlagCondition, FlagRole, SegmentFlag, + ImplicitRegisterExtend, SymbolType) +from binaryninja import BinaryViewType, lowlevelil + +log_debug(f'm68k Plugin loaded from: {os.path.dirname(__module__.__loader__.path)}') + +# Shift styles +SHIFT_SYLE_ARITHMETIC = 0, +SHIFT_SYLE_LOGICAL = 1, +SHIFT_SYLE_ROTATE_WITH_EXTEND = 2, +SHIFT_SYLE_ROTATE = 3, + +ShiftStyle = [ + 'as', # SHIFT_SYLE_ARITHMETIC + 'ls', # SHIFT_SYLE_LOGICAL + 'rox', # SHIFT_SYLE_ROTATE_WITH_EXTEND + 'ro' # SHIFT_SYLE_ROTATE +] + +BITFIELD_STYLE_TST = 
0, +BITFIELD_STYLE_EXTU = 1, +BITFIELD_STYLE_CHG = 2, +BITFIELD_STYLE_EXTS = 3, +BITFIELD_STYLE_CLR = 4, +BITFIELD_STYLE_FFO = 5, +BITFIELD_STYLE_SET = 6, +BITFIELD_STYLE_INS = 7, + +BitfieldStyle = [ + "tst", # BITFIELD_STYLE_TST + "extu", # BITFIELD_STYLE_EXTU + "chg", # BITFIELD_STYLE_CHG + "exts", # BITFIELD_STYLE_EXTS + "clr", # BITFIELD_STYLE_CLR + "ffo", # BITFIELD_STYLE_FFO + "set", # BITFIELD_STYLE_SET + "ins", # BITFIELD_STYLE_INS +] + + +# Condition codes +CONDITION_TRUE = 0 +CONDITION_FALSE = 1 +CONDITION_HIGH = 2 +CONDITION_LESS_OR_SAME = 3 +CONDITION_CARRY_CLEAR = 4 +CONDITION_CARRY_SET = 5 +CONDITION_NOT_EQUAL = 6 +CONDITION_EQUAL = 7 +CONDITION_OVERFLOW_CLEAR = 8 +CONDITION_OVERFLOW_SET = 9 +CONDITION_PLUS = 10 +CONDITION_MINUS = 11 +CONDITION_GREATER_OR_EQUAL = 12 +CONDITION_LESS_THAN = 13 +CONDITION_GREATER_THAN = 14 +CONDITION_LESS_OR_EQUAL = 15 + +Condition = [ + 't', # CONDITION_TRUE + 'f', # CONDITION_FALSE + 'hi', # CONDITION_HIGH + 'ls', # CONDITION_LESS_OR_SAME + 'cc', # CONDITION_CARRY_CLEAR + 'cs', # CONDITION_CARRY_SET + 'ne', # CONDITION_NOT_EQUAL + 'eq', # CONDITION_EQUAL + 'vc', # CONDITION_OVERFLOW_CLEAR + 'vs', # CONDITION_OVERFLOW_SET + 'pl', # CONDITION_PLUS + 'mi', # CONDITION_MINUS + 'ge', # CONDITION_GREATER_OR_EQUAL + 'lt', # CONDITION_LESS_THAN + 'gt', # CONDITION_GREATER_THAN + 'le' # CONDITION_LESS_OR_EQUAL +] + +# Registers +REGISTER_D0 = 0 +REGISTER_D1 = 1 +REGISTER_D2 = 2 +REGISTER_D3 = 3 +REGISTER_D4 = 4 +REGISTER_D5 = 5 +REGISTER_D6 = 6 +REGISTER_D7 = 7 +REGISTER_A0 = 8 +REGISTER_A1 = 9 +REGISTER_A2 = 10 +REGISTER_A3 = 11 +REGISTER_A4 = 12 +REGISTER_A5 = 13 +REGISTER_A6 = 14 +REGISTER_A7 = 15 + +Registers = [ + 'd0', # REGISTER_D0 + 'd1', # REGISTER_D1 + 'd2', # REGISTER_D2 + 'd3', # REGISTER_D3 + 'd4', # REGISTER_D4 + 'd5', # REGISTER_D5 + 'd6', # REGISTER_D6 + 'd7', # REGISTER_D7 + 'a0', # REGISTER_A0 + 'a1', # REGISTER_A1 + 'a2', # REGISTER_A2 + 'a3', # REGISTER_A3 + 'a4', # REGISTER_A4 + 'a5', # REGISTER_A5 + 'a6', 
# REGISTER_A6 + 'sp' # REGISTER_A7 +] + +# Sizes +SIZE_BYTE = 0 +SIZE_WORD = 1 +SIZE_LONG = 2 + +SizeSuffix = [ + '.b', # SIZE_BYTE + '.w', # SIZE_WORD + '.l', # SIZE_LONG +] + +# Operands + +class Operand: + def format(self, addr: int) -> List[InstructionTextToken]: + raise NotImplementedError + + def get_pre_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: + raise NotImplementedError + + def get_post_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: + raise NotImplementedError + + def get_address_il2(self, il: LowLevelILFunction) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + raise NotImplementedError + + def get_address_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: + return self.get_address_il2(il)[0] + + def get_source_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: + raise NotImplementedError + + def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> Optional[ExpressionIndex]: + raise NotImplementedError + +class OpRegisterDirect(Operand): + def __init__(self, size: int, reg: str): + self.size = size + self.reg = reg + + def __repr__(self): + return "OpRegisterDirect(%d, %s)" % (self.size, self.reg) + + def format(self, addr: int) -> List[InstructionTextToken]: + # a0, d0 + return [ + InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg) + ] + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + r = il.unimplemented() + return (r, [r]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + if self.reg == 'ccr': + c = il.flag_bit(1, 'c', 0) + v = il.flag_bit(1, 'v', 1) + z = il.flag_bit(1, 'z', 2) + n = il.flag_bit(1, 'n', 3) + x = il.flag_bit(1, 'x', 4) + return il.or_expr(1, il.or_expr(1, il.or_expr(1, il.or_expr(1, c, v), z), n), x) + 
class OpRegisterDirectPair(Operand):
    """Operand naming two registers, rendered as ``reg1:reg2`` (e.g. ``d0:d1``).

    ``size`` is a size code; register accesses are ``1 << size`` bytes wide.
    """

    def __init__(self, size: int, reg1: str, reg2: str):
        self.size, self.reg1, self.reg2 = size, reg1, reg2

    def __repr__(self):
        fields = (self.size, self.reg1, self.reg2)
        return "OpRegisterDirectPair(%d, %s, %s)" % fields

    def format(self, addr: int) -> List[InstructionTextToken]:
        """Return the tokens ``reg1``, ``:``, ``reg2``."""
        kind = InstructionTextTokenType
        first = InstructionTextToken(kind.RegisterToken, self.reg1)
        separator = InstructionTextToken(kind.OperandSeparatorToken, ":")
        second = InstructionTextToken(kind.RegisterToken, self.reg2)
        return [first, separator, second]

    def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """A register pair has no pre-access IL."""
        return None

    def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """A register pair has no post-access IL."""
        return None

    def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]:
        """Registers have no address: return an ``unimplemented`` expression."""
        placeholder = il.unimplemented()
        return (placeholder, [placeholder])

    def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """Return a 2-tuple of register reads, ``reg1`` first."""
        width = 1 << self.size
        first = il.reg(width, self.reg1)
        second = il.reg(width, self.reg2)
        return (first, second)

    def get_dest_il(self, il: LowLevelILFunction, values, flags=0) -> ExpressionIndex:
        """Return a 2-tuple of register writes; ``values[0]`` goes to ``reg1``."""
        width = 1 << self.size
        first = il.set_reg(width, self.reg1, values[0], flags)
        second = il.set_reg(width, self.reg2, values[1], flags)
        return (first, second)
class OpRegisterIndirect(Operand):
    """Memory operand addressed by a register, rendered as ``(a0)``.

    ``size`` is a size code; memory accesses are ``1 << size`` bytes wide.
    """

    def __init__(self, size: int, reg: str):
        self.size, self.reg = size, reg

    def __repr__(self):
        fields = (self.size, self.reg)
        return "OpRegisterIndirect(%d, %s)" % fields

    def format(self, addr: int) -> List[InstructionTextToken]:
        """Return the tokens ``(``, register, ``)``."""
        kind = InstructionTextTokenType
        opening = InstructionTextToken(kind.BeginMemoryOperandToken, "(")
        register = InstructionTextToken(kind.RegisterToken, self.reg)
        closing = InstructionTextToken(kind.EndMemoryOperandToken, ")")
        return [opening, register, closing]

    def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """Plain indirect access has no pre-access IL."""
        return None

    def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """Plain indirect access has no post-access IL."""
        return None

    def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]:
        """The address is the 4-byte value of the register."""
        pointer = il.reg(4, self.reg)
        return (pointer, [pointer])

    def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """Load ``1 << size`` bytes from the register's address."""
        width = 1 << self.size
        return il.load(width, self.get_address_il(il))

    def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex:
        """Store ``value`` at the register's address.

        The store is built with ``il.expr`` so that ``flags`` can be attached.
        """
        address = self.get_address_il(il)
        return il.expr(
            LowLevelILOperation.LLIL_STORE,
            address,
            value,
            size=1 << self.size,
            flags=flags,
        )
class OpRegisterIndirectPostincrement(Operand):
    """Register-indirect memory operand with postincrement, rendered ``(a0)+``.

    ``size`` is a size code; accesses and the increment are ``1 << size`` bytes.
    """

    def __init__(self, size: int, reg: str):
        self.size, self.reg = size, reg

    def __repr__(self):
        fields = (self.size, self.reg)
        return "OpRegisterIndirectPostincrement(%d, %s)" % fields

    def format(self, addr: int) -> List[InstructionTextToken]:
        """Return the tokens ``(``, register, ``)``, ``+``."""
        kind = InstructionTextTokenType
        opening = InstructionTextToken(kind.BeginMemoryOperandToken, "(")
        register = InstructionTextToken(kind.RegisterToken, self.reg)
        closing = InstructionTextToken(kind.EndMemoryOperandToken, ")")
        plus = InstructionTextToken(kind.TextToken, "+")
        return [opening, register, closing, plus]

    def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """Nothing happens before the access."""
        return None

    def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """Return ``reg = reg + (1 << size)`` as a 4-byte register update."""
        current = il.reg(4, self.reg)
        step = il.const(4, 1 << self.size)
        return il.set_reg(4, self.reg, il.add(4, current, step))

    def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]:
        """The address is the 4-byte value of the register."""
        pointer = il.reg(4, self.reg)
        return (pointer, [pointer])

    def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """Load ``1 << size`` bytes from the register's address."""
        width = 1 << self.size
        return il.load(width, self.get_address_il(il))

    def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex:
        """Store ``value`` at the register's address.

        The store is built with ``il.expr`` so that ``flags`` can be attached.
        """
        address = self.get_address_il(il)
        return il.expr(
            LowLevelILOperation.LLIL_STORE,
            address,
            value,
            size=1 << self.size,
            flags=flags,
        )
class OpRegisterIndirectDisplacement(Operand):
    """Memory operand at ``reg + offset``, rendered ``$1234(a0)``.

    When ``reg`` is ``'pc'`` the operand is PC-relative: it is rendered as the
    resolved target address and cannot be used as a destination.
    """

    def __init__(self, size: int, reg: str, offset: int):
        self.size, self.reg, self.offset = size, reg, offset

    def __repr__(self):
        fields = (self.size, self.reg, self.offset)
        return "OpRegisterIndirectDisplacement(%d, %s, 0x%x)" % fields

    def format(self, addr: int) -> List[InstructionTextToken]:
        """Return the operand tokens; ``addr`` is used for the PC-relative form."""
        kind = InstructionTextTokenType
        if self.reg != 'pc':
            # $1234(a0)
            displacement = "${:04x}".format(self.offset)
            return [
                InstructionTextToken(kind.IntegerToken, displacement, self.offset, 2),
                InstructionTextToken(kind.BeginMemoryOperandToken, "("),
                InstructionTextToken(kind.RegisterToken, self.reg),
                InstructionTextToken(kind.EndMemoryOperandToken, ")"),
            ]

        # PC-relative: show the absolute target, computed from addr + 2.
        target = addr + 2 + self.offset
        return [
            InstructionTextToken(kind.BeginMemoryOperandToken, "("),
            InstructionTextToken(kind.PossibleAddressToken, "${:08x}".format(target), target, 4),
            InstructionTextToken(kind.EndMemoryOperandToken, ")"),
        ]

    def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """Displacement addressing has no pre-access IL."""
        return None

    def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """Displacement addressing has no post-access IL."""
        return None

    def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]:
        """Return the address expression and the expressions it was built from.

        PC-relative operands resolve to a single constant pointer; otherwise
        the address is ``reg + offset`` and the parts are the register read,
        the constant and the sum.
        """
        if self.reg != 'pc':
            base = il.reg(4, self.reg)
            displacement = il.const(2, self.offset)
            total = il.add(4, base, displacement)
            return (total, [base, displacement, total])

        target = il.const_pointer(4, il.current_address + 2 + self.offset)
        return (target, [target])

    def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """Load ``1 << size`` bytes from the computed address."""
        width = 1 << self.size
        return il.load(width, self.get_address_il(il))

    def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex:
        """Store ``value`` at the computed address (unimplemented for PC-relative)."""
        if self.reg == 'pc':
            return il.unimplemented()

        # Built with il.expr so that flags can be attached to the store.
        address = self.get_address_il(il)
        return il.expr(
            LowLevelILOperation.LLIL_STORE,
            address,
            value,
            size=1 << self.size,
            flags=flags,
        )
Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + # return il.add(4, + # il.add(4, + # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), + # il.const(4, self.offset) + # ), + # il.mult(4, + # il.reg(4 if self.ireg_long else 2, self.ireg), + # il.const(1, self.scale) + # ) + # ) + a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) + b = il.const(4, self.offset) + e = il.add(4, a, b) + + c = il.reg(4 if self.ireg_long else 2, self.ireg) + d = il.const(1, self.scale) + f = il.mult(4, c, d) + + g = il.add(4, e, f) + return (g, [a, b, c, d, e, f, g]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.load(1 << self.size, self.get_address_il(il)) + + def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: + if self.reg == 'pc': + return il.unimplemented() + else: + #return il.store(1 << self.size, self.get_address_il(il), value, flags) + return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) + + +class OpMemoryIndirect(Operand): + def __init__(self, size: int, reg: str, offset: int, outer_displacement: int): + self.size = size + self.reg = reg + self.offset = offset + self.outer_displacement = outer_displacement + + def __repr__(self): + return "OpMemoryIndirect(%d, %s, %d, %d)" % (self.size, self.reg, self.offset, self.outer_displacement) + + def format(self, addr: int) -> List[InstructionTextToken]: + # ([$1234,a0],$1234) + tokens = [] + tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "(")) + tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "[")) + if self.offset != 0: + tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.offset), self.offset)) + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) + 
class OpMemoryIndirectPostindex(Operand):
    """Memory-indirect post-indexed operand, rendered ``([$1234,a0],a1.l*4,$1234)``.

    The effective address is
    ``load32(reg + offset) + (ireg * scale + outer_displacement)``.
    ``reg`` may be ``'pc'``, in which case the base is ``current_address + 2``
    and the operand cannot be used as a destination.
    """

    def __init__(self, size: int, reg: str, offset: int, ireg: str, ireg_long: bool, scale: int, outer_displacement: int):
        self.size = size
        self.reg = reg
        self.offset = offset
        self.ireg = ireg
        self.ireg_long = ireg_long
        self.scale = scale
        self.outer_displacement = outer_displacement

    def __repr__(self):
        return "OpMemoryIndirectPostindex(%d, %s, 0x%x, %s, %d, %d, 0x%x)" % (self.size, self.reg, self.offset, self.ireg, self.ireg_long, self.scale, self.outer_displacement)

    def format(self, addr: int) -> List[InstructionTextToken]:
        """Return the disassembly tokens.

        The inner offset, the ``*scale`` suffix and the outer displacement are
        omitted when they are 0, 1 and 0 respectively.
        """
        # ([$1234,a0],a1.l*4,$1234)
        tokens = []
        tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("))
        tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "["))
        if self.offset != 0:
            tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.offset), self.offset))
            tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ","))
        tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg))
        tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, "]"))
        tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ","))
        tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.ireg))
        tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "."))
        tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "l" if self.ireg_long else 'w'))
        if self.scale != 1:
            tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "*"))
            tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "{}".format(self.scale), self.scale))
        if self.outer_displacement != 0:
            tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ","))
            tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.outer_displacement), self.outer_displacement))
        tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")"))
        return tokens

    def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """This addressing mode has no pre-access IL."""
        return None

    def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """This addressing mode has no post-access IL."""
        return None

    def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]:
        """Return ``(address, parts)`` for the post-indexed effective address.

        ``parts`` lists every expression created, in creation order:
        base, inner offset, pointer address, loaded pointer, index register,
        scale, scaled index, outer displacement, post offset, final address.
        """
        # Inner part: the 32-bit pointer fetched from [reg + offset].
        base = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg)
        inner_offset = il.const(4, self.offset)
        pointer_address = il.add(4, base, inner_offset)
        pointer = il.load(4, pointer_address)

        # Post-index part: ireg * scale + outer displacement.
        # NOTE: this assignment previously ended with a stray comma, which
        # turned the index expression into a 1-tuple (hence the old ``e[0]``
        # workaround) and put a tuple into the returned parts list.
        index = il.reg(4 if self.ireg_long else 2, self.ireg)
        scale = il.const(1, self.scale)
        scaled_index = il.mult(4, index, scale)

        outer = il.const(4, self.outer_displacement)
        post_offset = il.add(4, scaled_index, outer)

        address = il.add(4, pointer, post_offset)
        return (address, [base, inner_offset, pointer_address, pointer, index, scale, scaled_index, outer, post_offset, address])

    def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """Load ``1 << size`` bytes from the effective address."""
        return il.load(1 << self.size, self.get_address_il(il))

    def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex:
        """Store ``value`` at the effective address (unimplemented for PC-relative)."""
        if self.reg == 'pc':
            return il.unimplemented()
        else:
            # Built with il.expr so that flags can be attached to the store.
            return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags)
tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, ".")) + tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "l" if self.ireg_long else 'w')) + if self.scale != 1: + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "*")) + tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "{}".format(self.scale), self.scale)) + tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, "]")) + if self.outer_displacement != 0: + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) + tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.outer_displacement), self.outer_displacement)) + tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")")) + return tokens + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + # return il.add(4, + # il.load(4, + # il.add(4, + # il.add(4, + # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), + # il.const(4, self.offset) + # ), + # il.mult(4, + # il.reg(4 if self.ireg_long else 2, self.ireg), + # il.const(1, self.scale) + # ) + # ) + # ), + # il.const(4, self.outer_displacement) + # ) + a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) + b = il.const(4, self.offset) + c = il.add(4, a, b) + + d = il.reg(4 if self.ireg_long else 2, self.ireg) + e = il.const(1, self.scale) + f = il.mult(4, d, e) + + g = il.add(4, c, f) + h = il.load(4, g) + + i = il.const(4, self.outer_displacement) + j = il.add(4, h, i) + return (j, [a, b, c, d, e, f, g, h, i, j]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.load(1 << 
self.size, self.get_address_il(il)) + + def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: + if self.reg == 'pc': + return il.unimplemented() + else: + #return il.store(1 << self.size, self.get_address_il(il), value, flags) + return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) + + +class OpAbsolute(Operand): + def __init__(self, size, address, address_size, address_width): + self.size = size + self.address = address + self.address_size = address_size + self.address_width = address_width + + def __repr__(self): + return "OpAbsolute(%d, 0x%x, %d, %d)" % (self.size, self.address, self.address_size, self.address_width) + + def format(self, addr: int) -> List[InstructionTextToken]: + # ($1234).w + return [ + InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), + InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, "${:0{}x}".format(self.address, 1 << self.address_size), self.address, 1 << self.address_size), + InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")"+SizeSuffix[self.address_size]) + ] + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: + # return il.sign_extend(self.address_width, + # il.const(1 << self.address_size, self.address) + # ) + a = il.const(1 << self.address_size, self.address) + b = il.sign_extend(self.address_width, a) + return (b, [a, b]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.load(1 << self.size, self.get_address_il(il)) + + def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: + #return il.store(1 << self.size, self.get_address_il(il), value, flags) + return il.expr(LowLevelILOperation.LLIL_STORE, 
class OpImmediate(Operand):
    """Immediate constant operand, rendered as ``#$1234``.

    ``size`` is a size code; the constant is ``1 << size`` bytes wide.
    """

    def __init__(self, size, value):
        self.size, self.value = size, value

    def __repr__(self):
        fields = (self.size, self.value)
        return "OpImmediate(%d, 0x%x)" % fields

    def format(self, addr: int) -> List[InstructionTextToken]:
        """Return the tokens ``#`` and the value in hex.

        The hex text is zero-padded to ``1 << size`` characters, and the same
        number is passed as the token's size argument.
        """
        kind = InstructionTextTokenType
        hash_mark = InstructionTextToken(kind.TextToken, "#")
        width = 1 << self.size
        literal = InstructionTextToken(
            kind.IntegerToken,
            "${:0{}x}".format(self.value, width),
            self.value,
            width,
        )
        return [hash_mark, literal]

    def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """An immediate has no pre-access IL."""
        return None

    def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """An immediate has no post-access IL."""
        return None

    def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]:
        """An immediate has no address: return an ``unimplemented`` expression."""
        placeholder = il.unimplemented()
        return (placeholder, [placeholder])

    def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex:
        """Return the value as a constant of ``1 << size`` bytes."""
        width = 1 << self.size
        return il.const(width, self.value)

    def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex:
        """An immediate cannot be written: return an ``unimplemented`` expression."""
        return il.unimplemented()
22 + endianness = Endianness.BigEndian + regs = { + 'd0': RegisterInfo('d0', 4), + 'd1': RegisterInfo('d1', 4), + 'd2': RegisterInfo('d2', 4), + 'd3': RegisterInfo('d3', 4), + 'd4': RegisterInfo('d4', 4), + 'd5': RegisterInfo('d5', 4), + 'd6': RegisterInfo('d6', 4), + 'd7': RegisterInfo('d7', 4), + 'a0': RegisterInfo('a0', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), + 'a1': RegisterInfo('a1', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), + 'a2': RegisterInfo('a2', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), + 'a3': RegisterInfo('a3', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), + 'a4': RegisterInfo('a4', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), + 'a5': RegisterInfo('a5', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), + 'a6': RegisterInfo('a6', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), + 'sp': RegisterInfo('sp', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), + + 'sr': RegisterInfo('sr', 2), + 'ccr': RegisterInfo('sr', 1), + + # control registers + # MC68010/MC68020/MC68030/MC68040/CPU32 + 'sfc': RegisterInfo('sfc', 4), + 'dfc': RegisterInfo('dfc', 4), + 'usp': RegisterInfo('usp', 4), + 'vbr': RegisterInfo('vbr', 4), + # MC68020/MC68030/MC68040 + 'cacr': RegisterInfo('cacr', 4), + 'caar': RegisterInfo('caar', 4), + 'msp': RegisterInfo('msp', 4), + 'isp': RegisterInfo('isp', 4), + # MC68040/MC68LC040 + 'tc': RegisterInfo('tc', 4), + 'itt0': RegisterInfo('itt0', 4), + 'itt1': RegisterInfo('itt1', 4), + 'dtt0': RegisterInfo('dtt0', 4), + 'dtt1': RegisterInfo('dtt1', 4), + 'mmusr': RegisterInfo('mmusr', 4), + 'urp': RegisterInfo('urp', 4), + 'srp': RegisterInfo('srp', 4), + # MC68EC040 + 'iacr0': RegisterInfo('iacr0', 4), + 'iacr1': RegisterInfo('iacr1', 4), + 'dacr0': RegisterInfo('dacr0', 4), + 'dacr1': RegisterInfo('dacr1', 4), + } + stack_pointer = 'sp' + flags = ['x', 'n', 'z', 'v', 'c'] + flag_write_types = ['*', 'nzvc'] + flags_written_by_flag_write_type = { + '*': 
def decode_effective_address(self, mode: int, register: int, data: bytes, size: Optional[int] = None) -> Tuple[Optional['Operand'], Optional[int]]:
    """Decode one effective-address field into an operand object.

    Only the low three bits of ``mode`` and ``register`` are used, so callers
    may pass an already-shifted instruction word.

    :param mode: addressing-mode field (masked with 0x07)
    :param register: register field (masked with 0x07)
    :param data: bytes that follow the part of the instruction already
        consumed; extension words are read from offset 0 of this buffer
    :param size: operand size code stored in the operand; for immediates it
        also selects how many extension bytes are read
    :returns: ``(operand, extension_length_in_bytes)``.  ``(None, None)`` is
        returned for encodings this decoder does not recognise, and an
        immediate with ``size is None`` yields ``(OpImmediate, None)``.
    :raises struct.error: propagated from ``struct.unpack_from`` when ``data``
        is too short for the extension words that are read.
    """
    mode &= 0x07
    register &= 0x07

    # base register for the indexed / memory-indirect forms (modes 6 and 7/3)
    reg = None

    if mode == 0:
        # data register direct
        return (OpRegisterDirect(size, Registers[register]), 0)
    elif mode == 1:
        # address register direct
        return (OpRegisterDirect(size, Registers[register+8]), 0)
    elif mode == 2:
        # address register indirect
        return (OpRegisterIndirect(size, Registers[register+8]), 0)
    elif mode == 3:
        # address register indirect with postincrement
        return (OpRegisterIndirectPostincrement(size, Registers[register+8]), 0)
    elif mode == 4:
        # address register indirect with predecrement
        return (OpRegisterIndirectPredecrement(size, Registers[register+8]), 0)
    elif mode == 5:
        # address register indirect with displacement
        return (OpRegisterIndirectDisplacement(size, Registers[register+8], struct.unpack_from('>h', data, 0)[0]), 2)
    elif mode == 6:
        # extended addressing mode
        reg = Registers[register+8]
    elif mode == 7:
        if register == 0:
            # absolute short
            val = struct.unpack_from('>H', data, 0)[0]
            if val & 0x8000:
                if self.address_size == 4:
                    val |= 0xffff0000 # extend to 32-bits
                else:
                    val |= 0xff0000 # extend to 24-bits (for 68000)
            return (OpAbsolute(size, val, 1, self.address_size), 2)
        if register == 1:
            # absolute long
            return (OpAbsolute(size, struct.unpack_from('>L', data, 0)[0], 2, self.address_size), 4)
        elif register == 2:
            # program counter indirect with displacement
            return (OpRegisterIndirectDisplacement(size, 'pc', struct.unpack_from('>h', data, 0)[0]), 2)
        elif register == 3:
            # extended addressing mode
            reg = 'pc'
        elif register == 4:
            # immediate
            if size is None:
                # unspecified length
                return (OpImmediate(size, None), None)
            elif size == SIZE_BYTE:
                # byte: taken from the second byte of the extension word
                return (OpImmediate(size, struct.unpack_from('>b', data, 1)[0]), 2)
            elif size == 1:
                # word
                return (OpImmediate(size, struct.unpack_from('>h', data, 0)[0]), 2)
            elif size == 2:
                # long
                return (OpImmediate(size, struct.unpack_from('>l', data, 0)[0]), 4)

    if reg is not None:
        extra = struct.unpack_from('>H', data, 0)[0]
        # index register
        xn = Registers[extra >> 12]
        # index register size
        index_size = (extra >> 11) & 1
        # index register scale
        scale = 1 << ((extra >> 9) & 3)
        length = 2

        if extra & 0x0100:
            # full extension word
            bd = 0
            od = 0

            # base displacement
            # NOTE(review): the displacement is only read when bit 7 is clear;
            # confirm against the extension-word layout that this bit should
            # gate the displacement words and not just the base register.
            if not (extra >> 7) & 1:
                if (extra >> 4) & 3 == 2:
                    # word base displacement
                    bd = struct.unpack_from('>h', data, length)[0]
                    length += 2
                elif (extra >> 4) & 3 == 3:
                    # long base displacement
                    bd = struct.unpack_from('>L', data, length)[0]
                    length += 4

            # outer displacement
            if extra & 3 == 2:
                # word outer displacement
                od = struct.unpack_from('>h', data, length)[0]
                length += 2
            elif extra & 3 == 3:
                # long outer displacement
                od = struct.unpack_from('>L', data, length)[0]
                length += 4

            # select the operand form from the low three bits and bit 6
            if extra & 7 == 0:
                # low three bits clear: plain indexed form
                return (OpRegisterIndirectIndex(size, reg, bd, xn, index_size, scale), length)
            elif (extra >> 6) & 1:
                # bit 6 set: memory indirect without an index register
                return (OpMemoryIndirect(size, reg, bd, od), length)
            elif (extra >> 2) & 1:
                return (OpMemoryIndirectPostindex(size, reg, bd, xn, index_size, scale, od), length)
            else:
                return (OpMemoryIndirectPreindex(size, reg, bd, xn, index_size, scale, od), length)
        else:
            # brief extension word
            # 8 bit displacement, sign-extended
            d8 = extra & 0xff
            if d8 & 0x80:
                d8 -= 256
            return (OpRegisterIndirectIndex(size, reg, d8, xn, index_size, scale), length)

    return (None, None)
def decode_instruction(self, data: bytes, addr: int) -> Tuple[str, int, Optional[int], Optional['Operand'], Optional['Operand'], Optional['Operand']]:
    """Decode the instruction at the start of ``data``.

    The first big-endian 16-bit word is dispatched on its top nibble
    (``operation_code``); extension words are read from ``data`` as needed.

    :param data: raw instruction bytes, starting at the opcode word
    :param addr: address of the instruction (only referenced by the
        commented-out debug logging)
    :returns: ``(instr, length, size, source, dest, third)`` where ``instr``
        is the mnemonic, ``length`` the decoded byte length, ``size`` the
        operand size code (or ``None``) and the remaining items are operand
        objects or ``None``.  When ``data`` holds fewer than two bytes, the
        opcode is not recognised, or an operand cannot be decoded, the tuple
        ``('unimplemented', len(data), None, None, None, None)`` is returned.
    :raises struct.error: propagated from ``struct.unpack_from`` when ``data``
        is shorter than the extension words an encoding requires.
    """
    error_value = ('unimplemented', len(data), None, None, None, None)
    if len(data) < 2:
        return error_value

    instruction = struct.unpack_from('>H', data)[0]

    msb = instruction >> 8
    operation_code = msb >> 4

    instr = None
    length = None
    size = None
    source = None
    dest = None
    third = None

    if operation_code == 0x0:
        # Bit manipulation/MOVEP/Immediate
        if instruction & 0xf9c0 == 0x00c0:
            # rtm, callm, chk2, cmp2
            if instruction & 0xfff0 == 0x06c0:
                instr = 'rtm'
                dest = OpRegisterDirect(SIZE_LONG, Registers[instruction & 15])
                length = 2
            elif instruction & 0xffc0 == 0x06c0:
                instr = 'callm'
                source = OpImmediate(SIZE_BYTE, struct.unpack_from('>B', data, 3)[0])
                dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[4:], SIZE_BYTE) # check
                if extra_dest is None:
                    return error_value
                length = 4+extra_dest
            else:
                size = (instruction >> 9) & 3
                extra = struct.unpack_from('>H', data, 2)[0]
                if extra & 0x0800:
                    instr = 'chk2'
                else:
                    instr = 'cmp2'
                source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[4:], SIZE_BYTE) # check
                # NOTE(review): the top nibble of `instruction` is zero in this
                # branch, so this always selects Registers[0]; the register
                # field presumably lives in `extra` -- confirm and fix.
                dest = OpRegisterDirect(size, Registers[(instruction >> 12) & 15])
                if extra_source is None:
                    return error_value
                length = 4+extra_source
        elif instruction & 0xffc0 in (0x0ac0, 0x0cc0, 0x0ec0):
            if instruction & 0xf9ff == 0x08fc:
                instr = 'cas2'
                size = ((instruction >> 9) & 3) - 1
                extra1 = struct.unpack_from('>H', data, 2)[0]
                extra2 = struct.unpack_from('>H', data, 4)[0]
                source = OpRegisterDirectPair(size, Registers[extra1 & 7], Registers[extra2 & 7])
                dest = OpRegisterDirectPair(size, Registers[(extra1 >> 6) & 7], Registers[(extra2 >> 6) & 7])
                third = OpRegisterIndirectPair(size, Registers[(extra1 >> 12) & 15], Registers[(extra2 >> 12) & 15])
                length = 6
            else:
                instr = 'cas'
                size = ((instruction >> 9) & 3) - 1
                extra = struct.unpack_from('>H', data, 2)[0]
                source = OpRegisterDirect(size, Registers[extra & 7])
                dest = OpRegisterDirect(size, Registers[(extra >> 6) & 7])
                third, extra_third = self.decode_effective_address(instruction >> 3, instruction, data[4:], size)
                if extra_third is None:
                    return error_value
                length = 4+extra_third
        elif msb in (0x00, 0x02, 0x04, 0x06, 0x0a, 0x0c):
            # ORI, ANDI, SUBI, ADDI, EORI, CMPI
            if msb == 0x00:
                instr = 'ori'
            elif msb == 0x02:
                instr = 'andi'
            elif msb == 0x04:
                instr = 'subi'
            elif msb == 0x06:
                instr = 'addi'
            elif msb == 0x0a:
                instr = 'eori'
            elif msb == 0x0c:
                instr = 'cmpi'
            size = (instruction >> 6) & 0x03
            # the immediate source comes first, then the destination EA
            source, extra_source = self.decode_effective_address(7, 4, data[2:], size)
            if instruction & 0x00ff == 0x003c:
                dest = OpRegisterDirect(size, 'ccr')
                extra_dest = 0
            elif instruction & 0x00ff == 0x007c:
                dest = OpRegisterDirect(size, 'sr')
                extra_dest = 0
            else:
                dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], size)

            if dest is None:
                instr = None
            else:
                length = 2+extra_source+extra_dest
        elif msb == 0x08:
            # btst, bchg, bclr, bset with constant
            if instruction & 0xffc0 == 0x0800:
                instr = 'btst'
            elif instruction & 0xffc0 == 0x0840:
                instr = 'bchg'
            elif instruction & 0xffc0 == 0x0880:
                instr = 'bclr'
            elif instruction & 0xffc0 == 0x08C0:
                instr = 'bset'
            source = OpImmediate(SIZE_BYTE, struct.unpack_from('>B', data, 3)[0])
            dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[4:], SIZE_BYTE)
            if isinstance(dest, OpRegisterDirect):
                dest.size = SIZE_LONG
            if dest is None:
                instr = None
            else:
                length = 4+extra_dest
        elif msb & 0xf1 == 0x01:
            # movep, btst, bchg, bclr, bset with register
            if instruction & 0xf138 == 0x0108:
                instr = 'movep'
                size = ((instruction >> 6) & 1) + 1
                source, extra_source = self.decode_effective_address(5, instruction, data[2:], SIZE_BYTE) # check
                dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
                length = 2+extra_source
                if instruction & 0x0080:
                    source, dest = dest, source
            else:
                if instruction & 0xf1c0 == 0x0100:
                    instr = 'btst'
                elif instruction & 0xf1c0 == 0x0140:
                    instr = 'bchg'
                elif instruction & 0xf1c0 == 0x0180:
                    instr = 'bclr'
                elif instruction & 0xf1c0 == 0x01c0:
                    instr = 'bset'
                source = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7]) # check
                dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], SIZE_BYTE)
                if isinstance(dest, OpRegisterDirect):
                    dest.size = SIZE_LONG
                if dest is None:
                    instr = None
                else:
                    length = 2+extra_dest
        elif instruction & 0xff00 == 0x0e00:
            instr = 'moves'
            extra = struct.unpack_from('>H', data, 2)[0]
            size = (instruction >> 6) & 3
            dest = OpRegisterDirect(size, Registers[extra >> 12])
            source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[4:], size)
            if extra & 0x0800:
                source, dest = dest, source
            if extra_source is None:
                return error_value
            length = 4+extra_source
    elif operation_code in (0x1, 0x2, 0x3):
        # move
        instr = 'move'
        if operation_code == 0x1:
            # Move byte
            size = SIZE_BYTE
        elif operation_code == 0x2:
            # Move long
            size = SIZE_LONG
        elif operation_code == 0x3:
            # Move word
            size = SIZE_WORD

        source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
        if source is None:
            instr = None
        else:
            # destination EA has its mode in bits 8-6 and register in bits 11-9
            dest, extra_dest = self.decode_effective_address(instruction >> 6, instruction >> 9, data[2+extra_source:], size)
            if dest is None or isinstance(dest, OpImmediate):
                instr = None
            else:
                if isinstance(dest, OpRegisterDirect) and (dest.reg[0] == 'a' or dest.reg == 'sp'):
                    instr = 'movea'
                length = 2+extra_source+extra_dest
    elif operation_code == 0x4:
        # Miscellaneous
        extra_source = 0
        extra_dest = 0
        size = None
        # set once the operands are fully decoded, so the generic
        # effective-address decode at the end of this branch is skipped
        skip_ea = False
        if instruction & 0xf100 == 0x4100:
            # lea, extb, chk
            if instruction & 0xf1c0 == 0x41c0:
                if instruction & 0x0038:
                    instr = 'lea'
                    dest = OpRegisterDirect(SIZE_LONG, Registers[((instruction >> 9) & 7) + 8])
                else:
                    instr = 'extb'
                size = SIZE_LONG
            else:
                instr = 'chk'
                if instruction & 0x0080:
                    size = SIZE_WORD
                else:
                    size = SIZE_LONG
                dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
        elif msb == 0x40:
            # move from sr, negx
            if instruction & 0xffc0 == 0x40c0:
                # move from sr
                instr = 'move'
                size = SIZE_WORD
                source = OpRegisterDirect(size, 'sr')
            else:
                instr = 'negx'
                size = instruction >> 6
        elif msb == 0x42:
            # move from ccr, clr
            if instruction & 0xffc0 == 0x42c0:
                # move from ccr (ccr is the source operand)
                instr = 'move'
                size = SIZE_WORD
                source = OpRegisterDirect(size, 'ccr')
            else:
                instr = 'clr'
                size = instruction >> 6
        elif msb == 0x44:
            # move to ccr, neg
            if instruction & 0xffc0 == 0x44c0:
                # move to ccr (ccr is the destination operand)
                instr = 'move'
                size = SIZE_WORD
                dest = OpRegisterDirect(size, 'ccr')
            else:
                instr = 'neg'
                size = instruction >> 6
        elif msb == 0x46:
            # move to sr, not
            if instruction & 0xffc0 == 0x46c0:
                # move to sr (sr is the destination operand)
                instr = 'move'
                size = SIZE_WORD
                dest = OpRegisterDirect(size, 'sr')
            else:
                instr = 'not'
                size = instruction >> 6
        elif msb in (0x48, 0x4c):
            # link, nbcd, movem, ext, swap, bkpt, pea, divs, divu, divsl, divul, muls, mulu
            if instruction & 0xfff8 == 0x4808:
                instr = 'link'
                size = SIZE_LONG
                dest, extra_dest = self.decode_effective_address(7, 4, data[2:], size)
            elif instruction & 0xffc0 == 0x4800:
                instr = 'nbcd'
                dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], SIZE_BYTE)
                skip_ea = True
            elif instruction & 0xfb80 == 0x4880:
                if instruction & 0x0040:
                    size = SIZE_LONG
                else:
                    size = SIZE_WORD
                if instruction & 0x0038:
                    instr = 'movem'
                    extra_source = 2
                    extra = struct.unpack_from('>H', data, 2)[0]
                    reg_list = []
                    if instruction & 0x0038 == 0x0020:
                        # predecrement mode: register mask is scanned from bit 15 down
                        for k in range(16):
                            if extra << k & 0x8000:
                                reg_list.append(Registers[k])
                    else:
                        for k in range(16):
                            if extra >> k & 0x0001:
                                reg_list.append(Registers[k])
                    source = OpRegisterMovemList(size, reg_list)
                else:
                    instr = 'ext'
                dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], size)
                skip_ea = True
                if instruction & 0x0400:
                    source, dest = dest, source
            elif instruction & 0xfff8 == 0x4840:
                instr = 'swap'
                dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], SIZE_LONG)
                skip_ea = True
            elif instruction & 0xfff8 == 0x4848:
                instr = 'bkpt'
                source = OpImmediate(SIZE_BYTE, instruction & 7)
                skip_ea = True
            elif instruction & 0xffc0 == 0x4840:
                instr = 'pea'
                size = SIZE_LONG
            elif msb == 0x4c:
                size = SIZE_LONG
                extra_dest = 2
                extra = struct.unpack_from('>H', data, 2)[0]
                source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_dest:], size)
                dh = Registers[extra & 7]
                dl = Registers[(extra >> 12) & 7]
                dest = OpRegisterDirect(size, dl)
                if instruction & 0x0040:
                    if extra & 0x0800:
                        instr = 'divs'
                    else:
                        instr = 'divu'
                    if extra & 0x0400:
                        dest = OpRegisterDirectPair(size, dh, dl)
                    elif dh != dl:
                        dest = OpRegisterDirectPair(size, dh, dl)
                        instr += 'l'
                else:
                    if extra & 0x0800:
                        instr = 'muls'
                    else:
                        instr = 'mulu'
                    if extra & 0x0400:
                        dest = OpRegisterDirectPair(size, dh, dl)
                skip_ea = True
        elif msb == 0x4a:
            # bgnd, illegal, tas, tst
            if instruction == 0x4afa:
                instr = 'bgnd'
                skip_ea = True
            elif instruction == 0x4afc:
                instr = 'illegal'
                skip_ea = True
            elif instruction & 0xffc0 == 0x4ac0:
                instr = 'tas'
                skip_ea = True
                dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], SIZE_BYTE)
            else:
                instr = 'tst'
                size = instruction >> 6
        elif msb == 0x4e:
            # trap, link, unlk, move, reset, nop, stop, rte, rtd, rts, trapv, rtr, movec, jsr, jmp
            if instruction & 0xfff0 == 0x4e40:
                instr = 'trap'
                length = 2
                source = OpImmediate(SIZE_BYTE, instruction & 15)
                skip_ea = True
            elif instruction & 0xfff0 == 0x4e50:
                if instruction & 0xfff8 == 0x4e50:
                    instr = 'link'
                    dest, extra_dest = self.decode_effective_address(7, 4, data[2:], 1)
                else:
                    instr = 'unlk'
                source = OpRegisterDirect(SIZE_LONG, Registers[(instruction & 7) + 8])
                skip_ea = True
            elif instruction & 0xfff0 == 0x4e60:
                instr = 'move'
                size = SIZE_LONG
                source = OpRegisterDirect(SIZE_LONG, Registers[(instruction & 7) + 8])
                dest = OpRegisterDirect(size, 'usp')
                if instruction & 0x08:
                    source, dest = dest, source
                skip_ea = True
            elif instruction == 0x4e70:
                instr = 'reset'
                skip_ea = True
            elif instruction == 0x4e71:
                instr = 'nop'
                skip_ea = True
            elif instruction == 0x4e72:
                instr = 'stop'
                source = OpImmediate(SIZE_WORD, struct.unpack_from(">H", data, 2)[0])
                extra_source = 2
                skip_ea = True
            elif instruction == 0x4e73:
                instr = 'rte'
                skip_ea = True
            elif instruction == 0x4e74:
                instr = 'rtd'
                dest, extra_dest = self.decode_effective_address(7, 4, data[2:], SIZE_WORD)
                skip_ea = True
            elif instruction == 0x4e75:
                instr = 'rts'
                skip_ea = True
            elif instruction == 0x4e76:
                instr = 'trapv'
                skip_ea = True
            elif instruction == 0x4e77:
                instr = 'rtr'
                skip_ea = True
            elif instruction & 0xfffe == 0x4e7A:
                instr = 'movec'
                size = SIZE_LONG
                extended = struct.unpack_from('>H', data, 2)[0]
                control_reg = self.control_registers.get(extended & 0x0fff, None)
                reg = (extended >> 12) & 15
                if control_reg is None:
                    # unknown control register for this CPU variant
                    instr = None
                else:
                    source = OpRegisterDirect(size, control_reg)
                    dest = OpRegisterDirect(size, Registers[reg])
                    if instruction & 1:
                        source, dest = dest, source
                extra_source = 2
                skip_ea = True
            elif instruction & 0xff80 == 0x4e80:
                if instruction & 0xffc0 == 0x4e80:
                    instr = 'jsr'
                else:
                    instr = 'jmp'
                dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], SIZE_LONG)
                skip_ea = True
        if instr is not None:
            if size is not None:
                # several branches above store the unmasked `instruction >> 6`
                size &= 3
            if skip_ea:
                pass
            elif dest is None:
                dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], size)
            else:
                source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_dest:], size)
            if extra_source is None or extra_dest is None:
                instr = None
            else:
                length = 2+extra_source+extra_dest
    elif operation_code == 0x5:
        # ADDQ/SUBQ/Scc/DBcc/TRAPcc
        if instruction & 0xf0c0 == 0x50c0:
            if instruction & 0xf0f8 == 0x50c8:
                instr = 'db'+Condition[(instruction >> 8) & 0xf]
                source = OpRegisterDirect(SIZE_WORD, Registers[instruction & 7])
                dest = OpRegisterIndirectDisplacement(SIZE_LONG, 'pc', struct.unpack_from('>h', data, 2)[0])
                length = 4
            elif instruction & 0xf0ff in (0x50fa, 0x50fb, 0x50fc):
                instr = 'trap'+Condition[(instruction >> 8) & 0xf]
                if instruction & 7 == 2:
                    length = 4
                    source = OpImmediate(SIZE_WORD, struct.unpack_from('>H', data, 2)[0])
                elif instruction & 7 == 3:
                    length = 6
                    source = OpImmediate(SIZE_LONG, struct.unpack_from('>L', data, 2)[0])
                elif instruction & 7 == 4:
                    length = 2
            else:
                instr = 's'+Condition[(instruction >> 8) & 0xf]
                size = SIZE_BYTE
                dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
                if extra_dest is None:
                    return error_value
                length = 2+extra_dest
        else:
            if instruction & 0x0100:
                instr = 'subq'
            else:
                instr = 'addq'
            val = (instruction >> 9) & 7
            if val == 0:
                # a zero field encodes the quick value 8
                val = 8
            size = (instruction >> 6) & 3
            source = OpImmediate(SIZE_BYTE, val)
            dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
            if extra_dest is None:
                return error_value
            length = 2+extra_dest
    elif operation_code == 0x6:
        # Bcc/BSR/BRA
        if msb == 0x60:
            instr = 'bra'
        elif msb == 0x61:
            instr = 'bsr'
        else:
            instr = 'b'+Condition[(instruction >> 8) & 0xf]
        val = instruction & 0xff
        if val == 0:
            # 16-bit displacement follows the opcode word
            val = struct.unpack_from('>h', data, 2)[0]
            length = 4
        elif val == 0xff:
            # 32-bit displacement follows the opcode word
            val = struct.unpack_from('>L', data, 2)[0]
            length = 6
        else:
            # 8-bit displacement embedded in the opcode word, sign-extended
            if val & 0x80:
                val -= 256
            length = 2
        dest = OpRegisterIndirectDisplacement(SIZE_LONG, 'pc', val)
    elif operation_code == 0x7:
        # MOVEQ
        instr = 'moveq'
        size = SIZE_LONG
        val = instruction & 0xff
        if val & 0x80:
            val |= 0xffffff00
        source = OpImmediate(size, val)
        dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
        length = 2
    elif operation_code == 0x8:
        # OR/DIV/SBCD
        if instruction & 0xf0c0 == 0x80c0:
            if instruction & 0x0100:
                instr = 'divs'
            else:
                instr = 'divu'
            size = SIZE_WORD
            dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
            source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
            if extra_source is None:
                return error_value
            length = 2+extra_source
        elif instruction & 0xf1f0 == 0x8100:
            instr = 'sbcd'
            length = 2
            dest = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7])
            source = OpRegisterDirect(SIZE_BYTE, Registers[instruction & 7])
            if instruction & 8:
                dest = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[((instruction >> 9) & 7) + 8])
                source = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[(instruction & 7) + 8])
        elif instruction & 0xf130 == 0x8100:
            if instruction & 0x0040:
                instr = 'pack'
                if instruction & 8:
                    dest = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[((instruction >> 9) & 7) + 8])
                    source = OpRegisterIndirectPredecrement(SIZE_WORD, Registers[(instruction & 7) + 8])
                else:
                    dest = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7])
                    source = OpRegisterDirect(SIZE_WORD, Registers[instruction & 7])
            else:
                instr = 'unpk'
                if instruction & 8:
                    dest = OpRegisterIndirectPredecrement(SIZE_WORD, Registers[((instruction >> 9) & 7) + 8])
                    source = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[(instruction & 7) + 8])
                else:
                    dest = OpRegisterDirect(SIZE_WORD, Registers[(instruction >> 9) & 7])
                    source = OpRegisterDirect(SIZE_BYTE, Registers[instruction & 7])
            length = 4
            third = OpImmediate(SIZE_WORD, struct.unpack_from(">H", data, 2)[0])
        else:
            instr = 'or'
            opmode = (instruction >> 6) & 0x7
            size = (instruction >> 6) & 3
            dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
            source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
            if opmode & 4:
                source, dest = dest, source
            if extra_source is None:
                return error_value
            length = 2+extra_source
    elif operation_code == 0x9:
        # SUB/SUBA/SUBX
        instr = 'sub'
        opmode = (instruction >> 6) & 0x7
        if opmode in (0x03, 0x07):
            instr = 'suba'
            if opmode == 0x03:
                size = SIZE_WORD
            else:
                size = SIZE_LONG
            dest = OpRegisterDirect(SIZE_LONG, Registers[((instruction >> 9) & 7) + 8])
        else:
            size = (instruction >> 6) & 3
            dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
        source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
        if instr == 'sub' and opmode & 4:
            if isinstance(source, OpRegisterDirect):
                instr = 'subx'
                if source.reg[0] == 'a' or source.reg == 'sp':
                    source = OpRegisterIndirectPredecrement(size, source.reg)
                    dest = OpRegisterIndirectPredecrement(size, dest.reg)
            else:
                source, dest = dest, source
        if extra_source is None:
            return error_value
        length = 2+extra_source
    elif operation_code == 0xa:
        # (unassigned, reserved)
        pass
    elif operation_code == 0xb:
        # CMP/EOR
        instr = 'cmp'
        opmode = (instruction >> 6) & 0x7
        if opmode in (0x03, 0x07):
            instr = 'cmpa'
            if opmode == 0x03:
                size = SIZE_WORD
            else:
                size = SIZE_LONG
            dest = OpRegisterDirect(size, Registers[((instruction >> 9) & 7) + 8])
        else:
            size = (instruction >> 6) & 3
            dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
        source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
        if instr == 'cmp' and opmode & 4:
            if instruction & 0x0038 == 0x0008:
                instr = 'cmpm'
                # bit 3 is set in this encoding, so `& 15` indexes Registers at 8 and above
                source = OpRegisterIndirectPostincrement(size, Registers[instruction & 15])
                dest = OpRegisterIndirectPostincrement(size, Registers[((instruction >> 9) & 7) + 8])
            else:
                source, dest = dest, source
                instr = 'eor'
        if extra_source is None:
            return error_value
        length = 2+extra_source
    elif operation_code == 0xc:
        # AND/MUL/ABCD/EXG
        if instruction & 0xf0c0 == 0xc0c0:
            if instruction & 0x0100:
                instr = 'muls'
            else:
                instr = 'mulu'
            size = SIZE_WORD
            source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
            dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
            if extra_source is None:
                return error_value
            length = 2+extra_source
        elif instruction & 0xf130 == 0xc100:
            if instruction & 0xf1f0 == 0xc100:
                instr = 'abcd'
                if instruction & 0x0008:
                    source = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[(instruction & 7) + 8])
                    dest = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[((instruction >> 9) & 7) + 8])
                else:
                    source = OpRegisterDirect(SIZE_BYTE, Registers[instruction & 7])
                    dest = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7])
            else:
                instr = 'exg'
                size = SIZE_LONG
                source = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
                dest = OpRegisterDirect(size, Registers[instruction & 7])
                # NOTE(review): the two variants below build predecrement
                # operands; confirm that is the intended operand type for exg.
                if instruction & 0xf1f8 == 0xc148:
                    source = OpRegisterIndirectPredecrement(size, Registers[((instruction >> 9) & 7) + 8])
                    dest = OpRegisterIndirectPredecrement(size, Registers[(instruction & 7) + 8])
                if instruction & 0xf1f8 == 0xc188:
                    dest = OpRegisterIndirectPredecrement(size, Registers[(instruction & 7) + 8])
            length = 2
        else:
            instr = 'and'
            opmode = (instruction >> 6) & 0x7
            size = (instruction >> 6) & 3
            dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
            source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
            if opmode & 4:
                source, dest = dest, source
            if extra_source is None:
                return error_value
            length = 2+extra_source
    elif operation_code == 0xd:
        # ADD/ADDA/ADDX
        instr = 'add'
        opmode = (instruction >> 6) & 0x7
        if opmode in (0x03, 0x07):
            instr = 'adda'
            if opmode == 0x03:
                size = SIZE_WORD
            else:
                size = SIZE_LONG
            dest = OpRegisterDirect(SIZE_LONG, Registers[((instruction >> 9) & 7) + 8])
        else:
            size = (instruction >> 6) & 3
            dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
        source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
        if instr == 'add' and opmode & 4:
            if isinstance(source, OpRegisterDirect):
                instr = 'addx'
                if source.reg[0] == 'a' or source.reg == 'sp':
                    source = OpRegisterIndirectPredecrement(size, source.reg)
                    dest = OpRegisterIndirectPredecrement(size, dest.reg)
            else:
                source, dest = dest, source
        if extra_source is None:
            return error_value
        length = 2+extra_source
    elif operation_code == 0xe:
        # shift/rotate/bit field
        if instruction & 0xF8C0 == 0xE0C0:
            # shift/rotate (effective-address operand form)
            size = SIZE_WORD
            direction = (instruction >> 8) & 1
            style = (instruction >> 9) & 3
            dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
            instr = ShiftStyle[style]
            if direction:
                instr += 'l'
            else:
                instr += 'r'
            if extra_dest is None:
                return error_value
            length = 2+extra_dest
        elif instruction & 0xF8C0 == 0xE8C0:
            # bit field instructions
            # TODO: operands are not decoded yet
            style = (instruction >> 8) & 0x7
            instr = 'bf'+BitfieldStyle[style]
            length = 4
        else:
            # shift/rotate (register operand form)
            size = (instruction >> 6) & 3
            direction = (instruction >> 8) & 1
            style = (instruction >> 3) & 3
            if (instruction >> 5) & 1:
                source = OpRegisterDirect(SIZE_LONG, Registers[(instruction >> 9) & 7])
            else:
                val = (instruction >> 9) & 7
                if val == 0:
                    # a zero count field encodes a shift of 8
                    val = 8
                source = OpImmediate(SIZE_BYTE, val)
            dest = OpRegisterDirect(size, Registers[instruction & 7])
            instr = ShiftStyle[style]
            if direction:
                instr += 'l'
            else:
                instr += 'r'
            length = 2
    elif operation_code == 0xf:
        if instruction & 0xff20 == 0xf420:
            instr = 'cpush'
            length = 2
        elif instruction & 0xff80 == 0xff80:
            # must set the mnemonic; assigning to `instruction` here left
            # `instr` as None and the opcode was reported as unimplemented
            instr = 'illFF'
            length = 2
        # coprocessor instructions
        # TODO
    if instr is None:
        # FIXME uncomment to debug
        #log_error('Bad opcode at 0x{:x}'.format(addr))
        return error_value

    return instr, length, size, source, dest, third
il.test_bit(1, il.reg(1, LLIL_TEMP(0)), il.const(1, 0x01)))) + il.append(il.set_flag('v', il.test_bit(1, il.reg(1, LLIL_TEMP(0)), il.const(1, 0x02)))) + il.append(il.set_flag('z', il.test_bit(1, il.reg(1, LLIL_TEMP(0)), il.const(1, 0x04)))) + il.append(il.set_flag('n', il.test_bit(1, il.reg(1, LLIL_TEMP(0)), il.const(1, 0x08)))) + il.append(il.set_flag('x', il.test_bit(1, il.reg(1, LLIL_TEMP(0)), il.const(1, 0x10)))) + else: + flags = 'nzvc' + if ((isinstance(source, OpRegisterDirect) and source.reg in ('usp', 'ccr', 'sr')) or + (isinstance(dest, OpRegisterDirect) and dest.reg in ('usp', 'ccr', 'sr'))): + # move to/from control registers do not set flags + flags = 0 + il.append( + dest.get_dest_il(il, + source.get_source_il(il), + flags + ) + ) + elif instr in ('movea', 'movec'): + # dest.size = SIZE_LONG + # il.append( + # dest.get_dest_il(il, + # il.sign_extend(4, + # source.get_source_il(il) + # ) + # ) + # ) + il.append( + dest.get_dest_il(il, + source.get_source_il(il) + ) + ) + elif instr == 'clr': + il.append( + dest.get_dest_il(il, + il.const(4, 0), + 'nzvc' + ) + ) + elif instr in ('add', 'addi', 'addq'): + il.append( + dest.get_dest_il(il, + il.add(size_bytes, + dest.get_source_il(il), + source.get_source_il(il), + flags='*' + ) + ) + ) + elif instr == 'adda': + dest.size = SIZE_LONG + il.append( + dest.get_dest_il(il, + il.add(4, + dest.get_source_il(il), + il.sign_extend(4, + source.get_source_il(il) + ) + ) + ) + ) + elif instr == 'addx': + il.append( + dest.get_dest_il(il, + il.add(size_bytes, + il.add(size_bytes, + dest.get_source_il(il), + source.get_source_il(il), + flags='*' + ), + il.flag('x'), + flags='*' + ) + ) + ) + elif instr in ('sub', 'subi', 'subq'): + il.append( + dest.get_dest_il(il, + il.sub(size_bytes, + dest.get_source_il(il), + source.get_source_il(il), + flags='*' + ) + ) + ) + elif instr == 'suba': + dest.size = SIZE_LONG + il.append( + dest.get_dest_il(il, + il.sub(4, + dest.get_source_il(il), + il.sign_extend(4, + 
source.get_source_il(il) + ) + ) + ) + ) + elif instr == 'subx': + il.append( + dest.get_dest_il(il, + il.sub(size_bytes, + il.sub(size_bytes, + dest.get_source_il(il), + source.get_source_il(il), + flags='*' + ), + il.flag('x'), + flags='*' + ) + ) + ) + elif instr == 'neg': + il.append( + dest.get_dest_il(il, + il.neg_expr(size_bytes, + dest.get_source_il(il), + flags='*' + ) + ) + ) + elif instr == 'negx': + il.append( + dest.get_dest_il(il, + il.sub(size_bytes, + il.neg_expr(size_bytes, + dest.get_source_il(il), + flags='*' + ), + il.flag('x'), + flags='*' + ) + ) + ) + elif instr == 'abcd': + # TODO + il.append(il.unimplemented()) + elif instr == 'sbcd': + # TODO + il.append(il.unimplemented()) + elif instr == 'nbcd': + # TODO + il.append(il.unimplemented()) + elif instr == 'pack': + il.append( + il.set_reg(2, + LLIL_TEMP(0), + il.add(2, + source.get_source_il(il), + third.get_source_il(il) + ) + ) + ) + il.append( + dest.get_dest_il(il, + il.or_expr(1, + il.and_expr(2, + il.reg(2, LLIL_TEMP(0)), + il.const(2, 0x000F) + ), + il.logical_shift_right(2, + il.and_expr(2, + il.reg(2, LLIL_TEMP(0)), + il.const(2, 0x0F00) + ), + il.const(1, 4) + ) + ) + ) + ) + elif instr == 'unpk': + il.append( + il.set_reg(1, + LLIL_TEMP(0), + source.get_source_il(il) + ) + ) + il.append( + dest.get_dest_il(il, + il.add(2, + il.or_expr(2, + il.and_expr(2, + il.reg(1, LLIL_TEMP(0)), + il.const(1, 0x0F) + ), + il.shift_left(2, + il.and_expr(2, + il.reg(1, LLIL_TEMP(0)), + il.const(1, 0xF0) + ), + il.const(1, 4) + ) + ), + third.get_source_il(il) + ) + ) + ) + elif instr in ('muls', 'mulu'): + if isinstance(dest, OpRegisterDirectPair): + il.append( + il.set_reg_split(4, + dest.reg1, + dest.reg2, + il.mult(4, + source.get_source_il(il), + dest.get_source_il(il)[0], + flags='nzvc' + ) + ) + ) + else: + il.append( + il.set_reg(4, + dest.reg, + il.mult(4, + source.get_source_il(il), + dest.get_source_il(il), + flags='nzvc' + ) + ) + ) + elif instr == 'divs': + if size == 1: + dividend_il 
= dest.get_source_il(il) + divisor_il = source.get_source_il(il) + dest.size = SIZE_LONG + il.append( + dest.get_dest_il(il, + il.or_expr(4, + il.shift_left(4, il.mod_signed(2, dividend_il, divisor_il), il.const(1, 16)), + il.div_signed(2, dividend_il, divisor_il, flags='nzvc') + ) + ) + ) + elif isinstance(dest, OpRegisterDirect): + dividend_il = dest.get_source_il(il) + divisor_il = source.get_source_il(il) + il.append( + dest.get_dest_il(il, + il.div_signed(4, dividend_il, divisor_il, flags='nzvc') + ) + ) + else: + dividend_il = il.or_expr(8, il.shift_left(8, il.reg(4, dest.reg1), il.const(1, 32)), il.reg(4, dest.reg2)) + divisor_il = source.get_source_il(il) + il.append( + il.set_reg(4, + LLIL_TEMP(0), + il.mod_signed(4, dividend_il, divisor_il) + ) + ) + il.append( + il.set_reg(4, + dest.reg2, + il.div_signed(4, dividend_il, divisor_il, flags='nzvc') + ) + ) + il.append( + il.set_reg(4, + dest.reg1, + il.reg(4, LLIL_TEMP(0)) + ) + ) + elif instr == 'divsl': + dividend_il = il.reg(4, dest.reg2) + divisor_il = source.get_source_il(il) + il.append( + il.set_reg(4, + dest.reg1, + il.mod_signed(4, dividend_il, divisor_il) + ) + ) + il.append( + il.set_reg(4, + dest.reg2, + il.div_signed(4, dividend_il, divisor_il, flags='nzvc') + ) + ) + elif instr == 'divu': + if size == 1: + dividend_il = dest.get_source_il(il) + divisor_il = source.get_source_il(il) + dest.size = SIZE_LONG + il.append( + dest.get_dest_il(il, + il.or_expr(4, + il.shift_left(4, il.mod_unsigned(2, dividend_il, divisor_il), il.const(1, 16)), + il.div_unsigned(2, dividend_il, divisor_il, flags='nzvc') + ) + ) + ) + elif isinstance(dest, OpRegisterDirect): + dividend_il = dest.get_source_il(il) + divisor_il = source.get_source_il(il) + il.append( + dest.get_dest_il(il, + il.div_unsigned(4, dividend_il, divisor_il, flags='nzvc') + ) + ) + else: + dividend_il = il.or_expr(8, il.shift_left(8, il.reg(4, dest.reg1), il.const(1, 32)), il.reg(4, dest.reg2)) + divisor_il = source.get_source_il(il) + 
il.append( + il.set_reg(4, + LLIL_TEMP(0), + il.mod_unsigned(4, dividend_il, divisor_il) + ) + ) + il.append( + il.set_reg(4, + dest.reg2, + il.div_unsigned(4, dividend_il, divisor_il, flags='nzvc') + ) + ) + il.append( + il.set_reg(4, + dest.reg1, + il.reg(4, LLIL_TEMP(0)) + ) + ) + elif instr == 'divul': + dividend_il = il.reg(4, dest.reg2) + divisor_il = source.get_source_il(il) + il.append( + il.set_reg(4, + dest.reg1, + il.mod_unsigned(4, dividend_il, divisor_il) + ) + ) + il.append( + il.set_reg(4, + dest.reg2, + il.div_unsigned(4, dividend_il, divisor_il, flags='nzvc') + ) + ) + elif instr == 'cas': + skip_label_found = True + + skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + + if skip is None: + skip = LowLevelILLabel() + skip_label_found = False + + il.append( + il.sub(size_bytes, + third.get_source_il(il), + source.get_source_il(il), + flags='nzvc' + ) + ) + + equal = LowLevelILLabel() + not_equal = LowLevelILLabel() + + il.append( + il.if_expr(il.flag_condition(LowLevelILFlagCondition.LLFC_E), equal, not_equal) + ) + + il.mark_label(equal) + + il.append( + third.get_dest_il(il, + dest.get_source_il(il) + ) + ) + + il.append( + il.goto(skip) + ) + + il.mark_label(not_equal) + + il.append( + source.get_dest_il(il, + third.get_source_il(il) + ) + ) + + if not skip_label_found: + il.mark_label(skip) + elif instr == 'cas2': + skip_label_found = True + + skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + + if skip is None: + skip = LowLevelILLabel() + skip_label_found = False + + il.append( + il.sub(size_bytes, + third.get_source_il(il)[0], + source.get_source_il(il)[0], + flags='nzvc' + ) + ) + + equal = LowLevelILLabel() + not_equal = LowLevelILLabel() + check2 = LowLevelILLabel() + + il.append( + il.if_expr(il.flag_condition(LowLevelILFlagCondition.LLFC_E), check2, not_equal) + ) + + il.mark_label(check2) + + il.append( + il.sub(size_bytes, + third.get_source_il(il)[1], + 
source.get_source_il(il)[1], + flags='nzvc' + ) + ) + + il.append( + il.if_expr(il.flag_condition(LowLevelILFlagCondition.LLFC_E), equal, not_equal) + ) + + il.mark_label(equal) + + for it in third.get_dest_il(il, + dest.get_source_il(il) + ): + il.append(it) + + il.append( + il.goto(skip) + ) + + il.mark_label(not_equal) + + for it in source.get_dest_il(il, + third.get_source_il(il) + ): + il.append(it) + + il.append( + il.goto(skip) + ) + + if not skip_label_found: + il.mark_label(skip) + elif instr == 'chk': + skip_label_found = True + + skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + + if skip is None: + skip = LowLevelILLabel() + skip_label_found = False + + trap = LowLevelILLabel() + check = LowLevelILLabel() + + il.append( + il.if_expr( + il.compare_unsigned_less_than(size_bytes, + dest.get_source_il(il), + il.const(size_bytes, 0) + ), + trap, + check + ) + ) + + il.mark_label(check) + + il.append( + il.if_expr( + il.compare_unsigned_greater_than(size_bytes, + dest.get_source_il(il), + source.get_source_il(il) + ), + trap, + skip + ) + ) + + il.mark_label(trap) + + il.append( + il.system_call() + ) + + il.append( + il.goto(skip) + ) + + if not skip_label_found: + il.mark_label(skip) + elif instr == 'chk2': + skip_label_found = True + + skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + + if skip is None: + skip = LowLevelILLabel() + skip_label_found = False + + trap = LowLevelILLabel() + check = LowLevelILLabel() + + il.append( + il.set_reg(4, + LLIL_TEMP(0), + source.get_address_il(il) + ) + ) + + il.append( + il.if_expr( + il.compare_unsigned_less_than(size_bytes, + dest.get_source_il(il), + il.load(size_bytes, + il.reg(4, LLIL_TEMP(0)) + ) + ), + trap, + check + ) + ) + + il.mark_label(check) + + il.append( + il.if_expr( + il.compare_unsigned_greater_than(size_bytes, + dest.get_source_il(il), + il.load(size_bytes, + il.add(4, + il.reg(4, LLIL_TEMP(0)), + il.const(4, size_bytes) + ) + ) 
+ ), + trap, + skip + ) + ) + + il.mark_label(trap) + + il.append( + il.system_call() + ) + + il.append( + il.goto(skip) + ) + + if not skip_label_found: + il.mark_label(skip) + elif instr == 'bchg': + bit_number_il = il.mod_unsigned(1, + source.get_source_il(il), + il.const(1, 8 << dest.size) + ) + il.append( + il.set_flag('z', + il.compare_not_equal(4, + il.test_bit(4, + dest.get_source_il(il), + il.shift_left(4, + il.const(4, 1), + bit_number_il + ) + ), + il.const(4, 0) + ) + ) + ) + il.append( + dest.get_dest_il(il, + il.xor_expr(4, + dest.get_source_il(il), + il.shift_left(4, + il.const(4, 1), + bit_number_il + ) + ) + ) + ) + elif instr == 'bclr': + bit_number_il = il.mod_unsigned(1, + source.get_source_il(il), + il.const(1, 8 << dest.size) + ) + il.append( + il.set_flag('z', + il.compare_not_equal(4, + il.test_bit(4, + dest.get_source_il(il), + il.shift_left(4, + il.const(4, 1), + bit_number_il + ) + ), + il.const(4, 0) + ) + ) + ) + il.append( + dest.get_dest_il(il, + il.and_expr(4, + dest.get_source_il(il), + il.not_expr(4, + il.shift_left(4, + il.const(4, 1), + bit_number_il + ) + ) + ) + ) + ) + elif instr == 'bset': + bit_number_il = il.mod_unsigned(1, + source.get_source_il(il), + il.const(1, 8 << dest.size) + ) + il.append( + il.set_flag('z', + il.compare_not_equal(4, + il.test_bit(4, + dest.get_source_il(il), + il.shift_left(4, + il.const(4, 1), + bit_number_il + ) + ), + il.const(4, 0) + ) + ) + ) + il.append( + dest.get_dest_il(il, + il.or_expr(4, + dest.get_source_il(il), + il.shift_left(4, + il.const(4, 1), + bit_number_il + ) + ) + ) + ) + elif instr == 'btst': + bit_number_il = il.mod_unsigned(1, + source.get_source_il(il), + il.const(1, 8 << dest.size) + ) + il.append( + il.set_flag('z', + il.compare_not_equal(4, + il.test_bit(4, + dest.get_source_il(il), + il.shift_left(4, + il.const(4, 1), + bit_number_il + ) + ), + il.const(4, 0) + ) + ) + ) + elif instr in ('asl', 'lsl'): + source_il = il.const(1, 1) + if source is not None: + source_il = 
source.get_source_il(il) + il.append( + dest.get_dest_il(il, + il.shift_left(size_bytes, + dest.get_source_il(il), + source_il, + flags='*' + ) + ) + ) + elif instr == 'asr': + source_il = il.const(1, 1) + if source is not None: + source_il = source.get_source_il(il) + il.append( + dest.get_dest_il(il, + il.arith_shift_right(size_bytes, + dest.get_source_il(il), + source_il, + flags='*' + ) + ) + ) + elif instr == 'lsr': + source_il = il.const(1, 1) + if source is not None: + source_il = source.get_source_il(il) + il.append( + dest.get_dest_il(il, + il.logical_shift_right(size_bytes, + dest.get_source_il(il), + source_il, + flags='*' + ) + ) + ) + elif instr == 'rol': + source_il = il.const(1, 1) + if source is not None: + source_il = source.get_source_il(il) + il.append( + dest.get_dest_il(il, + il.rotate_left(size_bytes, + dest.get_source_il(il), + source_il, + flags='*' + ) + ) + ) + elif instr == 'ror': + source_il = il.const(1, 1) + if source is not None: + source_il = source.get_source_il(il) + il.append( + dest.get_dest_il(il, + il.rotate_right(size_bytes, + dest.get_source_il(il), + source_il, + flags='*' + ) + ) + ) + elif instr == 'roxl': + source_il = il.const(1, 1) + if source is not None: + source_il = source.get_source_il(il) + il.append( + dest.get_dest_il(il, + il.rotate_left_carry(size_bytes, + dest.get_source_il(il), + source_il, + il.flag('x'), + flags='*' + ) + ) + ) + elif instr == 'roxr': + source_il = il.const(1, 1) + if source is not None: + source_il = source.get_source_il(il) + il.append( + dest.get_dest_il(il, + il.rotate_right_carry(size_bytes, + dest.get_source_il(il), + source_il, + il.flag('x'), + flags='*' + ) + ) + ) + elif instr in ('cmp', 'cmpi', 'cmpm'): + il.append( + il.sub(size_bytes, + dest.get_source_il(il), + source.get_source_il(il), + flags='nzvc' + ) + ) + elif instr == 'cmpa': + dest.size = SIZE_LONG + il.append( + il.sub(4, + dest.get_source_il(il), + il.sign_extend(4, + source.get_source_il(il) + ), + flags='nzvc' + ) 
+ ) + elif instr == 'cmp2': + skip_label_found = True + + skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + + if skip is None: + skip = LowLevelILLabel() + skip_label_found = False + + check = LowLevelILLabel() + + il.append( + il.set_reg(4, + LLIL_TEMP(0), + source.get_address_il(il) + ) + ) + + il.append( + il.sub(size_bytes, + dest.get_source_il(il), + il.load(size_bytes, + il.reg(4, LLIL_TEMP(0)) + ), + flags='nzvc' + ) + ) + + il.append( + il.if_expr( + il.flag_condition(LowLevelILFlagCondition.LLFC_ULT), + skip, + check + ) + ) + + il.mark_label(check) + + il.append( + il.sub(size_bytes, + dest.get_source_il(il), + il.load(size_bytes, + il.add(4, + il.reg(4, LLIL_TEMP(0)), + il.const(4, size_bytes) + ) + ), + flags='nzvc' + ) + ) + + il.append( + il.goto(skip) + ) + + if not skip_label_found: + il.mark_label(skip) + elif instr == 'tas': + il.append( + il.set_reg(1, LLIL_TEMP(0), dest.get_source_il(il), flags='nzvc') + ) + il.append( + dest.get_dest_il(il, + il.or_expr(1, + il.reg(1, LLIL_TEMP(0)), + il.const(1, 0x80) + ) + ) + ) + elif instr == 'tst': + il.append( + il.sub(size_bytes, + dest.get_source_il(il), + il.const(4, 0), + flags='nzvc' + ) + ) + elif instr in ('and', 'andi'): + if instr == 'andi' and isinstance(dest, OpRegisterDirect) and dest.reg in ('ccr', 'sr'): + if not source.value & 0x01: il.append(il.set_flag('c', il.const(1, 0))) + if not source.value & 0x02: il.append(il.set_flag('v', il.const(1, 0))) + if not source.value & 0x04: il.append(il.set_flag('z', il.const(1, 0))) + if not source.value & 0x08: il.append(il.set_flag('n', il.const(1, 0))) + if not source.value & 0x11: il.append(il.set_flag('x', il.const(1, 0))) + else: + il.append( + dest.get_dest_il(il, + il.and_expr(size_bytes, + dest.get_source_il(il), + source.get_source_il(il), + flags='nzvc' + ) + ) + ) + elif instr in ('or', 'ori'): + if instr == 'ori' and isinstance(dest, OpRegisterDirect) and dest.reg in ('ccr', 'sr'): + if source.value & 
0x01: il.append(il.set_flag('c', il.const(1, 1))) + if source.value & 0x02: il.append(il.set_flag('v', il.const(1, 1))) + if source.value & 0x04: il.append(il.set_flag('z', il.const(1, 1))) + if source.value & 0x08: il.append(il.set_flag('n', il.const(1, 1))) + if source.value & 0x11: il.append(il.set_flag('x', il.const(1, 1))) + else: + il.append( + dest.get_dest_il(il, + il.or_expr(size_bytes, + dest.get_source_il(il), + source.get_source_il(il), + flags='nzvc' + ) + ) + ) + elif instr in ('eor', 'eori'): + if instr == 'eori' and isinstance(dest, OpRegisterDirect) and dest.reg in ('ccr', 'sr'): + if source.value & 0x01: il.append(il.set_flag('c', il.xor_expr(1, il.flag('c'), il.const(1, 1)))) + if source.value & 0x02: il.append(il.set_flag('v', il.xor_expr(1, il.flag('v'), il.const(1, 1)))) + if source.value & 0x04: il.append(il.set_flag('z', il.xor_expr(1, il.flag('z'), il.const(1, 1)))) + if source.value & 0x08: il.append(il.set_flag('n', il.xor_expr(1, il.flag('n'), il.const(1, 1)))) + if source.value & 0x11: il.append(il.set_flag('x', il.xor_expr(1, il.flag('x'), il.const(1, 1)))) + else: + il.append( + dest.get_dest_il(il, + il.xor_expr(size_bytes, + dest.get_source_il(il), + source.get_source_il(il), + flags='nzvc' + ) + ) + ) + elif instr == 'not': + il.append( + dest.get_dest_il(il, + il.not_expr(size_bytes, + dest.get_source_il(il), + flags='nzvc' + ) + ) + ) + elif instr == 'swap': + il.append( + dest.get_dest_il(il, + il.rotate_right(4, + dest.get_source_il(il), + il.const(1, 16) + ) + ) + ) + elif instr == 'exg': + il.append( + il.set_reg(4, LLIL_TEMP(0), source.get_source_il(il)) + ) + il.append( + source.get_dest_il(il, dest.get_source_il(il)) + ) + il.append( + dest.get_dest_il(il, il.reg(4, LLIL_TEMP(0))) + ) + elif instr == 'ext': + if not dest: + il.append(il.unimplemented()) + elif dest.size == 1: + il.append( + il.set_reg(2, + dest.reg, + il.sign_extend(4, + il.reg(1, dest.reg), + flags='nzvc' + ) + ) + ) + else: + il.append( + il.set_reg(4, + 
dest.reg, + il.sign_extend(4, + il.reg(2, dest.reg), + flags='nzvc' + ) + ) + ) + elif instr == 'extb': + reg = dest.reg + il.append( + il.set_reg(4, + reg, + il.sign_extend(4, + il.reg(1, reg), + flags='nzvc' + ) + ) + ) + elif instr == 'movem': + if isinstance(source, OpRegisterMovemList): + if isinstance(dest, OpRegisterIndirectPredecrement): + il.append( + il.set_reg(4, LLIL_TEMP(0), dest.get_address_il(il)) + ) + if self.movem_store_decremented: + il.append( + il.set_reg(4, + dest.reg, + il.sub(4, + il.reg(4, LLIL_TEMP(0)), + il.const(4, len(source.regs)*size_bytes) + ) + ) + ) + for k in range(len(source.regs)): + il.append( + il.store(size_bytes, + il.sub(4, + il.reg(4, LLIL_TEMP(0)), + il.const(4, (k+1)*size_bytes) + ), + il.reg(size_bytes, source.regs[len(source.regs)-1-k]) + ) + ) + if not self.movem_store_decremented: + il.append( + il.set_reg(4, + dest.reg, + il.sub(4, + il.reg(4, LLIL_TEMP(0)), + il.const(4, len(source.regs)*size_bytes) + ) + ) + ) + else: + il.append( + il.set_reg(4, LLIL_TEMP(0), dest.get_address_il(il)) + ) + for k in range(len(source.regs)): + il.append( + il.store(size_bytes, + il.add(4, + il.reg(4, LLIL_TEMP(0)), + il.const(4, k*size_bytes) + ), + il.reg(size_bytes, source.regs[k]) + ) + ) + else: + il.append( + il.set_reg(4, LLIL_TEMP(0), source.get_address_il(il)) + ) + for k in range(len(dest.regs)): + il.append( + il.set_reg(size_bytes, + dest.regs[k], + il.load(size_bytes, + il.add(4, + il.reg(4, LLIL_TEMP(0)), + il.const(4, k*size_bytes) + ) + ) + ) + ) + if isinstance(source, OpRegisterIndirectPostincrement): + il.append( + il.set_reg(4, + source.reg, + il.add(4, + il.reg(4, LLIL_TEMP(0)), + il.const(4, len(dest.regs)*size_bytes) + ) + ) + ) + elif instr == 'lea': + il.append( + dest.get_dest_il(il, source.get_address_il(il)) + ) + elif instr == 'pea': + il.append( + il.push(4, dest.get_address_il(il)) + ) + elif instr == 'link': + source.size = SIZE_LONG + il.append( + il.push(4, source.get_source_il(il)) + ) + il.append( 
+ source.get_dest_il(il, il.reg(4, "sp")) + ) + il.append( + il.set_reg(4, + "sp", + il.add(4, + il.reg(4, "sp"), + il.sign_extend(4, dest.get_source_il(il)) + ) + ) + ) + elif instr == 'unlk': + il.append( + il.set_reg(4, "sp", source.get_source_il(il)) + ) + il.append( + source.get_dest_il(il, il.pop(4)) + ) + elif instr in ('jmp', 'bra'): + tmpil = LowLevelILFunction(il.arch) + _dest_il = dest.get_address_il2(tmpil) + dest_il = _dest_il[0] + for i in _dest_il[1]: + tmpil.append(i) + + dstlabel = None + try: + if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST: + dstlabel = il.get_label_for_address(il.arch, tmpil[dest_il].constant) + except: + raise + + if dstlabel is not None: + il.append( + il.goto(dstlabel) + ) + else: + il.append( + il.jump(dest.get_address_il(il)) + ) + elif instr in ('jsr', 'bsr'): + il.append( + il.call(dest.get_address_il(il)) + ) + elif instr == 'callm': + # TODO + il.append(il.unimplemented()) + elif instr == 'cpush': + # TODO + il.append(il.unimplemented()) + elif instr in ('bhi', 'bls', 'bcc', 'bcs', 'bne', 'beq', 'bvc', 'bvs', + 'bpl', 'bmi', 'bge', 'blt', 'bgt', 'ble'): + flag_cond = ConditionMapping.get(instr[1:], None) + tmpil = LowLevelILFunction(il.arch) + _dest_il = dest.get_address_il2(tmpil) + dest_il = _dest_il[0] + for i in _dest_il[1]: + tmpil.append(i) + cond_il = None + + if flag_cond is not None: + cond_il = il.flag_condition(flag_cond) + + if cond_il is None: + il.append(il.unimplemented()) + else: + t = None + if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST: + t = il.get_label_for_address(il.arch, tmpil[dest_il].constant) + + indirect = False + + if t is None: + t = LowLevelILLabel() + indirect = True + + f_label_found = True + + f = il.get_label_for_address(il.arch, il.current_address+length) + + if f is None: + f = LowLevelILLabel() + f_label_found = False + + il.append( + il.if_expr(cond_il, t, f) + ) + + if indirect: + il.mark_label(t) + il.append(il.jump(dest.get_address_il(il))) + + if 
not f_label_found: + il.mark_label(f) + elif instr in ('dbt', 'dbf', 'dbhi', 'dbls', 'dbcc', 'dbcs', 'dbne', + 'dbeq', 'dbvc', 'dbvs', 'dbpl', 'dbmi', 'dbge', 'dblt', + 'dbgt', 'dble'): + flag_cond = ConditionMapping.get(instr[2:], None) + tmpil = LowLevelILFunction(il.arch) + _dest_il = dest.get_address_il2(tmpil) + dest_il = _dest_il[0] + for i in _dest_il[1]: + tmpil.append(i) + cond_il = None + + if flag_cond is not None: + cond_il = il.flag_condition(flag_cond) + elif instr == 'dbt': + cond_il = il.const(1, 1) + elif instr == 'dbf': + cond_il = il.const(1, 0) + + if cond_il is None: + il.append(il.unimplemented()) + else: + branch = None + if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST: + branch = il.get_label_for_address(Architecture['M68000'], tmpil[dest_il].constant) + + indirect = False + + if branch is None: + branch = LowLevelILLabel() + indirect = True + + skip_label_found = True + + skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + + if skip is None: + skip = LowLevelILLabel() + skip_label_found = False + + decrement = LowLevelILLabel() + + il.append( + il.if_expr(cond_il, skip, decrement) + ) + + il.mark_label(decrement) + + il.append( + il.set_reg(2, + LLIL_TEMP(0), + il.sub(2, + source.get_source_il(il), + il.const(2, 1) + ) + ) + ) + + il.append( + source.get_dest_il(il, il.reg(2, LLIL_TEMP(0))) + ) + + il.append( + il.if_expr( + il.compare_equal(2, + il.reg(2, LLIL_TEMP(0)), + il.const(2, -1) + ), + skip, + branch + ) + ) + + if indirect: + il.mark_label(branch) + il.append(il.jump(dest.get_address_il(il))) + + if not skip_label_found: + il.mark_label(skip) + elif instr in ('st', 'sf', 'shi', 'sls', 'scc', 'scs', 'sne', 'seq', + 'svc', 'svs', 'spl', 'smi', 'sge', 'slt', 'sgt', 'sle'): + flag_cond = ConditionMapping.get(instr[1:], None) + cond_il = None + + if flag_cond is not None: + cond_il = il.flag_condition(flag_cond) + elif instr == 'st': + cond_il = il.const(1, 1) + elif instr == 'sf': + 
cond_il = il.const(1, 0) + + if cond_il is None: + il.append(il.unimplemented()) + else: + skip_label_found = True + + skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + + if skip is None: + skip = LowLevelILLabel() + skip_label_found = False + + set_dest = LowLevelILLabel() + clear_dest = LowLevelILLabel() + + il.append( + il.if_expr(cond_il, set_dest, clear_dest) + ) + + il.mark_label(set_dest) + + il.append( + dest.get_dest_il(il, il.const(1, 1)) + ) + + il.append( + il.goto(skip) + ) + + il.mark_label(clear_dest) + + il.append( + dest.get_dest_il(il, il.const(1, 0)) + ) + + il.append( + il.goto(skip) + ) + + if not skip_label_found: + il.mark_label(skip) + elif instr == 'rtd': + il.append( + il.set_reg(4, + LLIL_TEMP(0), + il.pop(4) + ) + ) + il.append( + il.set_reg(4, 'sp', + il.add(4, + il.reg(4, 'sp'), + il.sign_extend(4, il.const(2, + dest.value), + 0 + ) + ) + ) + ) + il.append( + il.ret( + il.reg(4, LLIL_TEMP(0)) + ) + ) + elif instr == 'rte': + il.append( + il.set_reg(2, + "sr", + il.pop(2) + ) + ) + il.append( + il.ret( + il.pop(4) + ) + ) + elif instr == 'rtm': + # TODO + il.append(il.unimplemented()) + elif instr == 'rtr': + il.append( + il.set_reg(2, + "ccr", + il.pop(2) + ) + ) + il.append( + il.ret( + il.pop(4) + ) + ) + elif instr == 'rts': + il.append( + il.ret( + il.pop(4) + ) + ) + elif instr in ('trapv', 'trapt', 'trapf', 'traphi', 'trapls', 'trapcc', + 'trapcs', 'trapne', 'trapeq', 'trapvc', 'trapvs', 'trappl', + 'trapmi', 'trapge', 'traplt', 'trapgt', 'traple'): + flag_cond = ConditionMapping.get(instr[4:], None) + cond_il = None + + if flag_cond is not None: + cond_il = il.flag_condition(flag_cond) + elif instr == 'trapt': + cond_il = il.const(1, 1) + elif instr == 'trapf': + cond_il = il.const(1, 0) + elif instr == 'trapv': + cond_il = il.flag_condition(LowLevelILFlagCondition.LLFC_O) + + if cond_il is None: + il.append(il.unimplemented()) + else: + skip_label_found = True + + skip = 
il.get_label_for_address(Architecture['M68000'], il.current_address+length) + + if skip is None: + skip = LowLevelILLabel() + skip_label_found = False + + trap = LowLevelILLabel() + + il.append( + il.if_expr(cond_il, trap, skip) + ) + + il.mark_label(trap) + + il.append( + il.system_call() + ) + + il.append( + il.goto(skip) + ) + + if not skip_label_found: + il.mark_label(skip) + elif instr in ('trap', 'illegal', 'bkpt'): + il.append(il.system_call()) + elif instr in ('bgnd', 'nop', 'reset', 'stop'): + il.append(il.nop()) + else: + il.append(il.unimplemented()) + + def get_instruction_info(self, data: bytes, addr: int) -> Optional[InstructionInfo]: + instr, length, _size, _source, dest, _third = self.decode_instruction(data, addr) + if instr == 'unimplemented': + return None + + result = InstructionInfo() + result.length = length + + if instr in ('rtd', 'rte', 'rtr', 'rts'): + result.add_branch(BranchType.FunctionReturn) + elif instr in ('jmp', 'jsr', + 'bra', 'bsr', 'bhi', 'bls', 'bcc', 'bcs', 'bne', 'beq', + 'bvc', 'bvs', 'bpl', 'bmi', 'bge', 'blt', 'bgt', 'ble', + 'dbt', 'dbf', 'dbhi', 'dbls', 'dbcc', 'dbcs', 'dbne', + 'dbeq', 'dbvc', 'dbvs', 'dbpl', 'dbmi', 'dbge', 'dblt', + 'dbgt', 'dble'): + conditional = False + branch_dest = None + + bt = BranchType.UnresolvedBranch + if instr in ('jmp', 'bra'): + bt = BranchType.UnconditionalBranch + elif instr in ('jsr', 'bsr'): + bt = BranchType.CallDestination + else: + conditional = True + + if isinstance(dest, OpAbsolute): + branch_dest = dest.address + elif isinstance(dest, OpRegisterIndirect): + if dest.reg == 'pc': + branch_dest = addr+2 + else: + bt = BranchType.UnresolvedBranch + elif isinstance(dest, OpRegisterIndirectDisplacement): + if dest.reg == 'pc': + branch_dest = addr+2+dest.offset + else: + bt = BranchType.UnresolvedBranch + elif isinstance(dest, OpRegisterIndirectIndex): + bt = BranchType.UnresolvedBranch + + if conditional: + # pylint: disable=unsubscriptable-object + if instr[0:2] == 'db': + 
result.add_branch(BranchType.TrueBranch, addr+length) + result.add_branch(BranchType.FalseBranch, branch_dest) + else: + result.add_branch(BranchType.TrueBranch, branch_dest) + result.add_branch(BranchType.FalseBranch, addr+length) + else: + if bt == BranchType.IndirectBranch or bt == BranchType.UnresolvedBranch or branch_dest is None: + result.add_branch(bt) + else: + result.add_branch(bt, branch_dest) + + return result + + def get_instruction_text(self, data: bytes, addr: int) -> Optional[Tuple[List['function.InstructionTextToken'], int]]: + instr, length, size, source, dest, third = self.decode_instruction(data, addr) + + if size is not None: + # pylint: disable=invalid-sequence-index + instr += SizeSuffix[size] + + tokens = [InstructionTextToken(InstructionTextTokenType.InstructionToken, "%-10s" % instr)] + + if source is not None: + tokens += source.format(addr) + + if dest is not None: + if source is not None: + tokens += [InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ',')] + tokens += dest.format(addr) + + if third is not None: + if source is not None or dest is not None: + tokens += [InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ',')] + tokens += third.format(addr) + + return tokens, length + + def get_instruction_low_level_il(self, data: bytes, addr: int, il: lowlevelil.LowLevelILFunction) -> Optional[int]: + instr, length, size, source, dest, third = self.decode_instruction(data, addr) + + if instr == 'movem': + # movem overrides default predecrement/postincrement IL generation + + self.generate_instruction_il(il, instr, length, size, source, dest, third) + + elif instr is not None: + + # predecrement + if source is not None: + pre_il = source.get_pre_il(il) + if pre_il is not None: + il.append(pre_il) + + if dest is not None: + pre_il = dest.get_pre_il(il) + if pre_il is not None: + il.append(pre_il) + + if third is not None: + pre_il = third.get_pre_il(il) + if pre_il is not None: + il.append(pre_il) + + 
self.generate_instruction_il(il, instr, length, size, source, dest, third) + + # postincrement + if source is not None: + post_il = source.get_post_il(il) + if post_il is not None: + il.append(post_il) + + if dest is not None: + post_il = dest.get_post_il(il) + if post_il is not None: + il.append(post_il) + + if third is not None: + post_il = third.get_post_il(il) + if post_il is not None: + il.append(post_il) + else: + il.append(il.unimplemented()) + return length + + def is_never_branch_patch_available(self, data: bytes, addr: int = 0) -> bool: + data = bytearray(data) + if data[0] & 0xf0 == 0x60: + # BRA, BSR, Bcc + return True + if data[0] == 0x4e and data[1] & 0x80 == 0x80: + # JMP, JSR + return True + return False + + def is_invert_branch_patch_available(self, data: bytes, addr: int = 0) -> bool: + data = bytearray(data) + if data[0] & 0xf0 == 0x60 and data[0] & 0xfe != 0x60: + # Bcc + return True + return False + + def is_always_branch_patch_available(self, data: bytes, addr: int = 0) -> bool: + data = bytearray(data) + if data[0] & 0xf0 == 0x60 and data[0] & 0xfe != 0x60: + # Bcc + return True + return False + + def is_skip_and_return_zero_patch_available(self, data: bytes, addr: int = 0) -> bool: + return self.skip_and_return_value(data, addr, 0) + + def is_skip_and_return_value_patch_available(self, data: bytes, addr: int = 0) -> bool: + data = bytearray(data) + if data[0] == 0x61: + # BSR + return True + if data[0] == 0x4e and data[1] & 0xc0 == 0x80: + # JSR + return True + return False + + def convert_to_nop(self, data: bytes, addr: int = 0) -> Optional[bytes]: + count = int(len(data)/2) + if count*2 != len(data): + return None + return b'\x4e\x71' * count + + def never_branch(self, data, addr): + data = bytearray(data) + if data[0] & 0xf0 == 0x60: + # BRA, BSR, Bcc + return self.convert_to_nop(data, addr) + if data[0] == 0x4e and data[1] & 0x80 == 0x80: + # JMP, JSR + return self.convert_to_nop(data, addr) + return None + + def invert_branch(self, 
data: bytes, addr: int = 0) -> Optional[bytes]: + data = bytearray(data) + if data[0] & 0xf0 == 0x60 and data[0] & 0xfe != 0x60: + # Bcc + return bytearray([data[0]^1])+data[1:] + return None + + def always_branch(self, data: bytes, addr: int = 0) -> Optional[bytes]: + data = bytearray(data) + if data[0] & 0xf0 == 0x60 and data[0] & 0xfe != 0x60: + # Bcc + return b'\x60'+data[1:] + return None + + def skip_and_return_value(self, data: bytes, addr: int, value: int) -> Optional[bytes]: + count = int(len(data)/2) + if count*2 != len(data): + return None + data = bytearray(data) + ok = False + if data[0] == 0x61: + # BSR + ok = True + if data[0] == 0x4e and data[1] & 0xc0 == 0x80: + # JSR + ok = True + if not ok: + return None + + if value > 0x80000000: + value = value - 0x100000000 + + if value >= -128 and value <= 127 and len(data) >= 2: + value = value & 0xff + return b'\x70'+struct.pack('>b',value)+b'\x4e\x71'*(count-1) + + if len(data) >= 6: + return b'\x20\x3C'+struct.pack('>l', value)+b'\x4e\x71'*(count-3) + + return None + + +class M68008(M68000): + name = "M68008" + + +class M68010(M68000): + name = "M68010" + control_registers = { + 0x000: 'sfc', + 0x001: 'dfc', + 0x800: 'usp', + 0x801: 'vbr', + } + + # add BKPT, MOVE from CCR, MOVEC, MOVES, RTD + + +class M68020(M68010): + name = "M68020" + control_registers = { + 0x000: 'sfc', + 0x001: 'dfc', + 0x800: 'usp', + 0x801: 'vbr', + 0x002: 'cacr', + 0x802: 'caar', + 0x803: 'msp', + 0x804: 'isp', + } + address_size = 4 + memory_indirect = True + movem_store_decremented = True + + # add BFCHG, BFCLR, BFEXTS, BFEXTU, BFFO, BFINS, BFSET, BFTST, CALLM, CAS, CAS2, CHK2, CMP2, cpBcc, cpDBcc, cpGEN, cpRESTORE, cpSAVE, cpScc, cpTRAPcc + # DIVSL, DIVUL, EXTB, PACK, RTM, TRAPcc, UNPK + # add memory indirect addressing + + +class M68030(M68020): + name = "M68030" + + # remove CALLM, RTM + # add PFLUSH, PFLUSHA, PLOAD, PMOVE, PTEST + + +class M68040(M68030): + name = "M68040" + control_registers = { + 0x000: 'sfc', + 0x001: 
'dfc', + 0x800: 'usp', + 0x801: 'vbr', + 0x002: 'cacr', + 0x803: 'msp', + 0x804: 'isp', + 0x003: 'tc', + 0x004: 'itt0', + 0x005: 'itt1', + 0x006: 'dtt0', + 0x007: 'dtt1', + 0x805: 'mmusr', + 0x806: 'urp', + 0x807: 'srp', + } + + # remove cpBcc, cpDBcc, cpGEN, cpRESTORE, cpSAVE, cpScc, cpTRAPcc, PFLUSHA, PLOAD, PMOVE + # add CINV, CPUSH, floating point, MOVE16 + + +class M68LC040(M68040): + name = "M68LC040" + + +class M68EC040(M68040): + name = "M68EC040" + control_registers = { + 0x000: 'sfc', + 0x001: 'dfc', + 0x800: 'usp', + 0x801: 'vbr', + 0x002: 'cacr', + 0x803: 'msp', + 0x804: 'isp', + 0x004: 'iacr0', + 0x005: 'iacr1', + 0x006: 'dacr0', + 0x007: 'dacr1' + } + + +class M68330(M68010): + name = "M68330" + movem_store_decremented = True + # AKA CPU32 + + # add BGND, CHK2, CMP2, DIVSL, DIVUL, EXTB, LPSTOP, TBLS, TBLSN, TBLU, TBLUN, TRAPcc + + +class M68340(M68330): + name = "M68340" + + +def create_vector_table(view, addr, size=256): + vectors = { + 0: 'reset_initial_interrupt_stack_pointer', + 1: 'reset_initial_program_counter', + 2: 'access_fault', + 3: 'address_error', + 4: 'illegal_instruction', + 5: 'integer_divide_by_zero', + 6: 'chk_chk2_instruction', + 7: 'ftrapcc_trapcc_trapv_instruction', + 8: 'privilege_violation', + 9: 'trace', + 10: 'line_1010_emulator', + 11: 'line_1111_emulator', + # 12 unassigned_reserved + 13: 'coprocessor_protocol_violation', + 14: 'format_error', + 15: 'uninitialized_interrupt', + # 16-23 unassigned_reserved + 24: 'spurious_interrupt', + 25: 'level_1_interrupt_autovector', + 26: 'level_2_interrupt_autovector', + 27: 'level_3_interrupt_autovector', + 28: 'level_4_interrupt_autovector', + 29: 'level_5_interrupt_autovector', + 30: 'level_6_interrupt_autovector', + 31: 'level_7_interrupt_autovector', + 32: 'trap_0_instruction', + 33: 'trap_1_instruction', + 34: 'trap_2_instruction', + 35: 'trap_3_instruction', + 36: 'trap_4_instruction', + 37: 'trap_5_instruction', + 38: 'trap_6_instruction', + 39: 'trap_7_instruction', + 40: 
'trap_8_instruction', + 41: 'trap_9_instruction', + 42: 'trap_10_instruction', + 43: 'trap_11_instruction', + 44: 'trap_12_instruction', + 45: 'trap_13_instruction', + 46: 'trap_14_instruction', + 47: 'trap_15_instruction', + 48: 'fp_branch_or_set_on_unordered_condition', + 49: 'fp_inexact_result', + 50: 'fp_divide_by_zero', + 51: 'fp_underflow', + 52: 'fp_operand_error', + 53: 'fp_overflow', + 54: 'fp_signaling_nan', + 55: 'fp_unimplemented_data_type', + 56: 'mmu_configuration_error', + 57: 'mmu_illegal_operation_error', + 58: 'mmu_access_level_violation_error', + # 59-63 unassigned_reserved + } + for k in range(0, 192): + vectors[k+64] = 'user_%d' % k + + t = view.parse_type_string("void *")[0] + + for k in range(size): + name = vectors.get(k, 'unassigned_reserved') + + view.define_user_symbol(Symbol(SymbolType.DataSymbol, addr+4*k, "_vector_%d_%s" % (k, name))) + view.define_user_data_var(addr+4*k, t) + value = struct.unpack(">L", view.read(addr+4*k, 4))[0] + + if k > 0: + view.define_user_symbol(Symbol(SymbolType.FunctionSymbol, value, "vector_%d_%s" % (k, name))) + if value > 0: + view.add_entry_point(value) + + +def prompt_create_vector_table(view, addr=None): + architectures = ['M68000', 'M68008', 'M68010', 'M68020', 'M68030', 'M68040', 'M68LC040', 'M68EC040', 'M68330', 'M68340'] + size_choices = ['Full (256)', 'MMU (59)', 'FP (56)', 'Traps (48)', 'Interrupts (32)'] + size_raw = [256, 59, 56, 48, 32] + + if addr is None: + addr = 0 + + need_arch = True + if view.platform is not None and view.platform.arch.name in architectures: + # 68k arch already selected + need_arch = False + + address_field = AddressField('Address', view, addr) + arch_field = ChoiceField('Architecture', architectures) + size_field = ChoiceField('Table size', size_choices) + + res = False + + if need_arch: + res = get_form_input([address_field, arch_field, size_field], 'Create M68k vector table') + else: + res = get_form_input([address_field, size_field], 'Create M68k vector table') + + 
if res: + address = address_field.result + size = size_raw[size_field.result] + + if need_arch: + arch = architectures[arch_field.result] + view.platform = Architecture[arch].standalone_platform + + create_vector_table(view, address, size) + diff --git a/test.py b/test.py new file mode 100644 index 0000000..087df17 --- /dev/null +++ b/test.py @@ -0,0 +1,87 @@ +@@ -0,0 +1,86 @@ +# inspired by https://github.com/Vector35/arch-arm64/blob/staging/arm64test.py +from .m68k import * + +test_cases = [ + # subq.b #$1,d0b + (b'\x53\x00', 'LLIL_SET_REG.o(v2,LLIL_NOT.o(LLIL_CONST.o(0x2F)))'), +] + +import re +import sys +import binaryninja +from binaryninja import binaryview +from binaryninja import lowlevelil +from binaryninja.enums import LowLevelILOperation + +def il2str(il): + sz_lookup = {1:'.b', 2:'.w', 4:'.d', 8:'.q', 16:'.o'} + if isinstance(il, lowlevelil.LowLevelILInstruction): + size_code = sz_lookup.get(il.size, '?') if il.size else '' + flags_code = '' if not hasattr(il, 'flags') or not il.flags else '{%s}'%il.flags + + # print size-specified IL constants in hex + if il.operation in [LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR] and il.size: + tmp = il.operands[0] + if tmp < 0: tmp = (1<<(il.size*8))+tmp + tmp = '0x%X' % tmp if il.size else '%d' % il.size + return 'LLIL_CONST%s(%s)' % (size_code, tmp) + else: + return '%s%s%s(%s)' % (il.operation.name, size_code, flags_code, ','.join([il2str(o) for o in il.operands])) + elif isinstance(il, list): + return '[' + ','.join([il2str(x) for x in il]) + ']' + else: + return str(il) + +def instr_to_il(data): + RETURN = b'\x4e\x75' + + platform = binaryninja.Platform['M68000'] + # make a pretend function that returns + bv = binaryview.BinaryView.new(data + RETURN) + bv.add_function(0, plat=platform) + assert len(bv.functions) == 1 + + result = [] + #for block in bv.functions[0].low_level_il: + for block in bv.functions[0].lifted_il: + for il in block: + result.append(il2str(il)) + result = '; 
'.join(result) + # if result.endswith('LLIL_RET(LLIL_REG.q(x30))'): + # result = result[0:result.index('LLIL_RET(LLIL_REG.q(x30))')] + # if result.endswith('; '): + # result = result[0:-2] + + return result + +def il_str_to_tree(ilstr): + result = '' + depth = 0 + for c in ilstr: + if c == '(': + result += '\n' + depth += 1 + result += ' '*depth + elif c == ')': + depth -= 1 + elif c == ',': + result += '\n' + result += ' '*depth + pass + else: + result += c + return result + +def test_all(): + for (test_i, (data, expected)) in enumerate(test_cases): + actual = instr_to_il(data) + if actual != expected: + print('MISMATCH AT TEST %d!' % test_i) + print('\t input: %s' % data.hex()) + print('\texpected: %s' % expected) + print('\t actual: %s' % actual) + print('\t tree:') + print(il_str_to_tree(actual)) + return False + return True From bbb227b8bd5d5f51caa82413ffde922e4bf00647 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 26 Jun 2022 15:32:31 +1000 Subject: [PATCH 07/46] was 'comment out 68k variants'. Run tests when loading plugin. 
--- __init__.py | 3 ++ test.py | 124 ++++++++++++++++++++++++++-------------------------- 2 files changed, 65 insertions(+), 62 deletions(-) diff --git a/__init__.py b/__init__.py index 8f32d9f..a4034b3 100644 --- a/__init__.py +++ b/__init__.py @@ -1,4 +1,5 @@ from .m68k import * +from .test import test_all #PluginCommand.register("Create M68k vector table", "Create M68k vector table", prompt_create_vector_table) PluginCommand.register_for_address("Create M68k vector table", "Create M68k vector table", prompt_create_vector_table) @@ -15,3 +16,5 @@ M68340.register() BinaryViewType['ELF'].register_arch(4, Endianness.BigEndian, Architecture['M68030']) + +test_all() diff --git a/test.py b/test.py index 087df17..deeb66c 100644 --- a/test.py +++ b/test.py @@ -1,10 +1,9 @@ -@@ -0,0 +1,86 @@ # inspired by https://github.com/Vector35/arch-arm64/blob/staging/arm64test.py from .m68k import * test_cases = [ # subq.b #$1,d0b - (b'\x53\x00', 'LLIL_SET_REG.o(v2,LLIL_NOT.o(LLIL_CONST.o(0x2F)))'), + (b'\x53\x00', 'LLIL_SET_REG.d(d0,LLIL_OR.d(LLIL_AND.d(LLIL_CONST.d(0xFFFFFF00),LLIL_REG.d(d0)),LLIL_AND.d(LLIL_CONST.d(0xFF),LLIL_SUB.b{*}(LLIL_REG.b(d0),LLIL_CONST.b(0x1)))))'), ] import re @@ -15,73 +14,74 @@ from binaryninja.enums import LowLevelILOperation def il2str(il): - sz_lookup = {1:'.b', 2:'.w', 4:'.d', 8:'.q', 16:'.o'} - if isinstance(il, lowlevelil.LowLevelILInstruction): - size_code = sz_lookup.get(il.size, '?') if il.size else '' - flags_code = '' if not hasattr(il, 'flags') or not il.flags else '{%s}'%il.flags + sz_lookup = {1:'.b', 2:'.w', 4:'.d', 8:'.q', 16:'.o'} + if isinstance(il, lowlevelil.LowLevelILInstruction): + size_code = sz_lookup.get(il.size, '?') if il.size else '' + flags_code = '' if not hasattr(il, 'flags') or not il.flags else '{%s}'%il.flags - # print size-specified IL constants in hex - if il.operation in [LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR] and il.size: - tmp = il.operands[0] - if tmp < 0: tmp = (1<<(il.size*8))+tmp - 
tmp = '0x%X' % tmp if il.size else '%d' % il.size - return 'LLIL_CONST%s(%s)' % (size_code, tmp) - else: - return '%s%s%s(%s)' % (il.operation.name, size_code, flags_code, ','.join([il2str(o) for o in il.operands])) - elif isinstance(il, list): - return '[' + ','.join([il2str(x) for x in il]) + ']' - else: - return str(il) + # print size-specified IL constants in hex + if il.operation in [LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR] and il.size: + tmp = il.operands[0] + if tmp < 0: tmp = (1<<(il.size*8))+tmp + tmp = '0x%X' % tmp if il.size else '%d' % il.size + return 'LLIL_CONST%s(%s)' % (size_code, tmp) + else: + return '%s%s%s(%s)' % (il.operation.name, size_code, flags_code, ','.join([il2str(o) for o in il.operands])) + elif isinstance(il, list): + return '[' + ','.join([il2str(x) for x in il]) + ']' + else: + return str(il) def instr_to_il(data): - RETURN = b'\x4e\x75' + RETURN = b'\x4e\x75' - platform = binaryninja.Platform['M68000'] - # make a pretend function that returns - bv = binaryview.BinaryView.new(data + RETURN) - bv.add_function(0, plat=platform) - assert len(bv.functions) == 1 + platform = binaryninja.Platform['M68000'] + # make a pretend function that returns + bv = binaryview.BinaryView.new(data + RETURN) + bv.add_function(0, plat=platform) + assert len(bv.functions) == 1 - result = [] - #for block in bv.functions[0].low_level_il: - for block in bv.functions[0].lifted_il: - for il in block: - result.append(il2str(il)) - result = '; '.join(result) - # if result.endswith('LLIL_RET(LLIL_REG.q(x30))'): - # result = result[0:result.index('LLIL_RET(LLIL_REG.q(x30))')] - # if result.endswith('; '): - # result = result[0:-2] + result = [] + #for block in bv.functions[0].low_level_il: + for block in bv.functions[0].lifted_il: + for il in block: + result.append(il2str(il)) + result = '; '.join(result) + ret = 'LLIL_RET(LLIL_POP.d())' + if result.endswith(ret): + result = result[0:result.index(ret)] + if result.endswith('; '): + 
result = result[0:-2] - return result + return result def il_str_to_tree(ilstr): - result = '' - depth = 0 - for c in ilstr: - if c == '(': - result += '\n' - depth += 1 - result += ' '*depth - elif c == ')': - depth -= 1 - elif c == ',': - result += '\n' - result += ' '*depth - pass - else: - result += c - return result + result = '' + depth = 0 + for c in ilstr: + if c == '(': + result += '\n' + depth += 1 + result += ' '*depth + elif c == ')': + depth -= 1 + elif c == ',': + result += '\n' + result += ' '*depth + pass + else: + result += c + return result def test_all(): - for (test_i, (data, expected)) in enumerate(test_cases): - actual = instr_to_il(data) - if actual != expected: - print('MISMATCH AT TEST %d!' % test_i) - print('\t input: %s' % data.hex()) - print('\texpected: %s' % expected) - print('\t actual: %s' % actual) - print('\t tree:') - print(il_str_to_tree(actual)) - return False - return True + for (test_i, (data, expected)) in enumerate(test_cases): + actual = instr_to_il(data) + if actual != expected: + print('MISMATCH AT TEST %d!' % test_i) + print('\t input: %s' % data.hex()) + print('\texpected: %s' % expected) + print('\t actual: %s' % actual) + print('\t tree:') + print(il_str_to_tree(actual)) + return False + return True From 7f8fcec72a549e87125d3feb9b80b7d7e30f860e Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 26 Jun 2022 15:36:47 +1000 Subject: [PATCH 08/46] clean up imports. 
--- __init__.py | 16 ++++++++++++++++ m68k.py | 16 ---------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/__init__.py b/__init__.py index a4034b3..1e3ee31 100644 --- a/__init__.py +++ b/__init__.py @@ -1,3 +1,19 @@ +import sys +import os +import binaryninja + +__module__ = sys.modules[__name__] +__logger = binaryninja.Logger(0, __module__.__name__) + +log = __logger.log +log_debug = __logger.log_debug +log_info = __logger.log_info +log_warn = __logger.log_warn +log_error = __logger.log_error +log_alert = __logger.log_alert + +log_debug(f'm68k Plugin loaded from: {os.path.dirname(__module__.__loader__.path)}') + from .m68k import * from .test import test_all diff --git a/m68k.py b/m68k.py index 70880f1..a454fda 100644 --- a/m68k.py +++ b/m68k.py @@ -22,21 +22,7 @@ """ -from __future__ import print_function - -import sys - -__module__ = sys.modules[__name__] - import binaryninja -__logger = binaryninja.Logger(0, __module__.__name__) - -log = __logger.log -log_debug = __logger.log_debug -log_info = __logger.log_info -log_warn = __logger.log_warn -log_error = __logger.log_error -log_alert = __logger.log_alert from typing import List, Optional, Tuple @@ -56,8 +42,6 @@ ImplicitRegisterExtend, SymbolType) from binaryninja import BinaryViewType, lowlevelil -log_debug(f'm68k Plugin loaded from: {os.path.dirname(__module__.__loader__.path)}') - # Shift syles SHIFT_SYLE_ARITHMETIC = 0, SHIFT_SYLE_LOGICAL = 1, From 41d0b948f227c72a65a574d7b722e213c99a2e69 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 26 Jun 2022 15:47:08 +1000 Subject: [PATCH 09/46] split out ops and disasm --- m68k.py | 1731 +----------------------------------------------- m68k_disasm.py | 878 ++++++++++++++++++++++++ m68k_ops.py | 936 ++++++++++++++++++++++++++ test.py | 5 +- 4 files changed, 1826 insertions(+), 1724 deletions(-) create mode 100644 m68k_disasm.py create mode 100644 m68k_ops.py diff --git a/m68k.py b/m68k.py index a454fda..7109fa8 100644 --- a/m68k.py 
+++ b/m68k.py @@ -42,899 +42,8 @@ ImplicitRegisterExtend, SymbolType) from binaryninja import BinaryViewType, lowlevelil -# Shift syles -SHIFT_SYLE_ARITHMETIC = 0, -SHIFT_SYLE_LOGICAL = 1, -SHIFT_SYLE_ROTATE_WITH_EXTEND = 2, -SHIFT_SYLE_ROTATE = 3, - -ShiftStyle = [ - 'as', # SHIFT_SYLE_ARITHMETIC - 'ls', # SHIFT_SYLE_LOGICAL - 'rox', # SHIFT_SYLE_ROTATE_WITH_EXTEND - 'ro' # SHIFT_SYLE_ROTATE -] - -BITFIELD_STYLE_TST = 0, -BITFIELD_STYLE_EXTU = 1, -BITFIELD_STYLE_CHG = 2, -BITFIELD_STYLE_EXTS = 3, -BITFIELD_STYLE_CLR = 4, -BITFIELD_STYLE_FFO = 5, -BITFIELD_STYLE_SET = 6, -BITFIELD_STYLE_INS = 7, - -BitfieldStyle = [ - "tst", # BITFIELD_STYLE_TST - "extu", # BITFIELD_STYLE_EXTU - "chg", # BITFIELD_STYLE_CHG - "exts", # BITFIELD_STYLE_EXTS - "clr", # BITFIELD_STYLE_CLR - "ffo", # BITFIELD_STYLE_FFO - "set", # BITFIELD_STYLE_SET - "ins", # BITFIELD_STYLE_INS -] - - -# Condition codes -CONDITION_TRUE = 0 -CONDITION_FALSE = 1 -CONDITION_HIGH = 2 -CONDITION_LESS_OR_SAME = 3 -CONDITION_CARRY_CLEAR = 4 -CONDITION_CARRY_SET = 5 -CONDITION_NOT_EQUAL = 6 -CONDITION_EQUAL = 7 -CONDITION_OVERFLOW_CLEAR = 8 -CONDITION_OVERFLOW_SET = 9 -CONDITION_PLUS = 10 -CONDITION_MINUS = 11 -CONDITION_GREATER_OR_EQUAL = 12 -CONDITION_LESS_THAN = 13 -CONDITION_GREATER_THAN = 14 -CONDITION_LESS_OR_EQUAL = 15 - -Condition = [ - 't', # CONDITION_TRUE - 'f', # CONDITION_FALSE - 'hi', # CONDITION_HIGH - 'ls', # CONDITION_LESS_OR_SAME - 'cc', # CONDITION_CARRY_CLEAR - 'cs', # CONDITION_CARRY_SET - 'ne', # CONDITION_NOT_EQUAL - 'eq', # CONDITION_EQUAL - 'vc', # CONDITION_OVERFLOW_CLEAR - 'vs', # CONDITION_OVERFLOW_SET - 'pl', # CONDITION_PLUS - 'mi', # CONDITION_MINUS - 'ge', # CONDITION_GREATER_OR_EQUAL - 'lt', # CONDITION_LESS_THAN - 'gt', # CONDITION_GREATER_THAN - 'le' # CONDITION_LESS_OR_EQUAL -] - -# Registers -REGISTER_D0 = 0 -REGISTER_D1 = 1 -REGISTER_D2 = 2 -REGISTER_D3 = 3 -REGISTER_D4 = 4 -REGISTER_D5 = 5 -REGISTER_D6 = 6 -REGISTER_D7 = 7 -REGISTER_A0 = 8 -REGISTER_A1 = 9 -REGISTER_A2 = 10 
-REGISTER_A3 = 11 -REGISTER_A4 = 12 -REGISTER_A5 = 13 -REGISTER_A6 = 14 -REGISTER_A7 = 15 - -Registers = [ - 'd0', # REGISTER_D0 - 'd1', # REGISTER_D1 - 'd2', # REGISTER_D2 - 'd3', # REGISTER_D3 - 'd4', # REGISTER_D4 - 'd5', # REGISTER_D5 - 'd6', # REGISTER_D6 - 'd7', # REGISTER_D7 - 'a0', # REGISTER_A0 - 'a1', # REGISTER_A1 - 'a2', # REGISTER_A2 - 'a3', # REGISTER_A3 - 'a4', # REGISTER_A4 - 'a5', # REGISTER_A5 - 'a6', # REGISTER_A6 - 'sp' # REGISTER_A7 -] - -# Sizes -SIZE_BYTE = 0 -SIZE_WORD = 1 -SIZE_LONG = 2 - -SizeSuffix = [ - '.b', # SIZE_BYTE - '.w', # SIZE_WORD - '.l', # SIZE_LONG -] - -# Operands - -class Operand: - def format(self, addr: int) -> List[InstructionTextToken]: - raise NotImplementedError - - def get_pre_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: - raise NotImplementedError - - def get_post_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: - raise NotImplementedError - - def get_address_il2(self, il: LowLevelILFunction) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - raise NotImplementedError - - def get_address_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: - return self.get_address_il2(il)[0] - - def get_source_il(self, il: LowLevelILFunction) -> Optional[ExpressionIndex]: - raise NotImplementedError - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> Optional[ExpressionIndex]: - raise NotImplementedError - -class OpRegisterDirect(Operand): - def __init__(self, size: int, reg: str): - self.size = size - self.reg = reg - - def __repr__(self): - return "OpRegisterDirect(%d, %s)" % (self.size, self.reg) - - def format(self, addr: int) -> List[InstructionTextToken]: - # a0, d0 - return [ - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg) - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> 
Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - r = il.unimplemented() - return (r, [r]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - if self.reg == 'ccr': - c = il.flag_bit(1, 'c', 0) - v = il.flag_bit(1, 'v', 1) - z = il.flag_bit(1, 'z', 2) - n = il.flag_bit(1, 'n', 3) - x = il.flag_bit(1, 'x', 4) - return il.or_expr(1, il.or_expr(1, il.or_expr(1, il.or_expr(1, c, v), z), n), x) - else: - return il.reg(1 << self.size, self.reg) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - if self.reg == 'ccr': - return il.unimplemented() - - # return il.set_reg(1 << self.size, self.reg, value) - # if self.size == SIZE_BYTE: - # if self.reg[0] == 'a' or self.reg == 'sp': - # return None - # else: - # return il.set_reg(1, self.reg+'.b', value, flags) - # elif self.size == SIZE_WORD: - # return il.set_reg(2, self.reg+'.w', value, flags) - # else: - # return il.set_reg(4, self.reg, value, flags) - if self.size == SIZE_BYTE: - if self.reg[0] == 'a' or self.reg == 'sp': - return il.unimplemented() - else: - return il.set_reg(4, self.reg, il.or_expr(4, il.and_expr(4, il.const(4, 0xffffff00), il.reg(4, self.reg)), il.and_expr(4, il.const(4, 0xff), value)), flags) - elif self.size == SIZE_WORD: - if self.reg[0] == 'a' or self.reg == 'sp': - return il.set_reg(4, self.reg, il.sign_extend(4, value), flags) - else: - return il.set_reg(4, self.reg, il.or_expr(4, il.and_expr(4, il.const(4, 0xffff0000), il.reg(4, self.reg)), il.and_expr(4, il.const(4, 0xffff), value)), flags) - else: - if value: - return il.set_reg(4, self.reg, value, flags) - else: - return il.unimplemented() - - -class OpRegisterDirectPair(Operand): - def __init__(self, size: int, reg1: str, reg2: str): - self.size = size - self.reg1 = reg1 - self.reg2 = reg2 - - def __repr__(self): - return "OpRegisterDirectPair(%d, %s, %s)" % (self.size, self.reg1, self.reg2) - - def format(self, addr: int) -> List[InstructionTextToken]: - # d0:d1 - return [ - 
InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg1), - InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ":"), - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg2) - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - r = il.unimplemented() - return (r, [r]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return (il.reg(1 << self.size, self.reg1), il.reg(1 << self.size, self.reg2)) - - def get_dest_il(self, il: LowLevelILFunction, values, flags=0) -> ExpressionIndex: - return (il.set_reg(1 << self.size, self.reg1, values[0], flags), il.set_reg(1 << self.size, self.reg2, values[1], flags)) - - -class OpRegisterMovemList(Operand): - def __init__(self, size: int, regs: List[str]): - self.size = size - self.regs = regs - - def __repr__(self): - return "OpRegisterMovemList(%d, %s)" % (self.size, repr(self.regs)) - - def format(self, addr: int) -> List[InstructionTextToken]: - # d0-d7/a0/a2/a4-a7 - if len(self.regs) == 0: - return [] - tokens = [InstructionTextToken(InstructionTextTokenType.RegisterToken, self.regs[0])] - last = self.regs[0] - first = None - for reg in self.regs[1:]: - if Registers[Registers.index(last)+1] == reg and reg != 'a0': - if first is None: - first = last - last = reg - else: - if first is not None: - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "-")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, last)) - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "/")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, reg)) - first = None - last = reg - if first is not None: - 
tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "-")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, last)) - return tokens - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - r = il.unimplemented() - return (r, [r]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return [il.reg(1 << self.size, reg) for reg in self.regs] - - def get_dest_il(self, il: LowLevelILFunction, values, flags=0) -> ExpressionIndex: - return [il.set_reg(1 << self.size, reg, val, flags) for reg, val in zip(self.regs, values)] - - -class OpRegisterIndirect(Operand): - def __init__(self, size: int, reg: str): - self.size = size - self.reg = reg - - def __repr__(self): - return "OpRegisterIndirect(%d, %s)" % (self.size, self.reg) - - def format(self, addr: int) -> List[InstructionTextToken]: - # (a0) - return [ - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - r = il.reg(4, self.reg) - return (r, [r]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), 
value, size=1 << self.size, flags=flags) - - -class OpRegisterIndirectPair(Operand): - def __init__(self, size: int, reg1: str, reg2: str): - self.size = size - self.reg1 = reg1 - self.reg2 = reg2 - - def __repr__(self): - return "OpRegisterIndirectPair(%d, %s, %s)" % (self.size, self.reg1, self.reg2) - - def format(self, addr: int) -> List[InstructionTextToken]: - # d0:d1 - return [ - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg1), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")"), - InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ":"), - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg2), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - # return (il.reg(4, self.reg1), il.reg(4, self.reg2)) - a = il.reg(4, self.reg1) - b = il.reg(4, self.reg2) - return ((a, b), [a, b]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return (il.load(1 << self.size, il.reg(4, self.reg1)), il.load(1 << self.size, il.reg(4, self.reg2))) - - def get_dest_il(self, il: LowLevelILFunction, values, flags=0) -> ExpressionIndex: - #return (il.store(1 << self.size, il.reg(4, self.reg1), values[0], flags), il.store(1 << self.size, il.reg(4, self.reg2), values[1], flags)) - return (il.store(1 << self.size, il.reg(4, self.reg1), values[0]), il.store(1 << self.size, il.reg(4, self.reg2), values[1])) - - -class OpRegisterIndirectPostincrement(Operand): - def __init__(self, size: int, reg: str): - self.size = size - self.reg = reg - - def 
__repr__(self): - return "OpRegisterIndirectPostincrement(%d, %s)" % (self.size, self.reg) - - def format(self, addr: int) -> List[InstructionTextToken]: - # (a0)+ - return [ - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")"), - InstructionTextToken(InstructionTextTokenType.TextToken, "+") - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.set_reg(4, - self.reg, - il.add(4, - il.reg(4, self.reg), - il.const(4, 1 << self.size) - ) - ) - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - r = il.reg(4, self.reg) - return (r, [r]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpRegisterIndirectPredecrement(Operand): - def __init__(self, size: int, reg: str): - self.size = size - self.reg = reg - - def __repr__(self): - return "OpRegisterIndirectPredecrement(%d, %s)" % (self.size, self.reg) - - def format(self, addr: int) -> List[InstructionTextToken]: - # -(a0) - return [ - InstructionTextToken(InstructionTextTokenType.TextToken, "-"), - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.set_reg(4, - self.reg, - 
il.sub(4, - il.reg(4, self.reg), - il.const(4, 1 << self.size) - ) - ) - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - r = il.reg(4, self.reg) - return (r, [r]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpRegisterIndirectDisplacement(Operand): - def __init__(self, size: int, reg: str, offset: int): - self.size = size - self.reg = reg - self.offset = offset - - def __repr__(self): - return "OpRegisterIndirectDisplacement(%d, %s, 0x%x)" % (self.size, self.reg, self.offset) - - def format(self, addr: int) -> List[InstructionTextToken]: - if self.reg == 'pc': - return [ - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, "${:08x}".format(addr+2+self.offset), addr+2+self.offset, 4), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") - ] - else: - # $1234(a0) - return [ - InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:04x}".format(self.offset), self.offset, 2), - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], 
List[ExpressionIndex]]: - if self.reg == 'pc': - r = il.const_pointer(4, il.current_address+2+self.offset) - return (r, [r]) - else: - a = il.reg(4, self.reg) - b = il.const(2, self.offset) - c = il.add(4, a, b) - return (c, [a, b, c]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - if self.reg == 'pc': - return il.unimplemented() - else: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpRegisterIndirectIndex(Operand): - def __init__(self, size: int, reg: str, offset: int, ireg: str, ireg_long: int, scale: int): - self.size = size - self.reg = reg - self.offset = offset - self.ireg = ireg - self.ireg_long = ireg_long - self.scale = scale - - def __repr__(self): - return "OpRegisterIndirectIndex(%d, %s, 0x%x, %s, %d, %d)" % (self.size, self.reg, self.offset, self.ireg, self.ireg_long, self.scale) - - def format(self, addr: int) -> List[InstructionTextToken]: - # $1234(a0,a1.l*4) - tokens = [] - if self.offset != 0: - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.offset), self.offset)) - tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "(")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg)) - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.ireg)) - tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, ".")) - tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "l" if self.ireg_long else 'w')) - if self.scale != 1: - 
tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "*")) - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "{}".format(self.scale), self.scale)) - tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")")) - return tokens - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - # return il.add(4, - # il.add(4, - # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), - # il.const(4, self.offset) - # ), - # il.mult(4, - # il.reg(4 if self.ireg_long else 2, self.ireg), - # il.const(1, self.scale) - # ) - # ) - a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) - b = il.const(4, self.offset) - e = il.add(4, a, b) - - c = il.reg(4 if self.ireg_long else 2, self.ireg) - d = il.const(1, self.scale) - f = il.mult(4, c, d) - - g = il.add(4, e, f) - return (g, [a, b, c, d, e, f, g]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - if self.reg == 'pc': - return il.unimplemented() - else: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpMemoryIndirect(Operand): - def __init__(self, size: int, reg: str, offset: int, outer_displacement: int): - self.size = size - self.reg = reg - self.offset = offset - self.outer_displacement = outer_displacement - - def __repr__(self): - return "OpMemoryIndirect(%d, %s, %d, %d)" % (self.size, self.reg, self.offset, self.outer_displacement) - - def format(self, addr: 
int) -> List[InstructionTextToken]: - # ([$1234,a0],$1234) - tokens = [] - tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "(")) - tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "[")) - if self.offset != 0: - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.offset), self.offset)) - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg)) - tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, "]")) - if self.outer_displacement != 0: - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.outer_displacement), self.outer_displacement)) - tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")")) - return tokens - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - # return il.add(4, - # il.load(4, - # il.add(4, - # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), - # il.const(4, self.offset) - # ), - # ), - # il.const(4, self.outer_displacement) - # ) - a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) - b = il.const(4, self.offset) - c = il.add(4, a, b) - d = il.load(4, c) - - e = il.const(4, self.outer_displacement) - - f = il.add(4, d, e) - return (f, [a, b, c, d, e, f]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, 
flags=0) -> ExpressionIndex: - if self.reg == 'pc': - return il.unimplemented() - else: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpMemoryIndirectPostindex(Operand): - def __init__(self, size: int, reg: str, offset: int, ireg: str, ireg_long: bool, scale: int, outer_displacement: int): - self.size = size - self.reg = reg - self.offset = offset - self.ireg = ireg - self.ireg_long = ireg_long - self.scale = scale - self.outer_displacement = outer_displacement - - def __repr__(self): - return "OpMemoryIndirectPostindex(%d, %s, 0x%x, %s, %d, %d, 0x%x)" % (self.size, self.reg, self.offset, self.ireg, self.ireg_long, self.scale, self.outer_displacement) - - def format(self, addr: int) -> List[InstructionTextToken]: - # ([$1234,a0],a1.l*4,$1234) - tokens = [] - tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "(")) - tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "[")) - if self.offset != 0: - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.offset), self.offset)) - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg)) - tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, "]")) - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.ireg)) - tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, ".")) - tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "l" if self.ireg_long else 'w')) - if self.scale != 1: - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "*")) - 
tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "{}".format(self.scale), self.scale)) - if self.outer_displacement != 0: - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.outer_displacement), self.outer_displacement)) - tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")")) - return tokens - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - # j = il.add(4, d, i) - # d = il.load(4, c) - # c = il.add(4, a, b) - # a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), - # b = il.const(4, self.offset) - # ) - # ), - # i = il.add(4, g, h) - # g = il.mult(4, e, f) - # e = il.reg(4 if self.ireg_long else 2, self.ireg), - # f = il.const(1, self.scale) - # ), - # h = il.const(4, self.outer_displacement) - # ) - # ) - a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) - b = il.const(4, self.offset) - c = il.add(4, a, b) - d = il.load(4, c) - - e = il.reg(4 if self.ireg_long else 2, self.ireg), - f = il.const(1, self.scale) - # print('here1: ', e, ' ', self.ireg_long, ' ', self.ireg) - # FIXME: why 'e' is a tuple with a second element missing??? 
- g = il.mult(4, e[0], f) - - h = il.const(4, self.outer_displacement) - i = il.add(4, g, h) - - j = il.add(4, d, i) - return (j, [a, b, c, d, e, f, g, h, i, j]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - if self.reg == 'pc': - return il.unimplemented() - else: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpMemoryIndirectPreindex(Operand): - def __init__(self, size: int, reg: str, offset: int, ireg: str, ireg_long: bool, scale: int, outer_displacement: int): - self.size = size - self.reg = reg - self.offset = offset - self.ireg = ireg - self.ireg_long = ireg_long - self.scale = scale - self.outer_displacement = outer_displacement - - def __repr__(self): - return "OpMemoryIndirectPreindex(%d, %s, 0x%x, %s, %d, %d, 0x%x)" % (self.size, self.reg, self.offset, self.ireg, self.ireg_long, self.scale, self.outer_displacement) - - def format(self, addr: int) -> List[InstructionTextToken]: - # ([$1234,a0,a1.l*4],$1234) - tokens = [] - tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "(")) - tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "[")) - if self.offset != 0: - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.offset), self.offset)) - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg)) - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.ireg)) - 
tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, ".")) - tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "l" if self.ireg_long else 'w')) - if self.scale != 1: - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "*")) - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "{}".format(self.scale), self.scale)) - tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, "]")) - if self.outer_displacement != 0: - tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) - tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.outer_displacement), self.outer_displacement)) - tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")")) - return tokens - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - # return il.add(4, - # il.load(4, - # il.add(4, - # il.add(4, - # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), - # il.const(4, self.offset) - # ), - # il.mult(4, - # il.reg(4 if self.ireg_long else 2, self.ireg), - # il.const(1, self.scale) - # ) - # ) - # ), - # il.const(4, self.outer_displacement) - # ) - a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) - b = il.const(4, self.offset) - c = il.add(4, a, b) - - d = il.reg(4 if self.ireg_long else 2, self.ireg) - e = il.const(1, self.scale) - f = il.mult(4, d, e) - - g = il.add(4, c, f) - h = il.load(4, g) - - i = il.const(4, self.outer_displacement) - j = il.add(4, h, i) - return (j, [a, b, c, d, e, f, g, h, i, j]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << 
self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - if self.reg == 'pc': - return il.unimplemented() - else: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpAbsolute(Operand): - def __init__(self, size, address, address_size, address_width): - self.size = size - self.address = address - self.address_size = address_size - self.address_width = address_width - - def __repr__(self): - return "OpAbsolute(%d, 0x%x, %d, %d)" % (self.size, self.address, self.address_size, self.address_width) - - def format(self, addr: int) -> List[InstructionTextToken]: - # ($1234).w - return [ - InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), - InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, "${:0{}x}".format(self.address, 1 << self.address_size), self.address, 1 << self.address_size), - InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")"+SizeSuffix[self.address_size]) - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - # return il.sign_extend(self.address_width, - # il.const(1 << self.address_size, self.address) - # ) - a = il.const(1 << self.address_size, self.address) - b = il.sign_extend(self.address_width, a) - return (b, [a, b]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.load(1 << self.size, self.get_address_il(il)) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - #return il.store(1 << self.size, self.get_address_il(il), value, flags) - return il.expr(LowLevelILOperation.LLIL_STORE, 
self.get_address_il(il), value, size=1 << self.size, flags=flags) - - -class OpImmediate(Operand): - def __init__(self, size, value): - self.size = size - self.value = value - - def __repr__(self): - return "OpImmediate(%d, 0x%x)" % (self.size, self.value) - - def format(self, addr: int) -> List[InstructionTextToken]: - # #$1234 - return [ - InstructionTextToken(InstructionTextTokenType.TextToken, "#"), - #InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, "${:0{}x}".format(self.value, 1 << self.size), self.value, 1 << self.size) - InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:0{}x}".format(self.value, 1 << self.size), self.value, 1 << self.size) - ] - - def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return None - - def get_address_il2(self, il) -> Tuple[Optional[ExpressionIndex], List[ExpressionIndex]]: - r = il.unimplemented() - return (r, [r]) - - def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: - return il.const(1 << self.size, self.value) - - def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - return il.unimplemented() - - -# condition mapping to LLIL flag conditions -ConditionMapping = { - 'hi': LowLevelILFlagCondition.LLFC_UGT, - 'ls': LowLevelILFlagCondition.LLFC_ULE, - 'cc': LowLevelILFlagCondition.LLFC_UGE, - 'cs': LowLevelILFlagCondition.LLFC_ULT, - 'ne': LowLevelILFlagCondition.LLFC_NE, - 'eq': LowLevelILFlagCondition.LLFC_E, - 'vc': LowLevelILFlagCondition.LLFC_NO, - 'vs': LowLevelILFlagCondition.LLFC_O, - 'pl': LowLevelILFlagCondition.LLFC_POS, - 'mi': LowLevelILFlagCondition.LLFC_NEG, - 'ge': LowLevelILFlagCondition.LLFC_SGE, - 'lt': LowLevelILFlagCondition.LLFC_SLT, - 'gt': LowLevelILFlagCondition.LLFC_SGT, - 'le': LowLevelILFlagCondition.LLFC_SLE, -} +from .m68k_ops import * +from .m68k_disasm import * class M68000(Architecture): name = "M68000" @@ -1024,833 
+133,9 @@ class M68000(Architecture): memory_indirect = False movem_store_decremented = False - def decode_effective_address(self, mode: int, register: int, data: bytes, size: Optional[int] = None) -> Tuple[Optional[Operand], Optional[int]]: - mode &= 0x07 - register &= 0x07 - - reg = None - - if mode == 0: - # data register direct - return (OpRegisterDirect(size, Registers[register]), 0) - elif mode == 1: - # address register direct - return (OpRegisterDirect(size, Registers[register+8]), 0) - elif mode == 2: - # address register indirect - return (OpRegisterIndirect(size, Registers[register+8]), 0) - elif mode == 3: - # address register indirect with postincrement - return (OpRegisterIndirectPostincrement(size, Registers[register+8]), 0) - elif mode == 4: - # address register indirect with predecrement - return (OpRegisterIndirectPredecrement(size, Registers[register+8]), 0) - elif mode == 5: - # address register indirect with displacement - return (OpRegisterIndirectDisplacement(size, Registers[register+8], struct.unpack_from('>h', data, 0)[0]), 2) - elif mode == 6: - # extended addressing mode - reg = Registers[register+8] - elif mode == 7: - if register == 0: - # absolute short - val = struct.unpack_from('>H', data, 0)[0] - if val & 0x8000: - if self.address_size == 4: - val |= 0xffff0000 # extend to 32-bits - else: - val |= 0xff0000 # extend to 24-bits (for 68000) - return (OpAbsolute(size, val, 1, self.address_size), 2) - if register == 1: - # absolute long - return (OpAbsolute(size, struct.unpack_from('>L', data, 0)[0], 2, self.address_size), 4) - elif register == 2: - # program counter indirect with displacement - return (OpRegisterIndirectDisplacement(size, 'pc', struct.unpack_from('>h', data, 0)[0]), 2) - elif register == 3: - # extended addressing mode - reg = 'pc' - elif register == 4: - # immediate - if size == None: - # unspecified length - return (OpImmediate(size, None), None) - elif size == SIZE_BYTE: - # byte - return (OpImmediate(size, 
struct.unpack_from('>b', data, 1)[0]), 2) - elif size == 1: - # word - return (OpImmediate(size, struct.unpack_from('>h', data, 0)[0]), 2) - elif size == 2: - # long - return (OpImmediate(size, struct.unpack_from('>l', data, 0)[0]), 4) - - if reg is not None: - extra = struct.unpack_from('>H', data, 0)[0] - # index register - xn = Registers[extra >> 12] - # index register size - index_size = (extra >> 11) & 1 - # index register scale - scale = 1 << ((extra >> 9) & 3) - length = 2 - - if extra & 0x0100: - # full extension word - bd = 0 - od = 0 - - # base displacement - if not (extra >> 7) & 1: - if (extra >> 4) & 3 == 2: - # word base displacement - bd = struct.unpack_from('>h', data, length)[0] - length += 2 - elif (extra >> 4) & 3 == 3: - # long base displacement - bd = struct.unpack_from('>L', data, length)[0] - length += 4 - - # outer displacement - if extra & 3 == 2: - # word outer displacement - od = struct.unpack_from('>h', data, length)[0] - length += 2 - elif extra & 3 == 3: - # long outer displacement - od = struct.unpack_from('>L', data, length)[0] - length += 4 - - # suppress index register - if extra & 7 == 0: - return (OpRegisterIndirectIndex(size, reg, bd, xn, index_size, scale), length) - elif (extra >> 6) & 1: - return (OpMemoryIndirect(size, reg, bd, od), length) - elif (extra >> 2) & 1: - return (OpMemoryIndirectPostindex(size, reg, bd, xn, index_size, scale, od), length) - else: - return (OpMemoryIndirectPreindex(size, reg, bd, xn, index_size, scale, od), length) - else: - # brief extension word - # 8 bit displacement - d8 = extra & 0xff - if d8 & 0x80: - d8 -= 256 - return (OpRegisterIndirectIndex(size, reg, d8, xn, index_size, scale), length) - - return (None, None) - - def decode_instruction(self, data: bytes, addr: int) -> Tuple[str, int, Optional[int], Optional[Operand], Optional[Operand], Optional[Operand]]: - error_value = ('unimplemented', len(data), None, None, None, None) - if len(data) < 2: - return error_value - - instruction = 
struct.unpack_from('>H', data)[0] - - msb = instruction >> 8 - operation_code = msb >> 4 - - #print((hex(addr), hex(instruction))) - - instr = None - length = None - size = None - source = None - dest = None - third = None - - if operation_code == 0x0: - # Bit manipulation/MOVEP/Immed late - if instruction & 0xf9c0 == 0x00c0: - # rtm, callm, chk2, cmp2 - if instruction & 0xfff0 == 0x06c0: - instr = 'rtm' - dest = OpRegisterDirect(SIZE_LONG, Registers[instruction & 15]) - length = 2 - elif instruction & 0xffc0 == 0x06c0: - instr = 'callm' - source = OpImmediate(SIZE_BYTE, struct.unpack_from('>B', data, 3)[0]) - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[4:], SIZE_BYTE) # check - if extra_dest is None: - return error_value - length = 4+extra_dest - else: - size = (instruction >> 9) & 3 - extra = struct.unpack_from('>H', data, 2)[0] - if extra & 0x0800: - instr = 'chk2' - else: - instr = 'cmp2' - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[4:], SIZE_BYTE) # check - dest = OpRegisterDirect(size, Registers[(instruction >> 12) & 15]) - if extra_source is None: - return error_value - length = 4+extra_source - elif instruction & 0xffc0 in (0x0ac0, 0x0cc0, 0x0ec0): - if instruction & 0xf9ff == 0x08fc: - instr = 'cas2' - size = ((instruction >> 9) & 3) - 1 - extra1 = struct.unpack_from('>H', data, 2)[0] - extra2 = struct.unpack_from('>H', data, 4)[0] - source = OpRegisterDirectPair(size, Registers[extra1 & 7], Registers[extra2 & 7]) - dest = OpRegisterDirectPair(size, Registers[(extra1 >> 6) & 7], Registers[(extra2 >> 6) & 7]) - third = OpRegisterIndirectPair(size, Registers[(extra1 >> 12) & 15], Registers[(extra2 >> 12) & 15]) - length = 6 - else: - instr = 'cas' - size = ((instruction >> 9) & 3) - 1 - extra = struct.unpack_from('>H', data, 2)[0] - source = OpRegisterDirect(size, Registers[extra & 7]) - dest = OpRegisterDirect(size, Registers[(extra >> 6) & 7]) - third, extra_third = 
self.decode_effective_address(instruction >> 3, instruction, data[4:], size) - if extra_third is None: - return error_value - length = 4+extra_third - elif msb in (0x00, 0x02, 0x04, 0x06, 0x0a, 0x0c): - # ORI, ANDI, SUBI, ADDI, EORI, CMPI - if msb == 0x00: - instr = 'ori' - elif msb == 0x02: - instr = 'andi' - elif msb == 0x04: - instr = 'subi' - elif msb == 0x06: - instr = 'addi' - elif msb == 0x0a: - instr = 'eori' - elif msb == 0x0c: - instr = 'cmpi' - size = (instruction >> 6) & 0x03 - source, extra_source = self.decode_effective_address(7, 4, data[2:], size) - if instruction & 0x00ff == 0x003c: - dest = OpRegisterDirect(size, 'ccr') - extra_dest = 0 - elif instruction & 0x00ff == 0x007c: - dest = OpRegisterDirect(size, 'sr') - extra_dest = 0 - else: - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], size) - - if dest is None: - instr = None - else: - length = 2+extra_source+extra_dest - elif msb == 0x08: - # btst, bchg, bclr, bset with constant - if instruction & 0xffc0 == 0x0800: - instr = 'btst' - elif instruction & 0xffc0 == 0x0840: - instr = 'bchg' - elif instruction & 0xffc0 == 0x0880: - instr = 'bclr' - elif instruction & 0xffc0 == 0x08C0: - instr = 'bset' - source = OpImmediate(SIZE_BYTE, struct.unpack_from('>B', data, 3)[0]) - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[4:], SIZE_BYTE) - if isinstance(dest, OpRegisterDirect): - dest.size = SIZE_LONG - if dest is None: - instr = None - else: - length = 4+extra_dest - elif msb & 0xf1 == 0x01: - # movep, btst, bchg, bclr, bset with register - if instruction & 0xf138 == 0x0108: - instr = 'movep' - size = ((instruction >> 6) & 1) + 1 - source, extra_source = self.decode_effective_address(5, instruction, data[2:], SIZE_BYTE) # check - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - length = 2+extra_source - if instruction & 0x0080: - source, dest = dest, source - else: - if instruction & 0xf1c0 
== 0x0100: - instr = 'btst' - elif instruction & 0xf1c0 == 0x0140: - instr = 'bchg' - elif instruction & 0xf1c0 == 0x0180: - instr = 'bclr' - elif instruction & 0xf1c0 == 0x01c0: - instr = 'bset' - source = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7]) # check - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], SIZE_BYTE) - if isinstance(dest, OpRegisterDirect): - dest.size = SIZE_LONG - if dest is None: - instr = None - else: - length = 2+extra_dest - elif instruction & 0xff00 == 0x0e00: - instr = 'moves' - extra = struct.unpack_from('>H', data, 2)[0] - size = (instruction >> 6) & 3 - dest = OpRegisterDirect(size, Registers[extra >> 12]) - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[4:], size) - if extra & 0x0800: - source, dest = dest, source - if extra_source is None: - return error_value - length = 4+extra_source - elif operation_code in (0x1, 0x2, 0x3): - # move - instr = 'move' - if operation_code == 0x1: - # Move byte - size = SIZE_BYTE - elif operation_code == 0x2: - # Move long - size = SIZE_LONG - elif operation_code == 0x3: - # Move word - size = SIZE_WORD - - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if source is None: - instr = None - else: - dest, extra_dest = self.decode_effective_address(instruction >> 6, instruction >> 9, data[2+extra_source:], size) - if dest is None or isinstance(dest, OpImmediate): - instr = None - else: - if isinstance(dest, OpRegisterDirect) and (dest.reg[0] == 'a' or dest.reg == 'sp'): - instr = 'movea' - length = 2+extra_source+extra_dest - elif operation_code == 0x4: - # Miscellaneous - extra_source = 0 - extra_dest = 0 - size = None - skip_ea = False - if instruction & 0xf100 == 0x4100: - # lea, extb, chk - if instruction & 0xf1c0 == 0x41c0: - if instruction & 0x0038: - instr = 'lea' - dest = OpRegisterDirect(SIZE_LONG, Registers[((instruction >> 9) & 7) + 8]) - 
else: - instr = 'extb' - size = SIZE_LONG - else: - instr = 'chk' - if instruction & 0x0080: - size = SIZE_WORD - else: - size = SIZE_LONG - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - elif msb == 0x40: - # move from sr, negx - if instruction & 0xffc0 == 0x40c0: - # move from sr - instr = 'move' - size = SIZE_WORD - source = OpRegisterDirect(size, 'sr') - else: - instr = 'negx' - size = instruction >> 6 - elif msb == 0x42: - # move to ccr, clr - if instruction & 0xffc0 == 0x42c0: - # move to ccr - instr = 'move' - size = SIZE_WORD - source = OpRegisterDirect(size, 'ccr') - else: - instr = 'clr' - size = instruction >> 6 - elif msb == 0x44: - # move from ccr, neg - if instruction & 0xffc0 == 0x44c0: - # move from ccr - instr = 'move' - size = SIZE_WORD - dest = OpRegisterDirect(size, 'ccr') - else: - instr = 'neg' - size = instruction >> 6 - elif msb == 0x46: - # move from sr, not - if instruction & 0xffc0 == 0x46c0: - # move from sr - instr = 'move' - size = SIZE_WORD - dest = OpRegisterDirect(size, 'sr') - else: - instr = 'not' - size = instruction >> 6 - elif msb in (0x48, 0x4c): - # link, nbcd, movem, ext, swap, bkpt, pea, divs, divu, divsl, divul, muls, mulu - if instruction & 0xfff8 == 0x4808: - instr = 'link' - size = SIZE_LONG - dest, extra_dest = self.decode_effective_address(7, 4, data[2:], size) - elif instruction & 0xffc0 == 0x4800: - instr = 'nbcd' - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], SIZE_BYTE) - skip_ea = True - elif instruction & 0xfb80 == 0x4880: - if instruction & 0x0040: - size = SIZE_LONG - else: - size = SIZE_WORD - if instruction & 0x0038: - instr = 'movem' - extra_source = 2 - extra = struct.unpack_from('>H', data, 2)[0] - reg_list = [] - if instruction & 0x0038 == 0x0020: - for k in range(16): - if extra << k & 0x8000: - reg_list.append(Registers[k]) - else: - for k in range(16): - if extra >> k & 0x0001: - reg_list.append(Registers[k]) - source = 
OpRegisterMovemList(size, reg_list) - else: - instr = 'ext' - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], size) - skip_ea = True - if instruction & 0x0400: - source, dest = dest, source - elif instruction & 0xfff8 == 0x4840: - instr = 'swap' - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], SIZE_LONG) - skip_ea = True - elif instruction & 0xfff8 == 0x4848: - instr = 'bkpt' - source = OpImmediate(SIZE_BYTE, instruction & 7) - skip_ea = True - elif instruction & 0xffc0 == 0x4840: - instr = 'pea' - size = SIZE_LONG - elif msb == 0x4c: - size = SIZE_LONG - extra_dest = 2 - extra = struct.unpack_from('>H', data, 2)[0] - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_dest:], size) - dh = Registers[extra & 7] - dl = Registers[(extra >> 12) & 7] - dest = OpRegisterDirect(size, dl) - if instruction & 0x0040: - if extra & 0x0800: - instr = 'divs' - else: - instr = 'divu' - if extra & 0x0400: - dest = OpRegisterDirectPair(size, dh, dl) - elif dh != dl: - dest = OpRegisterDirectPair(size, dh, dl) - instr += 'l' - else: - if extra & 0x0800: - instr = 'muls' - else: - instr = 'mulu' - if extra & 0x0400: - dest = OpRegisterDirectPair(size, dh, dl) - skip_ea = True - elif msb == 0x4a: - # bgnd, illegal, tas, tst - if instruction == 0x4afa: - instr = 'bgnd' - skip_ea = True - elif instruction == 0x4afc: - instr = 'illegal' - skip_ea = True - elif instruction & 0xffc0 == 0x4ac0: - instr = 'tas' - skip_ea = True - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], SIZE_BYTE) - else: - instr = 'tst' - size = instruction >> 6 - elif msb == 0x4e: - # trap, link, unlk, move, reset, nop, stop, rte, rtd, rts, trapv, rtr, movec, jsr, jmp - if instruction & 0xfff0 == 0x4e40: - instr = 'trap' - length = 2 - source = OpImmediate(SIZE_BYTE, instruction & 15) - skip_ea = True - elif instruction & 
0xfff0 == 0x4e50: - if instruction & 0xfff8 == 0x4e50: - instr = 'link' - dest, extra_dest = self.decode_effective_address(7, 4, data[2:], 1) - else: - instr = 'unlk' - source = OpRegisterDirect(SIZE_LONG, Registers[(instruction & 7) + 8]) - skip_ea = True - elif instruction & 0xfff0 == 0x4e60: - instr = 'move' - size = SIZE_LONG - source = OpRegisterDirect(SIZE_LONG, Registers[(instruction & 7) + 8]) - dest = OpRegisterDirect(size, 'usp') - if instruction & 0x08: - source, dest = dest, source - skip_ea = True - elif instruction == 0x4e70: - instr = 'reset' - skip_ea = True - elif instruction == 0x4e71: - instr = 'nop' - skip_ea = True - elif instruction == 0x4e72: - instr = 'stop' - source = OpImmediate(SIZE_WORD, struct.unpack_from(">H", data, 2)[0]) - extra_source = 2 - skip_ea = True - elif instruction == 0x4e73: - instr = 'rte' - skip_ea = True - elif instruction == 0x4e74: - instr = 'rtd' - dest, extra_dest = self.decode_effective_address(7, 4, data[2:], SIZE_WORD) - skip_ea = True - elif instruction == 0x4e75: - instr = 'rts' - skip_ea = True - elif instruction == 0x4e76: - instr = 'trapv' - skip_ea = True - elif instruction == 0x4e77: - instr = 'rtr' - skip_ea = True - elif instruction & 0xfffe == 0x4e7A: - instr = 'movec' - size = SIZE_LONG - extended = struct.unpack_from('>H', data, 2)[0] - control_reg = self.control_registers.get(extended & 0x0fff, None) - reg = (extended >> 12) & 15 - if control_reg is None: - instr = None - else: - source = OpRegisterDirect(size, control_reg) - dest = OpRegisterDirect(size, Registers[reg]) - if instruction & 1: - source, dest = dest, source - extra_source = 2 - skip_ea = True - elif instruction & 0xff80 == 0x4e80: - if instruction & 0xffc0 == 0x4e80: - instr = 'jsr' - else: - instr = 'jmp' - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], SIZE_LONG) - skip_ea = True - if instr is not None: - if size is not None: - size &= 3 - if skip_ea: - pass - elif dest is None: 
- dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], size) - else: - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_dest:], size) - if extra_source is None or extra_dest is None: - instr = None - else: - length = 2+extra_source+extra_dest - elif operation_code == 0x5: - # ADDQ/SUBQ/Scc/DBcc/TRAPcc - if instruction & 0xf0c0 == 0x50c0: - if instruction & 0xf0f8 == 0x50c8: - instr = 'db'+Condition[(instruction >> 8) & 0xf] - source = OpRegisterDirect(SIZE_WORD, Registers[instruction & 7]) - dest = OpRegisterIndirectDisplacement(SIZE_LONG, 'pc', struct.unpack_from('>h', data, 2)[0]) - length = 4 - elif instruction & 0xf0ff in (0x50fa, 0x50fb, 0x50fc): - instr = 'trap'+Condition[(instruction >> 8) & 0xf] - if instruction & 7 == 2: - length = 4 - source = OpImmediate(SIZE_WORD, struct.unpack_from('>H', data, 2)[0]) - elif instruction & 7 == 3: - length = 6 - source = OpImmediate(SIZE_LONG, struct.unpack_from('>L', data, 2)[0]) - elif instruction & 7 == 4: - length = 2 - else: - instr = 's'+Condition[(instruction >> 8) & 0xf] - size = SIZE_BYTE - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if extra_dest is None: - return error_value - length = 2+extra_dest - else: - if instruction & 0x0100: - instr = 'subq' - else: - instr = 'addq' - val = (instruction >> 9) & 7 - if val == 0: - val = 8 - size = (instruction >> 6) & 3 - source = OpImmediate(SIZE_BYTE, val) - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if extra_dest is None: - return error_value - length = 2+extra_dest - elif operation_code == 0x6: - # Bcc/BSR/BRA - if msb == 0x60: - instr = 'bra' - elif msb == 0x61: - instr = 'bsr' - else: - instr = 'b'+Condition[(instruction >> 8) & 0xf] - val = instruction & 0xff - if val == 0: - val = struct.unpack_from('>h', data, 2)[0] - length = 4 - elif val == 0xff: - val = 
struct.unpack_from('>L', data, 2)[0] - length = 6 - else: - if val & 0x80: - val -= 256 - length = 2 - dest = OpRegisterIndirectDisplacement(SIZE_LONG, 'pc', val) - elif operation_code == 0x7: - # MOVEQ - instr = 'moveq' - size = SIZE_LONG - val = instruction & 0xff - if val & 0x80: - val |= 0xffffff00 - source = OpImmediate(size, val) - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - length = 2 - elif operation_code == 0x8: - # OR/DIV/SBCD - if instruction & 0xf0c0 == 0x80c0: - if instruction & 0x0100: - instr = 'divs' - else: - instr = 'divu' - size = SIZE_WORD - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if extra_source is None: - return error_value - length = 2+extra_source - elif instruction & 0xf1f0 == 0x8100: - instr = 'sbcd' - length = 2 - dest = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7]) - source = OpRegisterDirect(SIZE_BYTE, Registers[instruction & 7]) - if instruction & 8: - dest = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[((instruction >> 9) & 7) + 8]) - source = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[(instruction & 7) + 8]) - elif instruction & 0xf130 == 0x8100: - if instruction & 0x0040: - instr = 'pack' - if instruction & 8: - dest = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[((instruction >> 9) & 7) + 8]) - source = OpRegisterIndirectPredecrement(SIZE_WORD, Registers[(instruction & 7) + 8]) - else: - dest = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7]) - source = OpRegisterDirect(SIZE_WORD, Registers[instruction & 7]) - else: - instr = 'unpk' - if instruction & 8: - dest = OpRegisterIndirectPredecrement(SIZE_WORD, Registers[((instruction >> 9) & 7) + 8]) - source = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[(instruction & 7) + 8]) - else: - dest = OpRegisterDirect(SIZE_WORD, Registers[(instruction >> 9) & 7]) - source = 
OpRegisterDirect(SIZE_BYTE, Registers[instruction & 7]) - length = 4 - third = OpImmediate(SIZE_WORD, struct.unpack_from(">H", data, 2)[0]) - else: - instr = 'or' - opmode = (instruction >> 6) & 0x7 - size = (instruction >> 6) & 3 - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if opmode & 4: - source, dest = dest, source - if extra_source is None: - return error_value - length = 2+extra_source - elif operation_code == 0x9: - # SUB/SUBA/SUBX - instr = 'sub' - opmode = (instruction >> 6) & 0x7 - if opmode in (0x03, 0x07): - instr = 'suba' - if opmode == 0x03: - size = SIZE_WORD - else: - size = SIZE_LONG - dest = OpRegisterDirect(SIZE_LONG, Registers[((instruction >> 9) & 7) + 8]) - else: - size = (instruction >> 6) & 3 - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if instr == 'sub' and opmode & 4: - if isinstance(source, OpRegisterDirect): - instr = 'subx' - if source.reg[0] == 'a' or source.reg == 'sp': - source = OpRegisterIndirectPredecrement(size, source.reg) - dest = OpRegisterIndirectPredecrement(size, dest.reg) - else: - source, dest = dest, source - if extra_source is None: - return error_value - length = 2+extra_source - elif operation_code == 0xa: - # (unassigned, reserved) - pass - elif operation_code == 0xb: - # CMP/EOR - instr = 'cmp' - opmode = (instruction >> 6) & 0x7 - if opmode in (0x03, 0x07): - instr = 'cmpa' - if opmode == 0x03: - size = SIZE_WORD - else: - size = SIZE_LONG - dest = OpRegisterDirect(size, Registers[((instruction >> 9) & 7) + 8]) - else: - size = (instruction >> 6) & 3 - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if instr == 'cmp' and opmode & 4: - if instruction 
& 0x0038 == 0x0008: - instr = 'cmpm' - source = OpRegisterIndirectPostincrement(size, Registers[instruction & 15]) - dest = OpRegisterIndirectPostincrement(size, Registers[((instruction >> 9) & 7) + 8]) - else: - source, dest = dest, source - instr = 'eor' - if extra_source is None: - return error_value - length = 2+extra_source - elif operation_code == 0xc: - # AND/MUL/ABCD/EXG - if instruction & 0xf0c0 == 0xc0c0: - if instruction & 0x0100: - instr = 'muls' - else: - instr = 'mulu' - size = SIZE_WORD - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - if extra_source is None: - return error_value - length = 2+extra_source - elif instruction & 0xf130 == 0xc100: - if instruction & 0xf1f0 == 0xc100: - instr = 'abcd' - if instruction & 0x0008: - source = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[(instruction & 7) + 8]) - dest = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[((instruction >> 9) & 7) + 8]) - else: - source = OpRegisterDirect(SIZE_BYTE, Registers[instruction & 7]) - dest = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7]) - else: - instr = 'exg' - size = SIZE_LONG - source = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - dest = OpRegisterDirect(size, Registers[instruction & 7]) - if instruction & 0xf1f8 == 0xc148: - source = OpRegisterIndirectPredecrement(size, Registers[((instruction >> 9) & 7) + 8]) - dest = OpRegisterIndirectPredecrement(size, Registers[(instruction & 7) + 8]) - if instruction & 0xf1f8 == 0xc188: - dest = OpRegisterIndirectPredecrement(size, Registers[(instruction & 7) + 8]) - length = 2 - else: - instr = 'and' - opmode = (instruction >> 6) & 0x7 - size = (instruction >> 6) & 3 - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if opmode & 4: - source, dest = 
dest, source - if extra_source is None: - return error_value - length = 2+extra_source - elif operation_code == 0xd: - # ADD/ADDA/ADDX - instr = 'add' - opmode = (instruction >> 6) & 0x7 - if opmode in (0x03, 0x07): - instr = 'adda' - if opmode == 0x03: - size = SIZE_WORD - else: - size = SIZE_LONG - dest = OpRegisterDirect(SIZE_LONG, Registers[((instruction >> 9) & 7) + 8]) - else: - size = (instruction >> 6) & 3 - dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7]) - source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - if instr == 'add' and opmode & 4: - if isinstance(source, OpRegisterDirect): - instr = 'addx' - if source.reg[0] == 'a' or source.reg == 'sp': - source = OpRegisterIndirectPredecrement(size, source.reg) - dest = OpRegisterIndirectPredecrement(size, dest.reg) - else: - source, dest = dest, source - if extra_source is None: - return error_value - length = 2+extra_source - elif operation_code == 0xe: - # shift/rotate/bit field - if instruction & 0xF8C0 == 0xE0C0: - # shift/rotate - size = SIZE_WORD - direction = (instruction >> 8) & 1 - style = (instruction >> 9) & 3 - dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], size) - instr = ShiftStyle[style] - if direction: - instr += 'l' - else: - instr += 'r' - if extra_dest is None: - return error_value - length = 2+extra_dest - elif instruction & 0xF8C0 == 0xE8C0: - # bit field instructions - # TODO - style = (instruction >> 8) & 0x7 - instr = 'bf'+BitfieldStyle[style] - length = 4 - else: - # shift/rotate - size = (instruction >> 6) & 3 - direction = (instruction >> 8) & 1 - style = (instruction >> 3) & 3 - if (instruction >> 5) & 1: - source = OpRegisterDirect(SIZE_LONG, Registers[(instruction >> 9) & 7]) - else: - val = (instruction >> 9) & 7 - if val == 0: - val = 8 - source = OpImmediate(SIZE_BYTE, val) - dest = OpRegisterDirect(size, Registers[instruction & 7]) - instr = ShiftStyle[style] - if 
    def __init__(self):
        # Initialise the Architecture base first, then build the decoder.
        Architecture.__init__(self)
        # Instruction decoding is delegated to a separate M68KDisasm object,
        # configured with this architecture's address_size and
        # control_registers; the get_instruction_* callbacks call
        # self.disasm.decode_instruction().
        self.disasm = M68KDisasm(self.address_size, self.control_registers)
self.decode_instruction(data, addr) + instr, length, size, source, dest, third = self.disasm.decode_instruction(data, addr) if instr == 'movem': # movem overrides default predecrement/postincrement IL generation diff --git a/m68k_disasm.py b/m68k_disasm.py new file mode 100644 index 0000000..653034d --- /dev/null +++ b/m68k_disasm.py @@ -0,0 +1,878 @@ +""" + +Copyright (c) 2017 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
class M68KDisasm:
    """Decode raw M68k machine code into a mnemonic plus operand objects.

    The decoder holds no per-instruction state; it only remembers two
    CPU-variant settings supplied by the owning architecture:

    * ``address_size`` -- compared against 4 when sign-extending absolute
      short addresses (32-bit extension when 4, 24-bit otherwise) and
      forwarded to ``OpAbsolute``.
    * ``control_registers`` -- mapping (anything with ``.get``) from the
      12-bit MOVEC control-register code to a register name.
    """

    # Class-level defaults; both are overwritten per instance in __init__.
    address_size = None
    control_registers = None

    # Immediate-group mnemonics keyed by the high byte of the opcode word.
    _IMMEDIATE_OPS = {
        0x00: 'ori',
        0x02: 'andi',
        0x04: 'subi',
        0x06: 'addi',
        0x0a: 'eori',
        0x0c: 'cmpi',
    }

    # Bit-manipulation mnemonics, indexed by bits 7-6 of the opcode word.
    _BIT_OPS = ('btst', 'bchg', 'bclr', 'bset')

    def __init__(self, address_size, control_registers):
        self.address_size = address_size
        self.control_registers = control_registers

    def decode_effective_address(self, mode: int, register: int, data: bytes, size: Optional[int] = None) -> Tuple[Optional['Operand'], Optional[int]]:
        """Decode one effective-address field.

        :param mode: EA mode field; only the low three bits are used, so
            callers may pass ``instruction >> 3`` unmasked.
        :param register: EA register field; only the low three bits are used.
        :param data: bytes that follow the words already consumed by the
            caller (extension words are read from offset 0).
        :param size: operand size code (SIZE_BYTE/SIZE_WORD/SIZE_LONG) or None.
        :returns: ``(operand, extra_length)`` where ``extra_length`` is the
            number of extension bytes consumed.  ``(None, None)`` is returned
            for an unrecognised encoding, and an immediate with ``size`` None
            yields an operand with a ``None`` length.
        :raises struct.error: if ``data`` is too short for the extension words.
        """
        mode &= 0x07
        register &= 0x07

        # Base register for the indexed / memory-indirect modes decoded below.
        reg = None

        if mode == 0:
            # data register direct
            return (OpRegisterDirect(size, Registers[register]), 0)
        elif mode == 1:
            # address register direct
            return (OpRegisterDirect(size, Registers[register+8]), 0)
        elif mode == 2:
            # address register indirect
            return (OpRegisterIndirect(size, Registers[register+8]), 0)
        elif mode == 3:
            # address register indirect with postincrement
            return (OpRegisterIndirectPostincrement(size, Registers[register+8]), 0)
        elif mode == 4:
            # address register indirect with predecrement
            return (OpRegisterIndirectPredecrement(size, Registers[register+8]), 0)
        elif mode == 5:
            # address register indirect with displacement
            return (OpRegisterIndirectDisplacement(size, Registers[register+8], struct.unpack_from('>h', data, 0)[0]), 2)
        elif mode == 6:
            # extended addressing mode
            reg = Registers[register+8]
        elif mode == 7:
            if register == 0:
                # absolute short
                val = struct.unpack_from('>H', data, 0)[0]
                if val & 0x8000:
                    if self.address_size == 4:
                        val |= 0xffff0000  # extend to 32-bits
                    else:
                        val |= 0xff0000  # extend to 24-bits (for 68000)
                return (OpAbsolute(size, val, 1, self.address_size), 2)
            elif register == 1:
                # absolute long
                return (OpAbsolute(size, struct.unpack_from('>L', data, 0)[0], 2, self.address_size), 4)
            elif register == 2:
                # program counter indirect with displacement
                return (OpRegisterIndirectDisplacement(size, 'pc', struct.unpack_from('>h', data, 0)[0]), 2)
            elif register == 3:
                # extended addressing mode
                reg = 'pc'
            elif register == 4:
                # immediate
                if size is None:
                    # unspecified length
                    return (OpImmediate(size, None), None)
                elif size == SIZE_BYTE:
                    # byte: stored in the low half of a full extension word
                    return (OpImmediate(size, struct.unpack_from('>b', data, 1)[0]), 2)
                elif size == SIZE_WORD:
                    # word
                    return (OpImmediate(size, struct.unpack_from('>h', data, 0)[0]), 2)
                elif size == SIZE_LONG:
                    # long
                    return (OpImmediate(size, struct.unpack_from('>l', data, 0)[0]), 4)

        if reg is not None:
            extra = struct.unpack_from('>H', data, 0)[0]
            # index register
            xn = Registers[extra >> 12]
            # index register size
            index_size = (extra >> 11) & 1
            # index register scale
            scale = 1 << ((extra >> 9) & 3)
            length = 2

            if extra & 0x0100:
                # full extension word
                bd = 0
                od = 0

                # base displacement (skipped when the base-suppress bit is set)
                if not (extra >> 7) & 1:
                    if (extra >> 4) & 3 == 2:
                        # word base displacement
                        bd = struct.unpack_from('>h', data, length)[0]
                        length += 2
                    elif (extra >> 4) & 3 == 3:
                        # long base displacement
                        bd = struct.unpack_from('>L', data, length)[0]
                        length += 4

                # outer displacement
                if extra & 3 == 2:
                    # word outer displacement
                    od = struct.unpack_from('>h', data, length)[0]
                    length += 2
                elif extra & 3 == 3:
                    # long outer displacement
                    od = struct.unpack_from('>L', data, length)[0]
                    length += 4

                # Pick the operand flavour from the index/indirect selection
                # bits (low three bits) and the index-suppress bit (bit 6).
                if extra & 7 == 0:
                    return (OpRegisterIndirectIndex(size, reg, bd, xn, index_size, scale), length)
                elif (extra >> 6) & 1:
                    return (OpMemoryIndirect(size, reg, bd, od), length)
                elif (extra >> 2) & 1:
                    return (OpMemoryIndirectPostindex(size, reg, bd, xn, index_size, scale, od), length)
                else:
                    return (OpMemoryIndirectPreindex(size, reg, bd, xn, index_size, scale, od), length)
            else:
                # brief extension word
                # 8 bit displacement
                d8 = extra & 0xff
                if d8 & 0x80:
                    d8 -= 256
                return (OpRegisterIndirectIndex(size, reg, d8, xn, index_size, scale), length)

        return (None, None)

    def decode_instruction(self, data: bytes, addr: int) -> Tuple[str, int, Optional[int], Optional['Operand'], Optional['Operand'], Optional['Operand']]:
        """Decode the instruction at the start of ``data``.

        :param data: raw bytes beginning at the instruction.
        :param addr: address of the instruction (currently only used by the
            commented-out debug output).
        :returns: ``(instr, length, size, source, dest, third)``.  When the
            bytes cannot be decoded -- unknown opcode, invalid operand, or a
            buffer that ends in the middle of the instruction -- the tuple
            ``('unimplemented', len(data), None, None, None, None)`` is
            returned instead.
        """
        try:
            return self._decode_instruction(data, addr)
        except struct.error:
            # An extension word lies beyond the end of ``data``: report the
            # same error value as any other undecodable byte sequence rather
            # than letting the exception escape into the caller.
            return ('unimplemented', len(data), None, None, None, None)

    def _decode_instruction(self, data, addr):
        """Opcode decoder proper; may raise struct.error on truncated input."""
        error_value = ('unimplemented', len(data), None, None, None, None)
        if len(data) < 2:
            return error_value

        instruction = struct.unpack_from('>H', data)[0]

        msb = instruction >> 8
        operation_code = msb >> 4

        #print((hex(addr), hex(instruction)))

        instr = None
        length = None
        size = None
        source = None
        dest = None
        third = None

        if operation_code == 0x0:
            # Bit manipulation/MOVEP/Immediate
            if instruction & 0xf9c0 == 0x00c0:
                # rtm, callm, chk2, cmp2
                if instruction & 0xfff0 == 0x06c0:
                    instr = 'rtm'
                    dest = OpRegisterDirect(SIZE_LONG, Registers[instruction & 15])
                    length = 2
                elif instruction & 0xffc0 == 0x06c0:
                    instr = 'callm'
                    source = OpImmediate(SIZE_BYTE, struct.unpack_from('>B', data, 3)[0])
                    dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[4:], SIZE_BYTE) # check
                    if extra_dest is None:
                        return error_value
                    length = 4+extra_dest
                else:
                    size = (instruction >> 9) & 3
                    extra = struct.unpack_from('>H', data, 2)[0]
                    if extra & 0x0800:
                        instr = 'chk2'
                    else:
                        instr = 'cmp2'
                    source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[4:], SIZE_BYTE) # check
                    # The register being checked is encoded in the top four
                    # bits (D/A + number) of the extension word; the same bits
                    # of the opcode word are always zero in this group.
                    dest = OpRegisterDirect(size, Registers[(extra >> 12) & 15])
                    if extra_source is None:
                        return error_value
                    length = 4+extra_source
            elif instruction & 0xffc0 in (0x0ac0, 0x0cc0, 0x0ec0):
                if instruction & 0xf9ff == 0x08fc:
                    instr = 'cas2'
                    size = ((instruction >> 9) & 3) - 1
                    extra1 = struct.unpack_from('>H', data, 2)[0]
                    extra2 = struct.unpack_from('>H', data, 4)[0]
                    source = OpRegisterDirectPair(size, Registers[extra1 & 7], Registers[extra2 & 7])
                    dest = OpRegisterDirectPair(size, Registers[(extra1 >> 6) & 7], Registers[(extra2 >> 6) & 7])
                    third = OpRegisterIndirectPair(size, Registers[(extra1 >> 12) & 15], Registers[(extra2 >> 12) & 15])
                    length = 6
                else:
                    instr = 'cas'
                    size = ((instruction >> 9) & 3) - 1
                    extra = struct.unpack_from('>H', data, 2)[0]
                    source = OpRegisterDirect(size, Registers[extra & 7])
                    dest = OpRegisterDirect(size, Registers[(extra >> 6) & 7])
                    third, extra_third = self.decode_effective_address(instruction >> 3, instruction, data[4:], size)
                    if extra_third is None:
                        return error_value
                    length = 4+extra_third
            elif msb in self._IMMEDIATE_OPS:
                # ORI, ANDI, SUBI, ADDI, EORI, CMPI
                instr = self._IMMEDIATE_OPS[msb]
                size = (instruction >> 6) & 0x03
                # mode 7 / register 4 == immediate operand
                source, extra_source = self.decode_effective_address(7, 4, data[2:], size)
                if instruction & 0x00ff == 0x003c:
                    dest = OpRegisterDirect(size, 'ccr')
                    extra_dest = 0
                elif instruction & 0x00ff == 0x007c:
                    dest = OpRegisterDirect(size, 'sr')
                    extra_dest = 0
                else:
                    dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], size)

                if dest is None:
                    instr = None
                else:
                    length = 2+extra_source+extra_dest
            elif msb == 0x08:
                # btst, bchg, bclr, bset with constant
                instr = self._BIT_OPS[(instruction >> 6) & 3]
                source = OpImmediate(SIZE_BYTE, struct.unpack_from('>B', data, 3)[0])
                dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[4:], SIZE_BYTE)
                if isinstance(dest, OpRegisterDirect):
                    # register destinations are treated as long operands
                    dest.size = SIZE_LONG
                if dest is None:
                    instr = None
                else:
                    length = 4+extra_dest
            elif msb & 0xf1 == 0x01:
                # movep, btst, bchg, bclr, bset with register
                if instruction & 0xf138 == 0x0108:
                    instr = 'movep'
                    size = ((instruction >> 6) & 1) + 1
                    source, extra_source = self.decode_effective_address(5, instruction, data[2:], SIZE_BYTE) # check
                    dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
                    length = 2+extra_source
                    if instruction & 0x0080:
                        source, dest = dest, source
                else:
                    instr = self._BIT_OPS[(instruction >> 6) & 3]
                    source = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7]) # check
                    dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], SIZE_BYTE)
                    if isinstance(dest, OpRegisterDirect):
                        # register destinations are treated as long operands
                        dest.size = SIZE_LONG
                    if dest is None:
                        instr = None
                    else:
                        length = 2+extra_dest
            elif instruction & 0xff00 == 0x0e00:
                instr = 'moves'
                extra = struct.unpack_from('>H', data, 2)[0]
                size = (instruction >> 6) & 3
                dest = OpRegisterDirect(size, Registers[extra >> 12])
                source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[4:], size)
                if extra & 0x0800:
                    source, dest = dest, source
                if extra_source is None:
                    return error_value
                length = 4+extra_source
        elif operation_code in (0x1, 0x2, 0x3):
            # move
            instr = 'move'
            if operation_code == 0x1:
                # Move byte
                size = SIZE_BYTE
            elif operation_code == 0x2:
                # Move long
                size = SIZE_LONG
            elif operation_code == 0x3:
                # Move word
                size = SIZE_WORD

            source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
            if source is None:
                instr = None
            else:
                # destination EA has its mode/register fields swapped
                dest, extra_dest = self.decode_effective_address(instruction >> 6, instruction >> 9, data[2+extra_source:], size)
                if dest is None or isinstance(dest, OpImmediate):
                    instr = None
                else:
                    if isinstance(dest, OpRegisterDirect) and (dest.reg[0] == 'a' or dest.reg == 'sp'):
                        instr = 'movea'
                    length = 2+extra_source+extra_dest
        elif operation_code == 0x4:
            # Miscellaneous
            extra_source = 0
            extra_dest = 0
            size = None
            # When False, the operand that is still missing is decoded from
            # the opcode's EA field at the end of this group.
            skip_ea = False
            if instruction & 0xf100 == 0x4100:
                # lea, extb, chk
                if instruction & 0xf1c0 == 0x41c0:
                    if instruction & 0x0038:
                        instr = 'lea'
                        dest = OpRegisterDirect(SIZE_LONG, Registers[((instruction >> 9) & 7) + 8])
                    else:
                        instr = 'extb'
                    size = SIZE_LONG
                else:
                    instr = 'chk'
                    if instruction & 0x0080:
                        size = SIZE_WORD
                    else:
                        size = SIZE_LONG
                    dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
            elif msb == 0x40:
                # move from sr, negx
                if instruction & 0xffc0 == 0x40c0:
                    # move from sr
                    instr = 'move'
                    size = SIZE_WORD
                    source = OpRegisterDirect(size, 'sr')
                else:
                    instr = 'negx'
                    size = instruction >> 6
            elif msb == 0x42:
                # move from ccr, clr
                if instruction & 0xffc0 == 0x42c0:
                    # move from ccr
                    instr = 'move'
                    size = SIZE_WORD
                    source = OpRegisterDirect(size, 'ccr')
                else:
                    instr = 'clr'
                    size = instruction >> 6
            elif msb == 0x44:
                # move to ccr, neg
                if instruction & 0xffc0 == 0x44c0:
                    # move to ccr
                    instr = 'move'
                    size = SIZE_WORD
                    dest = OpRegisterDirect(size, 'ccr')
                else:
                    instr = 'neg'
                    size = instruction >> 6
            elif msb == 0x46:
                # move to sr, not
                if instruction & 0xffc0 == 0x46c0:
                    # move to sr
                    instr = 'move'
                    size = SIZE_WORD
                    dest = OpRegisterDirect(size, 'sr')
                else:
                    instr = 'not'
                    size = instruction >> 6
            elif msb in (0x48, 0x4c):
                # link, nbcd, movem, ext, swap, bkpt, pea, divs, divu, divsl, divul, muls, mulu
                if instruction & 0xfff8 == 0x4808:
                    instr = 'link'
                    size = SIZE_LONG
                    dest, extra_dest = self.decode_effective_address(7, 4, data[2:], size)
                elif instruction & 0xffc0 == 0x4800:
                    instr = 'nbcd'
                    dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], SIZE_BYTE)
                    skip_ea = True
                elif instruction & 0xfb80 == 0x4880:
                    if instruction & 0x0040:
                        size = SIZE_LONG
                    else:
                        size = SIZE_WORD
                    if instruction & 0x0038:
                        instr = 'movem'
                        extra_source = 2
                        extra = struct.unpack_from('>H', data, 2)[0]
                        if instruction & 0x0038 == 0x0020:
                            # predecrement mode: the register mask is stored
                            # bit-reversed, so scan it from the top bit down
                            reg_list = [Registers[k] for k in range(16) if extra << k & 0x8000]
                        else:
                            reg_list = [Registers[k] for k in range(16) if extra >> k & 0x0001]
                        source = OpRegisterMovemList(size, reg_list)
                    else:
                        instr = 'ext'
                    dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], size)
                    skip_ea = True
                    if instruction & 0x0400:
                        # memory-to-register direction
                        source, dest = dest, source
                elif instruction & 0xfff8 == 0x4840:
                    instr = 'swap'
                    dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], SIZE_LONG)
                    skip_ea = True
                elif instruction & 0xfff8 == 0x4848:
                    instr = 'bkpt'
                    source = OpImmediate(SIZE_BYTE, instruction & 7)
                    skip_ea = True
                elif instruction & 0xffc0 == 0x4840:
                    instr = 'pea'
                    size = SIZE_LONG
                elif msb == 0x4c:
                    # long multiply / divide; the register pair comes from the extension word
                    size = SIZE_LONG
                    extra_dest = 2
                    extra = struct.unpack_from('>H', data, 2)[0]
                    source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_dest:], size)
                    dh = Registers[extra & 7]
                    dl = Registers[(extra >> 12) & 7]
                    dest = OpRegisterDirect(size, dl)
                    if instruction & 0x0040:
                        if extra & 0x0800:
                            instr = 'divs'
                        else:
                            instr = 'divu'
                        if extra & 0x0400:
                            dest = OpRegisterDirectPair(size, dh, dl)
                        elif dh != dl:
                            dest = OpRegisterDirectPair(size, dh, dl)
                            instr += 'l'
                    else:
                        if extra & 0x0800:
                            instr = 'muls'
                        else:
                            instr = 'mulu'
                        if extra & 0x0400:
                            dest = OpRegisterDirectPair(size, dh, dl)
                    skip_ea = True
            elif msb == 0x4a:
                # bgnd, illegal, tas, tst
                if instruction == 0x4afa:
                    instr = 'bgnd'
                    skip_ea = True
                elif instruction == 0x4afc:
                    instr = 'illegal'
                    skip_ea = True
                elif instruction & 0xffc0 == 0x4ac0:
                    instr = 'tas'
                    skip_ea = True
                    dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], SIZE_BYTE)
                else:
                    instr = 'tst'
                    size = instruction >> 6
            elif msb == 0x4e:
                # trap, link, unlk, move, reset, nop, stop, rte, rtd, rts, trapv, rtr, movec, jsr, jmp
                if instruction & 0xfff0 == 0x4e40:
                    instr = 'trap'
                    length = 2
                    source = OpImmediate(SIZE_BYTE, instruction & 15)
                    skip_ea = True
                elif instruction & 0xfff0 == 0x4e50:
                    if instruction & 0xfff8 == 0x4e50:
                        instr = 'link'
                        dest, extra_dest = self.decode_effective_address(7, 4, data[2:], 1)
                    else:
                        instr = 'unlk'
                    # both forms name their address register in bits 2-0
                    source = OpRegisterDirect(SIZE_LONG, Registers[(instruction & 7) + 8])
                    skip_ea = True
                elif instruction & 0xfff0 == 0x4e60:
                    instr = 'move'
                    size = SIZE_LONG
                    source = OpRegisterDirect(SIZE_LONG, Registers[(instruction & 7) + 8])
                    dest = OpRegisterDirect(size, 'usp')
                    if instruction & 0x08:
                        source, dest = dest, source
                    skip_ea = True
                elif instruction == 0x4e70:
                    instr = 'reset'
                    skip_ea = True
                elif instruction == 0x4e71:
                    instr = 'nop'
                    skip_ea = True
                elif instruction == 0x4e72:
                    instr = 'stop'
                    source = OpImmediate(SIZE_WORD, struct.unpack_from(">H", data, 2)[0])
                    extra_source = 2
                    skip_ea = True
                elif instruction == 0x4e73:
                    instr = 'rte'
                    skip_ea = True
                elif instruction == 0x4e74:
                    instr = 'rtd'
                    dest, extra_dest = self.decode_effective_address(7, 4, data[2:], SIZE_WORD)
                    skip_ea = True
                elif instruction == 0x4e75:
                    instr = 'rts'
                    skip_ea = True
                elif instruction == 0x4e76:
                    instr = 'trapv'
                    skip_ea = True
                elif instruction == 0x4e77:
                    instr = 'rtr'
                    skip_ea = True
                elif instruction & 0xfffe == 0x4e7A:
                    instr = 'movec'
                    size = SIZE_LONG
                    extended = struct.unpack_from('>H', data, 2)[0]
                    control_reg = self.control_registers.get(extended & 0x0fff, None)
                    reg = (extended >> 12) & 15
                    if control_reg is None:
                        # control register unknown to this CPU variant
                        instr = None
                    else:
                        source = OpRegisterDirect(size, control_reg)
                        dest = OpRegisterDirect(size, Registers[reg])
                        if instruction & 1:
                            source, dest = dest, source
                    extra_source = 2
                    skip_ea = True
                elif instruction & 0xff80 == 0x4e80:
                    if instruction & 0xffc0 == 0x4e80:
                        instr = 'jsr'
                    else:
                        instr = 'jmp'
                    dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], SIZE_LONG)
                    skip_ea = True
            if instr is not None:
                if size is not None:
                    # sized ops above stored ``instruction >> 6`` unmasked
                    size &= 3
                if skip_ea:
                    pass
                elif dest is None:
                    dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_source:], size)
                else:
                    source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2+extra_dest:], size)
                if extra_source is None or extra_dest is None:
                    instr = None
                else:
                    length = 2+extra_source+extra_dest
        elif operation_code == 0x5:
            # ADDQ/SUBQ/Scc/DBcc/TRAPcc
            if instruction & 0xf0c0 == 0x50c0:
                if instruction & 0xf0f8 == 0x50c8:
                    instr = 'db'+Condition[(instruction >> 8) & 0xf]
                    source = OpRegisterDirect(SIZE_WORD, Registers[instruction & 7])
                    dest = OpRegisterIndirectDisplacement(SIZE_LONG, 'pc', struct.unpack_from('>h', data, 2)[0])
                    length = 4
                elif instruction & 0xf0ff in (0x50fa, 0x50fb, 0x50fc):
                    instr = 'trap'+Condition[(instruction >> 8) & 0xf]
                    if instruction & 7 == 2:
                        length = 4
                        source = OpImmediate(SIZE_WORD, struct.unpack_from('>H', data, 2)[0])
                    elif instruction & 7 == 3:
                        length = 6
                        source = OpImmediate(SIZE_LONG, struct.unpack_from('>L', data, 2)[0])
                    elif instruction & 7 == 4:
                        length = 2
                else:
                    instr = 's'+Condition[(instruction >> 8) & 0xf]
                    size = SIZE_BYTE
                    dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
                    if extra_dest is None:
                        return error_value
                    length = 2+extra_dest
            else:
                if instruction & 0x0100:
                    instr = 'subq'
                else:
                    instr = 'addq'
                val = (instruction >> 9) & 7
                if val == 0:
                    # a zero field encodes the quick value 8
                    val = 8
                size = (instruction >> 6) & 3
                source = OpImmediate(SIZE_BYTE, val)
                dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
                if extra_dest is None:
                    return error_value
                length = 2+extra_dest
        elif operation_code == 0x6:
            # Bcc/BSR/BRA
            if msb == 0x60:
                instr = 'bra'
            elif msb == 0x61:
                instr = 'bsr'
            else:
                instr = 'b'+Condition[(instruction >> 8) & 0xf]
            val = instruction & 0xff
            if val == 0:
                # 16-bit displacement follows the opcode word
                val = struct.unpack_from('>h', data, 2)[0]
                length = 4
            elif val == 0xff:
                # 32-bit displacement follows the opcode word
                # NOTE(review): read unsigned here, unlike the 8/16-bit forms -- confirm intent
                val = struct.unpack_from('>L', data, 2)[0]
                length = 6
            else:
                if val & 0x80:
                    val -= 256
                length = 2
            dest = OpRegisterIndirectDisplacement(SIZE_LONG, 'pc', val)
        elif operation_code == 0x7:
            # MOVEQ
            instr = 'moveq'
            size = SIZE_LONG
            val = instruction & 0xff
            if val & 0x80:
                val |= 0xffffff00
            source = OpImmediate(size, val)
            dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
            length = 2
        elif operation_code == 0x8:
            # OR/DIV/SBCD
            if instruction & 0xf0c0 == 0x80c0:
                if instruction & 0x0100:
                    instr = 'divs'
                else:
                    instr = 'divu'
                size = SIZE_WORD
                dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
                source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
                if extra_source is None:
                    return error_value
                length = 2+extra_source
            elif instruction & 0xf1f0 == 0x8100:
                instr = 'sbcd'
                length = 2
                dest = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7])
                source = OpRegisterDirect(SIZE_BYTE, Registers[instruction & 7])
                if instruction & 8:
                    dest = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[((instruction >> 9) & 7) + 8])
                    source = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[(instruction & 7) + 8])
            elif instruction & 0xf130 == 0x8100:
                if instruction & 0x0040:
                    instr = 'pack'
                    if instruction & 8:
                        dest = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[((instruction >> 9) & 7) + 8])
                        source = OpRegisterIndirectPredecrement(SIZE_WORD, Registers[(instruction & 7) + 8])
                    else:
                        dest = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7])
                        source = OpRegisterDirect(SIZE_WORD, Registers[instruction & 7])
                else:
                    instr = 'unpk'
                    if instruction & 8:
                        dest = OpRegisterIndirectPredecrement(SIZE_WORD, Registers[((instruction >> 9) & 7) + 8])
                        source = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[(instruction & 7) + 8])
                    else:
                        dest = OpRegisterDirect(SIZE_WORD, Registers[(instruction >> 9) & 7])
                        source = OpRegisterDirect(SIZE_BYTE, Registers[instruction & 7])
                length = 4
                third = OpImmediate(SIZE_WORD, struct.unpack_from(">H", data, 2)[0])
            else:
                instr = 'or'
                opmode = (instruction >> 6) & 0x7
                size = (instruction >> 6) & 3
                dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
                source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
                if opmode & 4:
                    source, dest = dest, source
                if extra_source is None:
                    return error_value
                length = 2+extra_source
        elif operation_code == 0x9:
            # SUB/SUBA/SUBX
            instr = 'sub'
            opmode = (instruction >> 6) & 0x7
            if opmode in (0x03, 0x07):
                instr = 'suba'
                if opmode == 0x03:
                    size = SIZE_WORD
                else:
                    size = SIZE_LONG
                dest = OpRegisterDirect(SIZE_LONG, Registers[((instruction >> 9) & 7) + 8])
            else:
                size = (instruction >> 6) & 3
                dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
            source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
            if instr == 'sub' and opmode & 4:
                if isinstance(source, OpRegisterDirect):
                    instr = 'subx'
                    if source.reg[0] == 'a' or source.reg == 'sp':
                        # -(Ay),-(Ax) form: both operands are address registers
                        source = OpRegisterIndirectPredecrement(size, source.reg)
                        dest = OpRegisterIndirectPredecrement(size, Registers[((instruction >> 9) & 7) + 8])
                else:
                    source, dest = dest, source
            if extra_source is None:
                return error_value
            length = 2+extra_source
        elif operation_code == 0xa:
            # (unassigned, reserved)
            pass
        elif operation_code == 0xb:
            # CMP/EOR
            instr = 'cmp'
            opmode = (instruction >> 6) & 0x7
            if opmode in (0x03, 0x07):
                instr = 'cmpa'
                if opmode == 0x03:
                    size = SIZE_WORD
                else:
                    size = SIZE_LONG
                dest = OpRegisterDirect(size, Registers[((instruction >> 9) & 7) + 8])
            else:
                size = (instruction >> 6) & 3
                dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
            source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
            if instr == 'cmp' and opmode & 4:
                if instruction & 0x0038 == 0x0008:
                    instr = 'cmpm'
                    # bit 3 is known to be set here, so ``& 15`` already
                    # yields an address-register index (8..15)
                    source = OpRegisterIndirectPostincrement(size, Registers[instruction & 15])
                    dest = OpRegisterIndirectPostincrement(size, Registers[((instruction >> 9) & 7) + 8])
                else:
                    source, dest = dest, source
                    instr = 'eor'
            if extra_source is None:
                return error_value
            length = 2+extra_source
        elif operation_code == 0xc:
            # AND/MUL/ABCD/EXG
            if instruction & 0xf0c0 == 0xc0c0:
                if instruction & 0x0100:
                    instr = 'muls'
                else:
                    instr = 'mulu'
                size = SIZE_WORD
                source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
                dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
                if extra_source is None:
                    return error_value
                length = 2+extra_source
            elif instruction & 0xf130 == 0xc100:
                if instruction & 0xf1f0 == 0xc100:
                    instr = 'abcd'
                    if instruction & 0x0008:
                        source = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[(instruction & 7) + 8])
                        dest = OpRegisterIndirectPredecrement(SIZE_BYTE, Registers[((instruction >> 9) & 7) + 8])
                    else:
                        source = OpRegisterDirect(SIZE_BYTE, Registers[instruction & 7])
                        dest = OpRegisterDirect(SIZE_BYTE, Registers[(instruction >> 9) & 7])
                else:
                    instr = 'exg'
                    size = SIZE_LONG
                    source = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
                    dest = OpRegisterDirect(size, Registers[instruction & 7])
                    # EXG only ever swaps registers, so the address-register
                    # forms are register-direct operands, not memory operands.
                    if instruction & 0xf1f8 == 0xc148:
                        # address register <-> address register
                        source = OpRegisterDirect(size, Registers[((instruction >> 9) & 7) + 8])
                        dest = OpRegisterDirect(size, Registers[(instruction & 7) + 8])
                    if instruction & 0xf1f8 == 0xc188:
                        # data register <-> address register
                        dest = OpRegisterDirect(size, Registers[(instruction & 7) + 8])
                length = 2
            else:
                instr = 'and'
                opmode = (instruction >> 6) & 0x7
                size = (instruction >> 6) & 3
                dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
                source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
                if opmode & 4:
                    source, dest = dest, source
                if extra_source is None:
                    return error_value
                length = 2+extra_source
        elif operation_code == 0xd:
            # ADD/ADDA/ADDX
            instr = 'add'
            opmode = (instruction >> 6) & 0x7
            if opmode in (0x03, 0x07):
                instr = 'adda'
                if opmode == 0x03:
                    size = SIZE_WORD
                else:
                    size = SIZE_LONG
                dest = OpRegisterDirect(SIZE_LONG, Registers[((instruction >> 9) & 7) + 8])
            else:
                size = (instruction >> 6) & 3
                dest = OpRegisterDirect(size, Registers[(instruction >> 9) & 7])
            source, extra_source = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
            if instr == 'add' and opmode & 4:
                if isinstance(source, OpRegisterDirect):
                    instr = 'addx'
                    if source.reg[0] == 'a' or source.reg == 'sp':
                        # -(Ay),-(Ax) form: both operands are address registers
                        source = OpRegisterIndirectPredecrement(size, source.reg)
                        dest = OpRegisterIndirectPredecrement(size, Registers[((instruction >> 9) & 7) + 8])
                else:
                    source, dest = dest, source
            if extra_source is None:
                return error_value
            length = 2+extra_source
        elif operation_code == 0xe:
            # shift/rotate/bit field
            if instruction & 0xF8C0 == 0xE0C0:
                # shift/rotate of a memory operand
                size = SIZE_WORD
                direction = (instruction >> 8) & 1
                style = (instruction >> 9) & 3
                dest, extra_dest = self.decode_effective_address(instruction >> 3, instruction, data[2:], size)
                instr = ShiftStyle[style] + ('l' if direction else 'r')
                if extra_dest is None:
                    return error_value
                length = 2+extra_dest
            elif instruction & 0xF8C0 == 0xE8C0:
                # bit field instructions
                # TODO: operands are not decoded yet
                style = (instruction >> 8) & 0x7
                instr = 'bf'+BitfieldStyle[style]
                length = 4
            else:
                # shift/rotate of a data register
                size = (instruction >> 6) & 3
                direction = (instruction >> 8) & 1
                style = (instruction >> 3) & 3
                if (instruction >> 5) & 1:
                    # shift count held in a data register
                    source = OpRegisterDirect(SIZE_LONG, Registers[(instruction >> 9) & 7])
                else:
                    # immediate shift count; a zero field encodes 8
                    val = (instruction >> 9) & 7
                    if val == 0:
                        val = 8
                    source = OpImmediate(SIZE_BYTE, val)
                dest = OpRegisterDirect(size, Registers[instruction & 7])
                instr = ShiftStyle[style] + ('l' if direction else 'r')
                length = 2
        elif operation_code == 0xf:
            if instruction & 0xff20 == 0xf420:
                instr = 'cpush'
                length = 2
            elif instruction & 0xff80 == 0xff80:
                # The mnemonic must go into ``instr``; storing it in
                # ``instruction`` left ``instr`` unset and discarded it.
                instr = 'illFF'
                length = 2
            # coprocessor instructions
            # TODO
        if instr is None:
            # FIXME uncomment to debug
            #log_error('Bad opcode at 0x{:x}'.format(addr))
            return error_value

        #print((instr, length, size, source, dest, third))
        return instr, length, size, source, dest, third
# Shift styles
# (the historical "SYLE" spelling is kept so existing references keep working)
# These are plain ints so they can index ShiftStyle; a trailing comma here
# would turn each value into a 1-tuple.
SHIFT_SYLE_ARITHMETIC = 0
SHIFT_SYLE_LOGICAL = 1
SHIFT_SYLE_ROTATE_WITH_EXTEND = 2
SHIFT_SYLE_ROTATE = 3

# Mnemonic stems; the decoder appends 'l' or 'r' for the direction.
ShiftStyle = [
    'as',   # SHIFT_SYLE_ARITHMETIC
    'ls',   # SHIFT_SYLE_LOGICAL
    'rox',  # SHIFT_SYLE_ROTATE_WITH_EXTEND
    'ro'    # SHIFT_SYLE_ROTATE
]

# Bit-field styles (plain ints, usable as indexes into BitfieldStyle)
BITFIELD_STYLE_TST = 0
BITFIELD_STYLE_EXTU = 1
BITFIELD_STYLE_CHG = 2
BITFIELD_STYLE_EXTS = 3
BITFIELD_STYLE_CLR = 4
BITFIELD_STYLE_FFO = 5
BITFIELD_STYLE_SET = 6
BITFIELD_STYLE_INS = 7

# Mnemonic suffixes; the decoder prefixes them with 'bf'.
BitfieldStyle = [
    "tst",   # BITFIELD_STYLE_TST
    "extu",  # BITFIELD_STYLE_EXTU
    "chg",   # BITFIELD_STYLE_CHG
    "exts",  # BITFIELD_STYLE_EXTS
    "clr",   # BITFIELD_STYLE_CLR
    "ffo",   # BITFIELD_STYLE_FFO
    "set",   # BITFIELD_STYLE_SET
    "ins",   # BITFIELD_STYLE_INS
]


# Condition codes
CONDITION_TRUE = 0
CONDITION_FALSE = 1
CONDITION_HIGH = 2
CONDITION_LESS_OR_SAME = 3
CONDITION_CARRY_CLEAR = 4
CONDITION_CARRY_SET = 5
CONDITION_NOT_EQUAL = 6
CONDITION_EQUAL = 7
CONDITION_OVERFLOW_CLEAR = 8
CONDITION_OVERFLOW_SET = 9
CONDITION_PLUS = 10
CONDITION_MINUS = 11
CONDITION_GREATER_OR_EQUAL = 12
CONDITION_LESS_THAN = 13
CONDITION_GREATER_THAN = 14
CONDITION_LESS_OR_EQUAL = 15

# Mnemonic suffix for each 4-bit condition field.
Condition = [
    't',   # CONDITION_TRUE
    'f',   # CONDITION_FALSE
    'hi',  # CONDITION_HIGH
    'ls',  # CONDITION_LESS_OR_SAME
    'cc',  # CONDITION_CARRY_CLEAR
    'cs',  # CONDITION_CARRY_SET
    'ne',  # CONDITION_NOT_EQUAL
    'eq',  # CONDITION_EQUAL
    'vc',  # CONDITION_OVERFLOW_CLEAR
    'vs',  # CONDITION_OVERFLOW_SET
    'pl',  # CONDITION_PLUS
    'mi',  # CONDITION_MINUS
    'ge',  # CONDITION_GREATER_OR_EQUAL
    'lt',  # CONDITION_LESS_THAN
    'gt',  # CONDITION_GREATER_THAN
    'le'   # CONDITION_LESS_OR_EQUAL
]

# Registers
REGISTER_D0 = 0
REGISTER_D1 = 1
REGISTER_D2 = 2
REGISTER_D3 = 3
REGISTER_D4 = 4
REGISTER_D5 = 5
REGISTER_D6 = 6
REGISTER_D7 = 7
REGISTER_A0 = 8
REGISTER_A1 = 9
REGISTER_A2 = 10
REGISTER_A3 = 11
REGISTER_A4 = 12
REGISTER_A5 = 13
REGISTER_A6 = 14
REGISTER_A7 = 15

# Register names by index: data registers 0-7, address registers 8-15
# (index 15 is named 'sp').
Registers = [
    'd0',  # REGISTER_D0
    'd1',  # REGISTER_D1
    'd2',  # REGISTER_D2
    'd3',  # REGISTER_D3
    'd4',  # REGISTER_D4
    'd5',  # REGISTER_D5
    'd6',  # REGISTER_D6
    'd7',  # REGISTER_D7
    'a0',  # REGISTER_A0
    'a1',  # REGISTER_A1
    'a2',  # REGISTER_A2
    'a3',  # REGISTER_A3
    'a4',  # REGISTER_A4
    'a5',  # REGISTER_A5
    'a6',  # REGISTER_A6
    'sp'   # REGISTER_A7
]

# Sizes (operand width is ``1 << size`` bytes)
SIZE_BYTE = 0
SIZE_WORD = 1
SIZE_LONG = 2

SizeSuffix = [
    '.b',  # SIZE_BYTE
    '.w',  # SIZE_WORD
    '.l',  # SIZE_LONG
]

# Operands

class Operand:
    """Interface shared by all decoded instruction operands.

    Every method except :meth:`get_address_il` raises NotImplementedError
    here and is meant to be overridden by the concrete operand classes.
    """

    def format(self, addr: int) -> List['InstructionTextToken']:
        """Return the disassembly text tokens for this operand."""
        raise NotImplementedError

    def get_pre_il(self, il: 'LowLevelILFunction') -> Optional['ExpressionIndex']:
        """Return an IL expression tied to this operand, or None.

        NOTE(review): presumably emitted before the instruction's main IL
        (e.g. a predecrement) -- confirm against the operand subclasses.
        """
        raise NotImplementedError

    def get_post_il(self, il: 'LowLevelILFunction') -> Optional['ExpressionIndex']:
        """Return an IL expression tied to this operand, or None.

        NOTE(review): presumably emitted after the instruction's main IL
        (e.g. a postincrement) -- confirm against the operand subclasses.
        """
        raise NotImplementedError

    def get_address_il2(self, il: 'LowLevelILFunction') -> Tuple[Optional['ExpressionIndex'], List['ExpressionIndex']]:
        """Return ``(address_expression, expression_list)`` for this operand.

        The first element is what :meth:`get_address_il` hands back; the
        meaning of the accompanying list is defined by the subclasses.
        """
        raise NotImplementedError

    def get_address_il(self, il: 'LowLevelILFunction') -> Optional['ExpressionIndex']:
        """Return only the address expression computed by get_address_il2."""
        return self.get_address_il2(il)[0]

    def get_source_il(self, il: 'LowLevelILFunction') -> Optional['ExpressionIndex']:
        """Return the IL expression that reads this operand's value."""
        raise NotImplementedError

    def get_dest_il(self, il: 'LowLevelILFunction', value, flags=0) -> Optional['ExpressionIndex']:
        """Return the IL expression that stores ``value`` into this operand."""
        raise NotImplementedError
SIZE_BYTE: + # if self.reg[0] == 'a' or self.reg == 'sp': + # return None + # else: + # return il.set_reg(1, self.reg+'.b', value, flags) + # elif self.size == SIZE_WORD: + # return il.set_reg(2, self.reg+'.w', value, flags) + # else: + # return il.set_reg(4, self.reg, value, flags) + if self.size == SIZE_BYTE: + if self.reg[0] == 'a' or self.reg == 'sp': + return il.unimplemented() + else: + return il.set_reg(4, self.reg, il.or_expr(4, il.and_expr(4, il.const(4, 0xffffff00), il.reg(4, self.reg)), il.and_expr(4, il.const(4, 0xff), value)), flags) + elif self.size == SIZE_WORD: + if self.reg[0] == 'a' or self.reg == 'sp': + return il.set_reg(4, self.reg, il.sign_extend(4, value), flags) + else: + return il.set_reg(4, self.reg, il.or_expr(4, il.and_expr(4, il.const(4, 0xffff0000), il.reg(4, self.reg)), il.and_expr(4, il.const(4, 0xffff), value)), flags) + else: + if value: + return il.set_reg(4, self.reg, value, flags) + else: + return il.unimplemented() + + +class OpRegisterDirectPair(Operand): + def __init__(self, size: int, reg1: str, reg2: str): + self.size = size + self.reg1 = reg1 + self.reg2 = reg2 + + def __repr__(self): + return "OpRegisterDirectPair(%d, %s, %s)" % (self.size, self.reg1, self.reg2) + + def format(self, addr: int) -> List[InstructionTextToken]: + # d0:d1 + return [ + InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg1), + InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ":"), + InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg2) + ] + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List[ExpressionIndex]]: + r = il.unimplemented() + return (r, [r]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return (il.reg(1 << self.size, self.reg1), il.reg(1 << self.size, 
self.reg2)) + + def get_dest_il(self, il: LowLevelILFunction, values, flags=0) -> ExpressionIndex: + return (il.set_reg(1 << self.size, self.reg1, values[0], flags), il.set_reg(1 << self.size, self.reg2, values[1], flags)) + + +class OpRegisterMovemList(Operand): + def __init__(self, size: int, regs: List[str]): + self.size = size + self.regs = regs + + def __repr__(self): + return "OpRegisterMovemList(%d, %s)" % (self.size, repr(self.regs)) + + def format(self, addr: int) -> List[InstructionTextToken]: + # d0-d7/a0/a2/a4-a7 + if len(self.regs) == 0: + return [] + tokens = [InstructionTextToken(InstructionTextTokenType.RegisterToken, self.regs[0])] + last = self.regs[0] + first = None + for reg in self.regs[1:]: + if Registers[Registers.index(last)+1] == reg and reg != 'a0': + if first is None: + first = last + last = reg + else: + if first is not None: + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "-")) + tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, last)) + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "/")) + tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, reg)) + first = None + last = reg + if first is not None: + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "-")) + tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, last)) + return tokens + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List[ExpressionIndex]]: + r = il.unimplemented() + return (r, [r]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return [il.reg(1 << self.size, reg) for reg in self.regs] + + def get_dest_il(self, il: LowLevelILFunction, values, flags=0) -> ExpressionIndex: + return 
[il.set_reg(1 << self.size, reg, val, flags) for reg, val in zip(self.regs, values)] + + +class OpRegisterIndirect(Operand): + def __init__(self, size: int, reg: str): + self.size = size + self.reg = reg + + def __repr__(self): + return "OpRegisterIndirect(%d, %s)" % (self.size, self.reg) + + def format(self, addr: int) -> List[InstructionTextToken]: + # (a0) + return [ + InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), + InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg), + InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") + ] + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List[ExpressionIndex]]: + r = il.reg(4, self.reg) + return (r, [r]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.load(1 << self.size, self.get_address_il(il)) + + def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: + #return il.store(1 << self.size, self.get_address_il(il), value, flags) + return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) + + +class OpRegisterIndirectPair(Operand): + def __init__(self, size: int, reg1: str, reg2: str): + self.size = size + self.reg1 = reg1 + self.reg2 = reg2 + + def __repr__(self): + return "OpRegisterIndirectPair(%d, %s, %s)" % (self.size, self.reg1, self.reg2) + + def format(self, addr: int) -> List[InstructionTextToken]: + # d0:d1 + return [ + InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), + InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg1), + InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")"), + InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ":"), + 
InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), + InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg2), + InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") + ] + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List[ExpressionIndex]]: + # return (il.reg(4, self.reg1), il.reg(4, self.reg2)) + a = il.reg(4, self.reg1) + b = il.reg(4, self.reg2) + return ((a, b), [a, b]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return (il.load(1 << self.size, il.reg(4, self.reg1)), il.load(1 << self.size, il.reg(4, self.reg2))) + + def get_dest_il(self, il: LowLevelILFunction, values, flags=0) -> ExpressionIndex: + #return (il.store(1 << self.size, il.reg(4, self.reg1), values[0], flags), il.store(1 << self.size, il.reg(4, self.reg2), values[1], flags)) + return (il.store(1 << self.size, il.reg(4, self.reg1), values[0]), il.store(1 << self.size, il.reg(4, self.reg2), values[1])) + + +class OpRegisterIndirectPostincrement(Operand): + def __init__(self, size: int, reg: str): + self.size = size + self.reg = reg + + def __repr__(self): + return "OpRegisterIndirectPostincrement(%d, %s)" % (self.size, self.reg) + + def format(self, addr: int) -> List[InstructionTextToken]: + # (a0)+ + return [ + InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), + InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg), + InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")"), + InstructionTextToken(InstructionTextTokenType.TextToken, "+") + ] + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.set_reg(4, + self.reg, + il.add(4, + il.reg(4, 
self.reg), + il.const(4, 1 << self.size) + ) + ) + + def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List[ExpressionIndex]]: + r = il.reg(4, self.reg) + return (r, [r]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.load(1 << self.size, self.get_address_il(il)) + + def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: + #return il.store(1 << self.size, self.get_address_il(il), value, flags) + return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) + + +class OpRegisterIndirectPredecrement(Operand): + def __init__(self, size: int, reg: str): + self.size = size + self.reg = reg + + def __repr__(self): + return "OpRegisterIndirectPredecrement(%d, %s)" % (self.size, self.reg) + + def format(self, addr: int) -> List[InstructionTextToken]: + # -(a0) + return [ + InstructionTextToken(InstructionTextTokenType.TextToken, "-"), + InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), + InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg), + InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") + ] + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.set_reg(4, + self.reg, + il.sub(4, + il.reg(4, self.reg), + il.const(4, 1 << self.size) + ) + ) + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List[ExpressionIndex]]: + r = il.reg(4, self.reg) + return (r, [r]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.load(1 << self.size, self.get_address_il(il)) + + def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: + #return il.store(1 << self.size, self.get_address_il(il), value, flags) + return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 
<< self.size, flags=flags) + + +class OpRegisterIndirectDisplacement(Operand): + def __init__(self, size: int, reg: str, offset: int): + self.size = size + self.reg = reg + self.offset = offset + + def __repr__(self): + return "OpRegisterIndirectDisplacement(%d, %s, 0x%x)" % (self.size, self.reg, self.offset) + + def format(self, addr: int) -> List[InstructionTextToken]: + if self.reg == 'pc': + return [ + InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), + InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, "${:08x}".format(addr+2+self.offset), addr+2+self.offset, 4), + InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") + ] + else: + # $1234(a0) + return [ + InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:04x}".format(self.offset), self.offset, 2), + InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), + InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg), + InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")") + ] + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List[ExpressionIndex]]: + if self.reg == 'pc': + r = il.const_pointer(4, il.current_address+2+self.offset) + return (r, [r]) + else: + a = il.reg(4, self.reg) + b = il.const(2, self.offset) + c = il.add(4, a, b) + return (c, [a, b, c]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.load(1 << self.size, self.get_address_il(il)) + + def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: + if self.reg == 'pc': + return il.unimplemented() + else: + #return il.store(1 << self.size, self.get_address_il(il), value, flags) + return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << 
self.size, flags=flags) + + +class OpRegisterIndirectIndex(Operand): + def __init__(self, size: int, reg: str, offset: int, ireg: str, ireg_long: int, scale: int): + self.size = size + self.reg = reg + self.offset = offset + self.ireg = ireg + self.ireg_long = ireg_long + self.scale = scale + + def __repr__(self): + return "OpRegisterIndirectIndex(%d, %s, 0x%x, %s, %d, %d)" % (self.size, self.reg, self.offset, self.ireg, self.ireg_long, self.scale) + + def format(self, addr: int) -> List[InstructionTextToken]: + # $1234(a0,a1.l*4) + tokens = [] + if self.offset != 0: + tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.offset), self.offset)) + tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "(")) + tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg)) + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) + tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.ireg)) + tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, ".")) + tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "l" if self.ireg_long else 'w')) + if self.scale != 1: + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "*")) + tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "{}".format(self.scale), self.scale)) + tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")")) + return tokens + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List[ExpressionIndex]]: + # return il.add(4, + # il.add(4, + # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), + # il.const(4, self.offset) 
+ # ), + # il.mult(4, + # il.reg(4 if self.ireg_long else 2, self.ireg), + # il.const(1, self.scale) + # ) + # ) + a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) + b = il.const(4, self.offset) + e = il.add(4, a, b) + + c = il.reg(4 if self.ireg_long else 2, self.ireg) + d = il.const(1, self.scale) + f = il.mult(4, c, d) + + g = il.add(4, e, f) + return (g, [a, b, c, d, e, f, g]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.load(1 << self.size, self.get_address_il(il)) + + def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: + if self.reg == 'pc': + return il.unimplemented() + else: + #return il.store(1 << self.size, self.get_address_il(il), value, flags) + return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) + + +class OpMemoryIndirect(Operand): + def __init__(self, size: int, reg: str, offset: int, outer_displacement: int): + self.size = size + self.reg = reg + self.offset = offset + self.outer_displacement = outer_displacement + + def __repr__(self): + return "OpMemoryIndirect(%d, %s, %d, %d)" % (self.size, self.reg, self.offset, self.outer_displacement) + + def format(self, addr: int) -> List[InstructionTextToken]: + # ([$1234,a0],$1234) + tokens = [] + tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "(")) + tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "[")) + if self.offset != 0: + tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.offset), self.offset)) + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) + tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg)) + tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, "]")) + if self.outer_displacement != 0: + 
tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) + tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.outer_displacement), self.outer_displacement)) + tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")")) + return tokens + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List[ExpressionIndex]]: + # return il.add(4, + # il.load(4, + # il.add(4, + # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), + # il.const(4, self.offset) + # ), + # ), + # il.const(4, self.outer_displacement) + # ) + a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) + b = il.const(4, self.offset) + c = il.add(4, a, b) + d = il.load(4, c) + + e = il.const(4, self.outer_displacement) + + f = il.add(4, d, e) + return (f, [a, b, c, d, e, f]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.load(1 << self.size, self.get_address_il(il)) + + def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: + if self.reg == 'pc': + return il.unimplemented() + else: + #return il.store(1 << self.size, self.get_address_il(il), value, flags) + return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) + + +class OpMemoryIndirectPostindex(Operand): + def __init__(self, size: int, reg: str, offset: int, ireg: str, ireg_long: bool, scale: int, outer_displacement: int): + self.size = size + self.reg = reg + self.offset = offset + self.ireg = ireg + self.ireg_long = ireg_long + self.scale = scale + self.outer_displacement = outer_displacement + + def __repr__(self): + return "OpMemoryIndirectPostindex(%d, %s, 0x%x, %s, %d, 
%d, 0x%x)" % (self.size, self.reg, self.offset, self.ireg, self.ireg_long, self.scale, self.outer_displacement) + + def format(self, addr: int) -> List[InstructionTextToken]: + # ([$1234,a0],a1.l*4,$1234) + tokens = [] + tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "(")) + tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "[")) + if self.offset != 0: + tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.offset), self.offset)) + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) + tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg)) + tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, "]")) + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) + tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.ireg)) + tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, ".")) + tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "l" if self.ireg_long else 'w')) + if self.scale != 1: + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "*")) + tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "{}".format(self.scale), self.scale)) + if self.outer_displacement != 0: + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) + tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.outer_displacement), self.outer_displacement)) + tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")")) + return tokens + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il: 
LowLevelILFunction) -> Tuple[ExpressionIndex, List[ExpressionIndex]]: + # j = il.add(4, d, i) + # d = il.load(4, c) + # c = il.add(4, a, b) + # a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), + # b = il.const(4, self.offset) + # ) + # ), + # i = il.add(4, g, h) + # g = il.mult(4, e, f) + # e = il.reg(4 if self.ireg_long else 2, self.ireg), + # f = il.const(1, self.scale) + # ), + # h = il.const(4, self.outer_displacement) + # ) + # ) + a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) + b = il.const(4, self.offset) + c = il.add(4, a, b) + d = il.load(4, c) + + e = il.reg(4 if self.ireg_long else 2, self.ireg), + f = il.const(1, self.scale) + # print('here1: ', e, ' ', self.ireg_long, ' ', self.ireg) + # FIXME: why 'e' is a tuple with a second element missing??? + g = il.mult(4, e[0], f) + + h = il.const(4, self.outer_displacement) + i = il.add(4, g, h) + + j = il.add(4, d, i) + return (j, [a, b, c, d, e, f, g, h, i, j]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.load(1 << self.size, self.get_address_il(il)) + + def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: + if self.reg == 'pc': + return il.unimplemented() + else: + #return il.store(1 << self.size, self.get_address_il(il), value, flags) + return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) + + +class OpMemoryIndirectPreindex(Operand): + def __init__(self, size: int, reg: str, offset: int, ireg: str, ireg_long: bool, scale: int, outer_displacement: int): + self.size = size + self.reg = reg + self.offset = offset + self.ireg = ireg + self.ireg_long = ireg_long + self.scale = scale + self.outer_displacement = outer_displacement + + def __repr__(self): + return "OpMemoryIndirectPreindex(%d, %s, 0x%x, %s, %d, %d, 0x%x)" % (self.size, self.reg, self.offset, self.ireg, self.ireg_long, self.scale, 
self.outer_displacement) + + def format(self, addr: int) -> List[InstructionTextToken]: + # ([$1234,a0,a1.l*4],$1234) + tokens = [] + tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "(")) + tokens.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "[")) + if self.offset != 0: + tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.offset), self.offset)) + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) + tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.reg)) + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) + tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, self.ireg)) + tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, ".")) + tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "l" if self.ireg_long else 'w')) + if self.scale != 1: + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, "*")) + tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "{}".format(self.scale), self.scale)) + tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, "]")) + if self.outer_displacement != 0: + tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ",")) + tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:x}".format(self.outer_displacement), self.outer_displacement)) + tokens.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")")) + return tokens + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List[ExpressionIndex]]: + # return il.add(4, + # 
il.load(4, + # il.add(4, + # il.add(4, + # il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg), + # il.const(4, self.offset) + # ), + # il.mult(4, + # il.reg(4 if self.ireg_long else 2, self.ireg), + # il.const(1, self.scale) + # ) + # ) + # ), + # il.const(4, self.outer_displacement) + # ) + a = il.const_pointer(4, il.current_address+2) if self.reg == 'pc' else il.reg(4, self.reg) + b = il.const(4, self.offset) + c = il.add(4, a, b) + + d = il.reg(4 if self.ireg_long else 2, self.ireg) + e = il.const(1, self.scale) + f = il.mult(4, d, e) + + g = il.add(4, c, f) + h = il.load(4, g) + + i = il.const(4, self.outer_displacement) + j = il.add(4, h, i) + return (j, [a, b, c, d, e, f, g, h, i, j]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.load(1 << self.size, self.get_address_il(il)) + + def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: + if self.reg == 'pc': + return il.unimplemented() + else: + #return il.store(1 << self.size, self.get_address_il(il), value, flags) + return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) + + +class OpAbsolute(Operand): + def __init__(self, size, address, address_size, address_width): + self.size = size + self.address = address + self.address_size = address_size + self.address_width = address_width + + def __repr__(self): + return "OpAbsolute(%d, 0x%x, %d, %d)" % (self.size, self.address, self.address_size, self.address_width) + + def format(self, addr: int) -> List[InstructionTextToken]: + # ($1234).w + return [ + InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, "("), + InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, "${:0{}x}".format(self.address, 1 << self.address_size), self.address, 1 << self.address_size), + InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, ")"+SizeSuffix[self.address_size]) + ] + + def 
get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List[ExpressionIndex]]: + # return il.sign_extend(self.address_width, + # il.const(1 << self.address_size, self.address) + # ) + a = il.const(1 << self.address_size, self.address) + b = il.sign_extend(self.address_width, a) + return (b, [a, b]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.load(1 << self.size, self.get_address_il(il)) + + def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: + #return il.store(1 << self.size, self.get_address_il(il), value, flags) + return il.expr(LowLevelILOperation.LLIL_STORE, self.get_address_il(il), value, size=1 << self.size, flags=flags) + + +class OpImmediate(Operand): + def __init__(self, size, value): + self.size = size + self.value = value + + def __repr__(self): + return "OpImmediate(%d, 0x%x)" % (self.size, self.value) + + def format(self, addr: int) -> List[InstructionTextToken]: + # #$1234 + return [ + InstructionTextToken(InstructionTextTokenType.TextToken, "#"), + #InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, "${:0{}x}".format(self.value, 1 << self.size), self.value, 1 << self.size) + InstructionTextToken(InstructionTextTokenType.IntegerToken, "${:0{}x}".format(self.value, 1 << self.size), self.value, 1 << self.size) + ] + + def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return None + + def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List[ExpressionIndex]]: + r = il.unimplemented() + return (r, [r]) + + def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + return il.const(1 << self.size, self.value) + + def get_dest_il(self, il: LowLevelILFunction, 
value, flags=0) -> ExpressionIndex: + return il.unimplemented() + + +# condition mapping to LLIL flag conditions +ConditionMapping = { + 'hi': LowLevelILFlagCondition.LLFC_UGT, + 'ls': LowLevelILFlagCondition.LLFC_ULE, + 'cc': LowLevelILFlagCondition.LLFC_UGE, + 'cs': LowLevelILFlagCondition.LLFC_ULT, + 'ne': LowLevelILFlagCondition.LLFC_NE, + 'eq': LowLevelILFlagCondition.LLFC_E, + 'vc': LowLevelILFlagCondition.LLFC_NO, + 'vs': LowLevelILFlagCondition.LLFC_O, + 'pl': LowLevelILFlagCondition.LLFC_POS, + 'mi': LowLevelILFlagCondition.LLFC_NEG, + 'ge': LowLevelILFlagCondition.LLFC_SGE, + 'lt': LowLevelILFlagCondition.LLFC_SLT, + 'gt': LowLevelILFlagCondition.LLFC_SGT, + 'le': LowLevelILFlagCondition.LLFC_SLE, +} diff --git a/test.py b/test.py index deeb66c..28fb23d 100644 --- a/test.py +++ b/test.py @@ -4,6 +4,8 @@ test_cases = [ # subq.b #$1,d0b (b'\x53\x00', 'LLIL_SET_REG.d(d0,LLIL_OR.d(LLIL_AND.d(LLIL_CONST.d(0xFFFFFF00),LLIL_REG.d(d0)),LLIL_AND.d(LLIL_CONST.d(0xFF),LLIL_SUB.b{*}(LLIL_REG.b(d0),LLIL_CONST.b(0x1)))))'), + # ror.b #$1,d1 + (b'\xe2\x19', 'LLIL_SET_REG.d(d1,LLIL_OR.d(LLIL_AND.d(LLIL_CONST.d(0xFFFFFF00),LLIL_REG.d(d1)),LLIL_AND.d(LLIL_CONST.d(0xFF),LLIL_ROR.b{*}(LLIL_REG.b(d1),LLIL_CONST.b(0x1)))))'), ] import re @@ -80,7 +82,8 @@ def test_all(): print('MISMATCH AT TEST %d!' 
% test_i) print('\t input: %s' % data.hex()) print('\texpected: %s' % expected) - print('\t actual: %s' % actual) + print('\t actual: ') + print(actual) print('\t tree:') print(il_str_to_tree(actual)) return False From d233dac1c96ce4399bb1726f15eed80c8bd5af80 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 26 Jun 2022 15:49:58 +1000 Subject: [PATCH 10/46] set address size to 4 --- m68k.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m68k.py b/m68k.py index 7109fa8..0c424b9 100644 --- a/m68k.py +++ b/m68k.py @@ -47,7 +47,7 @@ class M68000(Architecture): name = "M68000" - address_size = 3 + address_size = 4 default_int_size = 4 max_instr_length = 22 endianness = Endianness.BigEndian From 993456b7d6fb4e5398a5f90f87203accf606df50 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 6 Mar 2022 20:14:59 +1100 Subject: [PATCH 11/46] run all tests --- test.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test.py b/test.py index 28fb23d..88952db 100644 --- a/test.py +++ b/test.py @@ -6,6 +6,10 @@ (b'\x53\x00', 'LLIL_SET_REG.d(d0,LLIL_OR.d(LLIL_AND.d(LLIL_CONST.d(0xFFFFFF00),LLIL_REG.d(d0)),LLIL_AND.d(LLIL_CONST.d(0xFF),LLIL_SUB.b{*}(LLIL_REG.b(d0),LLIL_CONST.b(0x1)))))'), # ror.b #$1,d1 (b'\xe2\x19', 'LLIL_SET_REG.d(d1,LLIL_OR.d(LLIL_AND.d(LLIL_CONST.d(0xFFFFFF00),LLIL_REG.d(d1)),LLIL_AND.d(LLIL_CONST.d(0xFF),LLIL_ROR.b{*}(LLIL_REG.b(d1),LLIL_CONST.b(0x1)))))'), + # move.b $0012(a6),d0 + # (b'\x10\x2e\x00\x12', ''), + # btst #$0,d0 + # (b'\x08\x00\x00\x00', ''), ] import re @@ -76,6 +80,7 @@ def il_str_to_tree(ilstr): return result def test_all(): + ret = True for (test_i, (data, expected)) in enumerate(test_cases): actual = instr_to_il(data) if actual != expected: @@ -86,5 +91,5 @@ def test_all(): print(actual) print('\t tree:') print(il_str_to_tree(actual)) - return False - return True + ret = False + return ret From b5c1fa3eec89c9ec2cc7b1165e4a79d8ce5f76e4 Mon Sep 17 00:00:00 2001 From: Michael 
Pishchagin Date: Sun, 6 Mar 2022 20:17:16 +1100 Subject: [PATCH 12/46] Simplify register by not using bit masks. https://github.com/ubuntor/binaryninja-m68k/commit/bba65beda1a01cd1215c1ca51f8dbf3ca57a99e0 --- m68k.py | 33 +++++++++++++++++++++++++++++++++ m68k_ops.py | 20 ++++++++++---------- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/m68k.py b/m68k.py index 0c424b9..b02df21 100644 --- a/m68k.py +++ b/m68k.py @@ -69,6 +69,39 @@ class M68000(Architecture): 'a6': RegisterInfo('a6', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), 'sp': RegisterInfo('sp', 4, extend=ImplicitRegisterExtend.SignExtendToFullWidth), + 'd0.w': RegisterInfo('d0', 2), + 'd1.w': RegisterInfo('d1', 2), + 'd2.w': RegisterInfo('d2', 2), + 'd3.w': RegisterInfo('d3', 2), + 'd4.w': RegisterInfo('d4', 2), + 'd5.w': RegisterInfo('d5', 2), + 'd6.w': RegisterInfo('d6', 2), + 'd7.w': RegisterInfo('d7', 2), + 'd0.b': RegisterInfo('d0', 1), + 'd1.b': RegisterInfo('d1', 1), + 'd2.b': RegisterInfo('d2', 1), + 'd3.b': RegisterInfo('d3', 1), + 'd4.b': RegisterInfo('d4', 1), + 'd5.b': RegisterInfo('d5', 1), + 'd6.b': RegisterInfo('d6', 1), + 'd7.b': RegisterInfo('d7', 1), + 'a0.w': RegisterInfo('a0', 2), + 'a1.w': RegisterInfo('a1', 2), + 'a2.w': RegisterInfo('a2', 2), + 'a3.w': RegisterInfo('a3', 2), + 'a4.w': RegisterInfo('a4', 2), + 'a5.w': RegisterInfo('a5', 2), + 'a6.w': RegisterInfo('a6', 2), + 'sp.w': RegisterInfo('sp', 2), + 'a0.b': RegisterInfo('a0', 1), + 'a1.b': RegisterInfo('a1', 1), + 'a2.b': RegisterInfo('a2', 1), + 'a3.b': RegisterInfo('a3', 1), + 'a4.b': RegisterInfo('a4', 1), + 'a5.b': RegisterInfo('a5', 1), + 'a6.b': RegisterInfo('a6', 1), + 'sp.b': RegisterInfo('sp', 1), + 'sr': RegisterInfo('sr', 2), 'ccr': RegisterInfo('sr', 1), diff --git a/m68k_ops.py b/m68k_ops.py index fb34e60..ae1bed5 100644 --- a/m68k_ops.py +++ b/m68k_ops.py @@ -157,7 +157,7 @@ SizeSuffix = [ '.b', # SIZE_BYTE '.w', # SIZE_WORD - '.l', # SIZE_LONG + '', # SIZE_LONG ] # Operands @@ -224,15 
+224,14 @@ def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex return il.unimplemented() # return il.set_reg(1 << self.size, self.reg, value) - # if self.size == SIZE_BYTE: - # if self.reg[0] == 'a' or self.reg == 'sp': - # return None - # else: - # return il.set_reg(1, self.reg+'.b', value, flags) - # elif self.size == SIZE_WORD: - # return il.set_reg(2, self.reg+'.w', value, flags) - # else: - # return il.set_reg(4, self.reg, value, flags) + if self.size == SIZE_BYTE: + if self.reg[0] == 'a' or self.reg == 'sp': + return il.unimplemented() + if self.size == SIZE_LONG: + if not value: + return il.unimplemented() + return il.set_reg(1 << self.size, self.reg + SizeSuffix[self.size], value, flags) + """ if self.size == SIZE_BYTE: if self.reg[0] == 'a' or self.reg == 'sp': return il.unimplemented() @@ -248,6 +247,7 @@ def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex return il.set_reg(4, self.reg, value, flags) else: return il.unimplemented() + """ class OpRegisterDirectPair(Operand): From 1dd3e6ed3ac06db8e809ab13407289b660acce18 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 6 Mar 2022 20:54:36 +1100 Subject: [PATCH 13/46] Updated lifting of the 'bcc', 'dbcc', 'bra', and 'jmp' instructions so their targets aren't interpreted as data variables https://github.com/viclw/binaryninja-m68k/commit/96176725a9c095e8f5b672ff5ee2befa1be89c9d --- m68k.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/m68k.py b/m68k.py index b02df21..e2cf2d7 100644 --- a/m68k.py +++ b/m68k.py @@ -514,7 +514,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in elif instr == 'cas': skip_label_found = True - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + skip = il.get_label_for_address(il.arch, il.current_address+length) if skip is None: skip = LowLevelILLabel() @@ -560,7 +560,7 @@ def 
generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in elif instr == 'cas2': skip_label_found = True - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + skip = il.get_label_for_address(il.arch, il.current_address+length) if skip is None: skip = LowLevelILLabel() @@ -623,7 +623,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in elif instr == 'chk': skip_label_found = True - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + skip = il.get_label_for_address(il.arch, il.current_address+length) if skip is None: skip = LowLevelILLabel() @@ -671,7 +671,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in elif instr == 'chk2': skip_label_found = True - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + skip = il.get_label_for_address(il.arch, il.current_address+length) if skip is None: skip = LowLevelILLabel() @@ -956,7 +956,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in elif instr == 'cmp2': skip_label_found = True - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + skip = il.get_label_for_address(il.arch, il.current_address+length) if skip is None: skip = LowLevelILLabel() @@ -1260,8 +1260,13 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in dstlabel = None try: - if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST: + if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST_PTR: + # OpRegisterIndirectDisplacement dstlabel = il.get_label_for_address(il.arch, tmpil[dest_il].constant) + elif (tmpil[dest_il].operation == LowLevelILOperation.LLIL_SX and + tmpil[dest_il].operands[0].operation == LowLevelILOperation.LLIL_CONST): + # OpAbsolute + dstlabel = il.get_label_for_address(il.arch, tmpil[dest_il].operands[0].constant) except: raise @@ -1300,7 +1305,7 @@ def 
generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in il.append(il.unimplemented()) else: t = None - if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST: + if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST_PTR: t = il.get_label_for_address(il.arch, tmpil[dest_il].constant) indirect = False @@ -1349,8 +1354,8 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in il.append(il.unimplemented()) else: branch = None - if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST: - branch = il.get_label_for_address(Architecture['M68000'], tmpil[dest_il].constant) + if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST_PTR: + branch = il.get_label_for_address(il.arch, tmpil[dest_il].constant) indirect = False @@ -1360,7 +1365,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in skip_label_found = True - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + skip = il.get_label_for_address(il.arch, il.current_address+length) if skip is None: skip = LowLevelILLabel() @@ -1422,7 +1427,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in else: skip_label_found = True - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + skip = il.get_label_for_address(il.arch, il.current_address+length) if skip is None: skip = LowLevelILLabel() @@ -1533,7 +1538,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in else: skip_label_found = True - skip = il.get_label_for_address(Architecture['M68000'], il.current_address+length) + skip = il.get_label_for_address(il.arch, il.current_address+length) if skip is None: skip = LowLevelILLabel() From a94588bc728d3c65a42381a6c4344c72d7a65939 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 10 Mar 2022 08:04:40 +1100 Subject: [PATCH 14/46] Fix OpRegisterDirect.get_dest_il when value is 0 --- 
m68k_ops.py | 26 +++++++------------------- test.py | 13 +++++++------ 2 files changed, 14 insertions(+), 25 deletions(-) diff --git a/m68k_ops.py b/m68k_ops.py index ae1bed5..0a15414 100644 --- a/m68k_ops.py +++ b/m68k_ops.py @@ -160,6 +160,12 @@ '', # SIZE_LONG ] + +def dump(obj): + for attr in dir(obj): + print("obj.%s = %r" % (attr, getattr(obj, attr))) + + # Operands class Operand: @@ -223,31 +229,13 @@ def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex if self.reg == 'ccr': return il.unimplemented() - # return il.set_reg(1 << self.size, self.reg, value) if self.size == SIZE_BYTE: if self.reg[0] == 'a' or self.reg == 'sp': return il.unimplemented() if self.size == SIZE_LONG: - if not value: + if value is None: return il.unimplemented() return il.set_reg(1 << self.size, self.reg + SizeSuffix[self.size], value, flags) - """ - if self.size == SIZE_BYTE: - if self.reg[0] == 'a' or self.reg == 'sp': - return il.unimplemented() - else: - return il.set_reg(4, self.reg, il.or_expr(4, il.and_expr(4, il.const(4, 0xffffff00), il.reg(4, self.reg)), il.and_expr(4, il.const(4, 0xff), value)), flags) - elif self.size == SIZE_WORD: - if self.reg[0] == 'a' or self.reg == 'sp': - return il.set_reg(4, self.reg, il.sign_extend(4, value), flags) - else: - return il.set_reg(4, self.reg, il.or_expr(4, il.and_expr(4, il.const(4, 0xffff0000), il.reg(4, self.reg)), il.and_expr(4, il.const(4, 0xffff), value)), flags) - else: - if value: - return il.set_reg(4, self.reg, value, flags) - else: - return il.unimplemented() - """ class OpRegisterDirectPair(Operand): diff --git a/test.py b/test.py index 88952db..16a7a3b 100644 --- a/test.py +++ b/test.py @@ -3,13 +3,14 @@ test_cases = [ # subq.b #$1,d0b - (b'\x53\x00', 'LLIL_SET_REG.d(d0,LLIL_OR.d(LLIL_AND.d(LLIL_CONST.d(0xFFFFFF00),LLIL_REG.d(d0)),LLIL_AND.d(LLIL_CONST.d(0xFF),LLIL_SUB.b{*}(LLIL_REG.b(d0),LLIL_CONST.b(0x1)))))'), + # (b'\x53\x00', 
'LLIL_SET_REG.d(d0,LLIL_OR.d(LLIL_AND.d(LLIL_CONST.d(0xFFFFFF00),LLIL_REG.d(d0)),LLIL_AND.d(LLIL_CONST.d(0xFF),LLIL_SUB.b{*}(LLIL_REG.b(d0),LLIL_CONST.b(0x1)))))'), # ror.b #$1,d1 - (b'\xe2\x19', 'LLIL_SET_REG.d(d1,LLIL_OR.d(LLIL_AND.d(LLIL_CONST.d(0xFFFFFF00),LLIL_REG.d(d1)),LLIL_AND.d(LLIL_CONST.d(0xFF),LLIL_ROR.b{*}(LLIL_REG.b(d1),LLIL_CONST.b(0x1)))))'), - # move.b $0012(a6),d0 - # (b'\x10\x2e\x00\x12', ''), - # btst #$0,d0 - # (b'\x08\x00\x00\x00', ''), + # (b'\xe2\x19', 'LLIL_SET_REG.d(d1,LLIL_OR.d(LLIL_AND.d(LLIL_CONST.d(0xFFFFFF00),LLIL_REG.d(d1)),LLIL_AND.d(LLIL_CONST.d(0xFF),LLIL_ROR.b{*}(LLIL_REG.b(d1),LLIL_CONST.b(0x1)))))'), + # Unimplemented instructions + # moveq #$0000,d0 + (b'\x70\x00', 'LLIL_SET_REG.d{nzvc}(d0,LLIL_CONST.d(0x0))'), + # subq.b #$1,d0 + (b'\x53\x00', ''), ] import re From d3a41fb035491cdd1951d113bd975186631c450d Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 26 Jun 2022 15:56:21 +1000 Subject: [PATCH 15/46] try to correctly lift some flags --- m68k.py | 46 +++++++++++++++++++++++++++++++++++++++++++++- test.py | 7 ++----- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/m68k.py b/m68k.py index e2cf2d7..1223d84 100644 --- a/m68k.py +++ b/m68k.py @@ -1698,7 +1698,51 @@ def get_instruction_low_level_il(self, data: bytes, addr: int, il: lowlevelil.Lo il.append(il.unimplemented()) return length - def is_never_branch_patch_available(self, data: bytes, addr: int = 0) -> bool: + def get_flag_write_low_level_il(self, op: LowLevelILOperation, size: int, write_type, flag, operands, il: LowLevelILFunction) -> ExpressionIndex: + # special + if flag == 'x': + if (op == LowLevelILOperation.LLIL_SUB) or (op == LowLevelILOperation.LLIL_ADD) or (op == LowLevelILOperation.LLIL_NEG): + # subq, add, neg: x is carry + return self.get_default_flag_write_low_level_il(op, size, FlagRole.CarryFlagRole, operands, il) + # if (op == LowLevelILOperation.LLIL_ASR) or (op == LowLevelILOperation.LLIL_LSR): + # # asr, lsr: if shift is 0, 
x is unaffected, otherwise x is carry + # if operands[1] != 0: + # return self.get_default_flag_write_low_level_il(op, size, FlagRole.CarryFlagRole, operands, il) + # return il.flag('x') + + # carry + if flag == 'c': + if (op == LowLevelILOperation.LLIL_STORE) or (op == LowLevelILOperation.LLIL_SET_REG): + # move, moveq: c is cleared + return il.const(1, 0) + + # overflow + if flag == 'v': + if (op == LowLevelILOperation.LLIL_STORE) or (op == LowLevelILOperation.LLIL_SET_REG): + # move, moveq: v is cleared + return il.const(1, 0) + + + if not self._flags: + self._flags = {} + request = {'op': str(LowLevelILOperation(op)), 'size': size, 'write_type': write_type, 'flag': flag} + srequest = str(request) + if not srequest in self._flags: + self._flags[srequest] = 0 + print(srequest, operands) + self._flags[srequest] += 1 + + # if flag == 'c': + # if (op == LowLevelILOperation.LLIL_SUB) or (op == LowLevelILOperation.LLIL_SBB): + # # Subtraction carry flag is inverted from the commom implementation + # return il.not_expr(0, self.get_default_flag_write_low_level_il(op, size, FlagRole.CarryFlagRole, operands, il)) + # # Other operations use a normal carry flag + # return self.get_default_flag_write_low_level_il(op, size, FlagRole.CarryFlagRole, operands, il) + # return Architecture.get_flag_write_low_level_il(self, op, size, write_type, flag, operands, il) + + return Architecture.get_flag_write_low_level_il(self, op, size, write_type, flag, operands, il) + + def is_never_branch_patch_available(self, data, addr: int = 0) -> bool: data = bytearray(data) if data[0] & 0xf0 == 0x60: # BRA, BSR, Bcc diff --git a/test.py b/test.py index 16a7a3b..17ede5d 100644 --- a/test.py +++ b/test.py @@ -2,15 +2,12 @@ from .m68k import * test_cases = [ - # subq.b #$1,d0b - # (b'\x53\x00', 'LLIL_SET_REG.d(d0,LLIL_OR.d(LLIL_AND.d(LLIL_CONST.d(0xFFFFFF00),LLIL_REG.d(d0)),LLIL_AND.d(LLIL_CONST.d(0xFF),LLIL_SUB.b{*}(LLIL_REG.b(d0),LLIL_CONST.b(0x1)))))'), - # ror.b #$1,d1 - # (b'\xe2\x19', 
'LLIL_SET_REG.d(d1,LLIL_OR.d(LLIL_AND.d(LLIL_CONST.d(0xFFFFFF00),LLIL_REG.d(d1)),LLIL_AND.d(LLIL_CONST.d(0xFF),LLIL_ROR.b{*}(LLIL_REG.b(d1),LLIL_CONST.b(0x1)))))'), # Unimplemented instructions # moveq #$0000,d0 (b'\x70\x00', 'LLIL_SET_REG.d{nzvc}(d0,LLIL_CONST.d(0x0))'), # subq.b #$1,d0 - (b'\x53\x00', ''), + # FIXME: Generate flag 'x' + (b'\x53\x00', 'LLIL_SET_REG.b(d0.b,LLIL_SUB.b{*}(LLIL_REG.b(d0),LLIL_CONST.b(0x1)))'), ] import re From 354fefd779191cd55bc8fc064443ed0fe881f063 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Fri, 11 Mar 2022 11:33:01 +1100 Subject: [PATCH 16/46] ANDI + ORI: set c and v flags --- m68k.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/m68k.py b/m68k.py index 1223d84..c68a34e 100644 --- a/m68k.py +++ b/m68k.py @@ -1706,7 +1706,9 @@ def get_flag_write_low_level_il(self, op: LowLevelILOperation, size: int, write_ return self.get_default_flag_write_low_level_il(op, size, FlagRole.CarryFlagRole, operands, il) # if (op == LowLevelILOperation.LLIL_ASR) or (op == LowLevelILOperation.LLIL_LSR): # # asr, lsr: if shift is 0, x is unaffected, otherwise x is carry + # # FIXME: shift size isn't always a constant # if operands[1] != 0: + # FIXME: carry needs to be lifted as well # return self.get_default_flag_write_low_level_il(op, size, FlagRole.CarryFlagRole, operands, il) # return il.flag('x') @@ -1715,12 +1717,18 @@ def get_flag_write_low_level_il(self, op: LowLevelILOperation, size: int, write_ if (op == LowLevelILOperation.LLIL_STORE) or (op == LowLevelILOperation.LLIL_SET_REG): # move, moveq: c is cleared return il.const(1, 0) + if (op == LowLevelILOperation.LLIL_AND) or (op == LowLevelILOperation.LLIL_OR): + # andi, ori: c is cleared + return il.const(1, 0) # overflow if flag == 'v': if (op == LowLevelILOperation.LLIL_STORE) or (op == LowLevelILOperation.LLIL_SET_REG): # move, moveq: v is cleared return il.const(1, 0) + if (op == LowLevelILOperation.LLIL_AND) or (op == LowLevelILOperation.LLIL_OR): + # andi, ori: v is 
cleared + return il.const(1, 0) if not self._flags: From 37b9156bf5cf202104f0bd3367a1a2bc8c039471 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Fri, 11 Mar 2022 11:44:48 +1100 Subject: [PATCH 17/46] EORI: set c and v flags --- m68k.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/m68k.py b/m68k.py index c68a34e..5527c01 100644 --- a/m68k.py +++ b/m68k.py @@ -1717,8 +1717,8 @@ def get_flag_write_low_level_il(self, op: LowLevelILOperation, size: int, write_ if (op == LowLevelILOperation.LLIL_STORE) or (op == LowLevelILOperation.LLIL_SET_REG): # move, moveq: c is cleared return il.const(1, 0) - if (op == LowLevelILOperation.LLIL_AND) or (op == LowLevelILOperation.LLIL_OR): - # andi, ori: c is cleared + if (op == LowLevelILOperation.LLIL_AND) or (op == LowLevelILOperation.LLIL_OR) or (op == LowLevelILOperation.LLIL_XOR): + # andi, ori, eori: c is cleared return il.const(1, 0) # overflow @@ -1726,8 +1726,8 @@ def get_flag_write_low_level_il(self, op: LowLevelILOperation, size: int, write_ if (op == LowLevelILOperation.LLIL_STORE) or (op == LowLevelILOperation.LLIL_SET_REG): # move, moveq: v is cleared return il.const(1, 0) - if (op == LowLevelILOperation.LLIL_AND) or (op == LowLevelILOperation.LLIL_OR): - # andi, ori: v is cleared + if (op == LowLevelILOperation.LLIL_AND) or (op == LowLevelILOperation.LLIL_OR) or (op == LowLevelILOperation.LLIL_XOR): + # andi, ori, eori: v is cleared return il.const(1, 0) From db26105ee0f5c7ede487b0ad514306cf31f3a2dd Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 26 Jun 2022 16:02:01 +1000 Subject: [PATCH 18/46] handle NEG/NEGX flags differently. 
--- m68k.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/m68k.py b/m68k.py index 5527c01..2df40df 100644 --- a/m68k.py +++ b/m68k.py @@ -1701,8 +1701,8 @@ def get_instruction_low_level_il(self, data: bytes, addr: int, il: lowlevelil.Lo def get_flag_write_low_level_il(self, op: LowLevelILOperation, size: int, write_type, flag, operands, il: LowLevelILFunction) -> ExpressionIndex: # special if flag == 'x': - if (op == LowLevelILOperation.LLIL_SUB) or (op == LowLevelILOperation.LLIL_ADD) or (op == LowLevelILOperation.LLIL_NEG): - # subq, add, neg: x is carry + if (op == LowLevelILOperation.LLIL_SUB) or (op == LowLevelILOperation.LLIL_ADD): + # subq, add: x is carry return self.get_default_flag_write_low_level_il(op, size, FlagRole.CarryFlagRole, operands, il) # if (op == LowLevelILOperation.LLIL_ASR) or (op == LowLevelILOperation.LLIL_LSR): # # asr, lsr: if shift is 0, x is unaffected, otherwise x is carry @@ -1733,7 +1733,7 @@ def get_flag_write_low_level_il(self, op: LowLevelILOperation, size: int, write_ if not self._flags: self._flags = {} - request = {'op': str(LowLevelILOperation(op)), 'size': size, 'write_type': write_type, 'flag': flag} + request = {'op': str(LowLevelILOperation(op)), 'write_type': write_type, 'flag': flag} srequest = str(request) if not srequest in self._flags: self._flags[srequest] = 0 From 3f94092883c3803c519dc0db552ad77a4287b321 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 26 Jun 2022 16:03:41 +1000 Subject: [PATCH 19/46] Add default calling convention where everything is passed via registers. 
--- __init__.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/__init__.py b/__init__.py index 1e3ee31..e4c5d2b 100644 --- a/__init__.py +++ b/__init__.py @@ -16,6 +16,7 @@ from .m68k import * from .test import test_all +from binaryninja import Architecture, CallingConvention #PluginCommand.register("Create M68k vector table", "Create M68k vector table", prompt_create_vector_table) PluginCommand.register_for_address("Create M68k vector table", "Create M68k vector table", prompt_create_vector_table) @@ -31,6 +32,16 @@ M68330.register() M68340.register() +# BinaryViewType['ELF'].register_arch(4, Endianness.BigEndian, Architecture['M68030']) +BinaryViewType['ELF'].register_arch(4, Endianness.BigEndian, Architecture['M68030']) + +class ParametersInRegistersCallingConvention(CallingConvention): + name = "ParametersInRegisters" + + +arch = Architecture['M68000'] +arch.register_calling_convention(ParametersInRegistersCallingConvention(arch, 'default')) + BinaryViewType['ELF'].register_arch(4, Endianness.BigEndian, Architecture['M68030']) test_all() From 726f3c4ba8fa77c3c644d66672adfdf163c62ade Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 1 May 2022 11:15:51 +1000 Subject: [PATCH 20/46] Add tests for calling functions. 
--- test.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test.py b/test.py index 17ede5d..b022029 100644 --- a/test.py +++ b/test.py @@ -8,6 +8,12 @@ # subq.b #$1,d0 # FIXME: Generate flag 'x' (b'\x53\x00', 'LLIL_SET_REG.b(d0.b,LLIL_SUB.b{*}(LLIL_REG.b(d0),LLIL_CONST.b(0x1)))'), + + # jsr 0x5dc1c, no arguments for this call + (b'\x4e\xb9\x00\x05\xdc\x1c', 'LLIL_CALL(LLIL_SX.d(LLIL_CONST.d(0x5DC1C)))'), + + # at 0x53a, jsr 0x546, seems to be correctly interpreted as a call + (b'\x4e\xba\x00\x0a', 'LLIL_CALL(LLIL_CONST_PTR.d(0xC))'), ] import re @@ -25,10 +31,11 @@ def il2str(il): # print size-specified IL constants in hex if il.operation in [LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR] and il.size: + const_ptr_suffix = '_PTR' if il.operation in [LowLevelILOperation.LLIL_CONST_PTR] else '' tmp = il.operands[0] if tmp < 0: tmp = (1<<(il.size*8))+tmp tmp = '0x%X' % tmp if il.size else '%d' % il.size - return 'LLIL_CONST%s(%s)' % (size_code, tmp) + return 'LLIL_CONST%s%s(%s)' % (const_ptr_suffix, size_code, tmp) else: return '%s%s%s(%s)' % (il.operation.name, size_code, flags_code, ','.join([il2str(o) for o in il.operands])) elif isinstance(il, list): From 7c2c65a4a4ba38f3519b60d3770ce16be1d9548b Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 1 May 2022 11:48:59 +1000 Subject: [PATCH 21/46] Fix arguments not being shown in binja when calling OpAbsolute functions. 
--- m68k_ops.py | 11 ++++++++--- test.py | 5 ++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/m68k_ops.py b/m68k_ops.py index 0a15414..500a166 100644 --- a/m68k_ops.py +++ b/m68k_ops.py @@ -860,9 +860,14 @@ def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List # return il.sign_extend(self.address_width, # il.const(1 << self.address_size, self.address) # ) - a = il.const(1 << self.address_size, self.address) - b = il.sign_extend(self.address_width, a) - return (b, [a, b]) + a = il.const_pointer(1 << self.address_size, self.address) + return (a, [a]) + # FIXME: binja 3.0.3355-dev won't show function arguments if we + # use il.sign_extend. + # if (1 << self.address_size) == self.address_width: + # return (a, [a]) + # b = il.sign_extend(self.address_width, a) + # return (b, [a, b]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return il.load(1 << self.size, self.get_address_il(il)) diff --git a/test.py b/test.py index b022029..7efc874 100644 --- a/test.py +++ b/test.py @@ -10,7 +10,7 @@ (b'\x53\x00', 'LLIL_SET_REG.b(d0.b,LLIL_SUB.b{*}(LLIL_REG.b(d0),LLIL_CONST.b(0x1)))'), # jsr 0x5dc1c, no arguments for this call - (b'\x4e\xb9\x00\x05\xdc\x1c', 'LLIL_CALL(LLIL_SX.d(LLIL_CONST.d(0x5DC1C)))'), + (b'\x4e\xb9\x00\x05\xdc\x1c', 'LLIL_CALL(LLIL_CONST_PTR.d(0x5DC1C))'), # at 0x53a, jsr 0x546, seems to be correctly interpreted as a call (b'\x4e\xba\x00\x0a', 'LLIL_CALL(LLIL_CONST_PTR.d(0xC))'), @@ -31,11 +31,10 @@ def il2str(il): # print size-specified IL constants in hex if il.operation in [LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR] and il.size: - const_ptr_suffix = '_PTR' if il.operation in [LowLevelILOperation.LLIL_CONST_PTR] else '' tmp = il.operands[0] if tmp < 0: tmp = (1<<(il.size*8))+tmp tmp = '0x%X' % tmp if il.size else '%d' % il.size - return 'LLIL_CONST%s%s(%s)' % (const_ptr_suffix, size_code, tmp) + return '%s%s(%s)' % (il.operation.name, size_code, tmp) else: return 
'%s%s%s(%s)' % (il.operation.name, size_code, flags_code, ','.join([il2str(o) for o in il.operands])) elif isinstance(il, list): From e83c774bc307388f6c0c703cedf9de0d0309892c Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sat, 28 May 2022 16:19:41 +1000 Subject: [PATCH 22/46] slightly improve TST flags --- m68k.py | 10 +++++++--- test.py | 6 ++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/m68k.py b/m68k.py index 2df40df..12ec288 100644 --- a/m68k.py +++ b/m68k.py @@ -133,10 +133,11 @@ class M68000(Architecture): } stack_pointer = 'sp' flags = ['x', 'n', 'z', 'v', 'c'] - flag_write_types = ['*', 'nzvc'] + flag_write_types = ['*', 'nzvc', 'nz'] flags_written_by_flag_write_type = { '*': ['x', 'n', 'z', 'v', 'c'], 'nzvc': ['n', 'z', 'v', 'c'], + 'nz': ['n', 'z'], } flag_roles = { 'x': FlagRole.SpecialFlagRole, @@ -1026,10 +1027,13 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in il.append( il.sub(size_bytes, dest.get_source_il(il), - il.const(4, 0), - flags='nzvc' + il.const(size_bytes, 0), + flags='nz' ) ) + # vc: always cleared + il.append(il.set_flag('v', il.const(1, 0x0))) + il.append(il.set_flag('c', il.const(1, 0x0))) elif instr in ('and', 'andi'): if instr == 'andi' and isinstance(dest, OpRegisterDirect) and dest.reg in ('ccr', 'sr'): if not source.value & 0x01: il.append(il.set_flag('c', il.const(1, 0))) diff --git a/test.py b/test.py index 7efc874..d851f84 100644 --- a/test.py +++ b/test.py @@ -14,6 +14,12 @@ # at 0x53a, jsr 0x546, seems to be correctly interpreted as a call (b'\x4e\xba\x00\x0a', 'LLIL_CALL(LLIL_CONST_PTR.d(0xC))'), + + # lea (data_7a9ee[2]),a1 + (b'\x43\xf9\x00\x07\xa9\xf0', 'LLIL_SET_REG.d(a1,LLIL_CONST_PTR.d(0x7A9F0))'), + + # tst.w d1 + (b'\x4a\x41', 'LLIL_SUB.w{nz}(LLIL_REG.w(d1),LLIL_CONST.w(0x0)); LLIL_SET_FLAG(v,LLIL_CONST.b(0x0)); LLIL_SET_FLAG(c,LLIL_CONST.b(0x0))'), ] import re From 18d239c7e988fa7f29b50809387e6cfc5fd02c9d Mon Sep 17 00:00:00 2001 From: Michael 
Pishchagin Date: Sun, 29 May 2022 08:45:00 +1000 Subject: [PATCH 23/46] fix MOVE for negative displacement --- m68k_ops.py | 4 ++-- test.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/m68k_ops.py b/m68k_ops.py index 500a166..3fd15d9 100644 --- a/m68k_ops.py +++ b/m68k_ops.py @@ -516,8 +516,8 @@ def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List return (r, [r]) else: a = il.reg(4, self.reg) - b = il.const(2, self.offset) - c = il.add(4, a, b) + b = il.const(2, self.offset) if self.offset >= 0 else il.const(2, -self.offset) + c = il.add(4, a, b) if self.offset >= 0 else il.sub(4, a, b) return (c, [a, b, c]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: diff --git a/test.py b/test.py index d851f84..9b97b9e 100644 --- a/test.py +++ b/test.py @@ -20,6 +20,10 @@ # tst.w d1 (b'\x4a\x41', 'LLIL_SUB.w{nz}(LLIL_REG.w(d1),LLIL_CONST.w(0x0)); LLIL_SET_FLAG(v,LLIL_CONST.b(0x0)); LLIL_SET_FLAG(c,LLIL_CONST.b(0x0))'), + + # lea ($279d2e),a0 + # move $-004(a0),$0074(a6) + (b'\x41\xf9\x00\x27\x9d\x2e\x2d\x68\xff\xfc\x00\x74', 'LLIL_SET_REG.d(a0,LLIL_CONST_PTR.d(0x279D2E)); LLIL_STORE.d{nzvc}(LLIL_ADD.d(LLIL_REG.d(a6),LLIL_CONST.w(0x74)),LLIL_LOAD.d(LLIL_SUB.d(LLIL_REG.d(a0),LLIL_CONST.w(0x4))))'), ] import re From 9c6d0ffec8d10c2c9794b35b7e55def514468237 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 26 Jun 2022 16:09:01 +1000 Subject: [PATCH 24/46] Add FIXMEs for potentially incorrect LLIL --- m68k.py | 8 +++++++- m68k_ops.py | 9 +++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/m68k.py b/m68k.py index 12ec288..69668a8 100644 --- a/m68k.py +++ b/m68k.py @@ -376,12 +376,14 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in ) elif instr in ('muls', 'mulu'): if isinstance(dest, OpRegisterDirectPair): + print(instr, 'FIXME') il.append( il.set_reg_split(4, dest.reg1, dest.reg2, il.mult(4, source.get_source_il(il), + # FIXME 
dest.get_source_il(il)[0], flags='nzvc' ) @@ -567,8 +569,10 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in skip = LowLevelILLabel() skip_label_found = False + print(instr, 'FIXME') il.append( il.sub(size_bytes, + # FIXME third.get_source_il(il)[0], source.get_source_il(il)[0], flags='nzvc' @@ -585,8 +589,10 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in il.mark_label(check2) + print(instr, 'FIXME') il.append( il.sub(size_bytes, + # FIXME third.get_source_il(il)[1], source.get_source_il(il)[1], flags='nzvc' @@ -1741,7 +1747,7 @@ def get_flag_write_low_level_il(self, op: LowLevelILOperation, size: int, write_ srequest = str(request) if not srequest in self._flags: self._flags[srequest] = 0 - print(srequest, operands) + # print(srequest, operands) self._flags[srequest] += 1 # if flag == 'c': diff --git a/m68k_ops.py b/m68k_ops.py index 3fd15d9..5f37da7 100644 --- a/m68k_ops.py +++ b/m68k_ops.py @@ -221,6 +221,8 @@ def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: z = il.flag_bit(1, 'z', 2) n = il.flag_bit(1, 'n', 3) x = il.flag_bit(1, 'x', 4) + print(self) + # FIXME: return array return il.or_expr(1, il.or_expr(1, il.or_expr(1, il.or_expr(1, c, v), z), n), x) else: return il.reg(1 << self.size, self.reg) @@ -269,6 +271,7 @@ def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return (il.reg(1 << self.size, self.reg1), il.reg(1 << self.size, self.reg2)) def get_dest_il(self, il: LowLevelILFunction, values, flags=0) -> ExpressionIndex: + # FIXME: are we correctly putting them into lists? return (il.set_reg(1 << self.size, self.reg1, values[0], flags), il.set_reg(1 << self.size, self.reg2, values[1], flags)) @@ -316,9 +319,11 @@ def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List return (r, [r]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + # FIXME: are we correctly putting them into lists? 
return [il.reg(1 << self.size, reg) for reg in self.regs] def get_dest_il(self, il: LowLevelILFunction, values, flags=0) -> ExpressionIndex: + # FIXME: are we correctly putting them into lists? return [il.set_reg(1 << self.size, reg, val, flags) for reg, val in zip(self.regs, values)] @@ -390,9 +395,11 @@ def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List return ((a, b), [a, b]) def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: + # FIXME: are we correctly putting them into lists? return (il.load(1 << self.size, il.reg(4, self.reg1)), il.load(1 << self.size, il.reg(4, self.reg2))) def get_dest_il(self, il: LowLevelILFunction, values, flags=0) -> ExpressionIndex: + # FIXME: are we correctly putting them into lists? #return (il.store(1 << self.size, il.reg(4, self.reg1), values[0], flags), il.store(1 << self.size, il.reg(4, self.reg2), values[1], flags)) return (il.store(1 << self.size, il.reg(4, self.reg1), values[0]), il.store(1 << self.size, il.reg(4, self.reg2), values[1])) @@ -418,6 +425,7 @@ def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: return None def get_post_il(self, il: LowLevelILFunction) -> ExpressionIndex: + # FIXME: are we correctly putting them into lists? return il.set_reg(4, self.reg, il.add(4, @@ -456,6 +464,7 @@ def format(self, addr: int) -> List[InstructionTextToken]: ] def get_pre_il(self, il: LowLevelILFunction) -> ExpressionIndex: + # FIXME: are we correctly putting them into lists? return il.set_reg(4, self.reg, il.sub(4, From f713fb7d6f8e26916ebe8077f00c62e97b35e03b Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Sun, 26 Jun 2022 16:10:31 +1000 Subject: [PATCH 25/46] remove obsolete comment. 
--- test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test.py b/test.py index 9b97b9e..acb17d7 100644 --- a/test.py +++ b/test.py @@ -2,7 +2,6 @@ from .m68k import * test_cases = [ - # Unimplemented instructions # moveq #$0000,d0 (b'\x70\x00', 'LLIL_SET_REG.d{nzvc}(d0,LLIL_CONST.d(0x0))'), # subq.b #$1,d0 From aae028781b075a8d6fc71e5c6247460472f85588 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Tue, 14 Jun 2022 16:03:05 +1000 Subject: [PATCH 26/46] Add testcases for get_address_il2 handling. --- test.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test.py b/test.py index acb17d7..df81fac 100644 --- a/test.py +++ b/test.py @@ -23,6 +23,18 @@ # lea ($279d2e),a0 # move $-004(a0),$0074(a6) (b'\x41\xf9\x00\x27\x9d\x2e\x2d\x68\xff\xfc\x00\x74', 'LLIL_SET_REG.d(a0,LLIL_CONST_PTR.d(0x279D2E)); LLIL_STORE.d{nzvc}(LLIL_ADD.d(LLIL_REG.d(a6),LLIL_CONST.w(0x74)),LLIL_LOAD.d(LLIL_SUB.d(LLIL_REG.d(a0),LLIL_CONST.w(0x4))))'), + + # beq (data_10) + (b'\x67\x00\x00\x0e', 'LLIL_IF(LLIL_FLAG_COND(LowLevelILFlagCondition.LLFC_E,None),1,3); LLIL_JUMP(LLIL_CONST_PTR.d(0x10))'), + + # jmp ($4c862) + (b'\x4e\xf9\x00\x04\xc8\x62', 'LLIL_JUMP(LLIL_CONST_PTR.d(0x4C862))'), + + # bra (data_28) + (b'\x60\x00\x00\x26', 'LLIL_JUMP(LLIL_CONST_PTR.d(0x28))'), + + # dbf d7,(data_-2c) + (b'\x51\xcf\xff\xd4', 'LLIL_IF(LLIL_CONST.b(0x0),6,1); LLIL_RET(LLIL_POP.d()); LLIL_SET_REG.w(temp0,LLIL_SUB.w(LLIL_REG.w(d7),LLIL_CONST.w(0x1))); LLIL_SET_REG.w(d7.w,LLIL_REG.w(temp0)); LLIL_IF(LLIL_CMP_E.w(LLIL_REG.w(temp0),LLIL_CONST.w(0xFFFF)),6,4); LLIL_JUMP(LLIL_CONST_PTR.d(0xFFFFFFD6))'), ] import re From 03e6a0bdc39c1f50b08422741aa1e854efc7288f Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Mon, 10 Oct 2022 08:15:07 +1100 Subject: [PATCH 27/46] Replaced print with log_debug. 
--- __init__.py | 11 +---------- m68k.py | 8 ++++---- m68k_ops.py | 2 -- 3 files changed, 5 insertions(+), 16 deletions(-) diff --git a/__init__.py b/__init__.py index e4c5d2b..906dcf2 100644 --- a/__init__.py +++ b/__init__.py @@ -2,16 +2,7 @@ import os import binaryninja -__module__ = sys.modules[__name__] -__logger = binaryninja.Logger(0, __module__.__name__) - -log = __logger.log -log_debug = __logger.log_debug -log_info = __logger.log_info -log_warn = __logger.log_warn -log_error = __logger.log_error -log_alert = __logger.log_alert - +from .logging import log_debug, __module__ log_debug(f'm68k Plugin loaded from: {os.path.dirname(__module__.__loader__.path)}') from .m68k import * diff --git a/m68k.py b/m68k.py index 69668a8..e0a81b7 100644 --- a/m68k.py +++ b/m68k.py @@ -42,6 +42,7 @@ ImplicitRegisterExtend, SymbolType) from binaryninja import BinaryViewType, lowlevelil +from .logging import log_debug from .m68k_ops import * from .m68k_disasm import * @@ -376,7 +377,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in ) elif instr in ('muls', 'mulu'): if isinstance(dest, OpRegisterDirectPair): - print(instr, 'FIXME') + log_debug(instr, 'FIXME') il.append( il.set_reg_split(4, dest.reg1, @@ -569,7 +570,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in skip = LowLevelILLabel() skip_label_found = False - print(instr, 'FIXME') + log_debug(instr, 'FIXME') il.append( il.sub(size_bytes, # FIXME @@ -589,7 +590,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in il.mark_label(check2) - print(instr, 'FIXME') + log_debug(instr, 'FIXME') il.append( il.sub(size_bytes, # FIXME @@ -1747,7 +1748,6 @@ def get_flag_write_low_level_il(self, op: LowLevelILOperation, size: int, write_ srequest = str(request) if not srequest in self._flags: self._flags[srequest] = 0 - # print(srequest, operands) self._flags[srequest] += 1 # if flag == 'c': diff --git a/m68k_ops.py b/m68k_ops.py 
index 5f37da7..61f48a9 100644 --- a/m68k_ops.py +++ b/m68k_ops.py @@ -736,8 +736,6 @@ def get_address_il2(self, il: LowLevelILFunction) -> Tuple[ExpressionIndex, List e = il.reg(4 if self.ireg_long else 2, self.ireg), f = il.const(1, self.scale) - # print('here1: ', e, ' ', self.ireg_long, ' ', self.ireg) - # FIXME: why 'e' is a tuple with a second element missing??? g = il.mult(4, e[0], f) h = il.const(4, self.outer_displacement) From bf2e0cdb5ae99b6f8af112f36b11709d4c32671c Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Fri, 23 Dec 2022 17:57:50 +1100 Subject: [PATCH 28/46] Hack for programs that rely on `c` flag to not being modified after `rts` --- m68k.py | 133 +++++++++++++++++++++++++++++++++++----------------- m68k_ops.py | 21 +-------- test.py | 46 ++++++++++++++++-- 3 files changed, 131 insertions(+), 69 deletions(-) diff --git a/m68k.py b/m68k.py index e0a81b7..d3de481 100644 --- a/m68k.py +++ b/m68k.py @@ -46,6 +46,42 @@ from .m68k_ops import * from .m68k_disasm import * +ConditionMapping = { + 'hi': LowLevelILFlagCondition.LLFC_UGT, # unsigned greater than + 'ls': LowLevelILFlagCondition.LLFC_ULE, # unsigned less than or equal + 'cc': LowLevelILFlagCondition.LLFC_UGE, # unsigned greater than or equal + 'cs': LowLevelILFlagCondition.LLFC_ULT, # unsigned less than + 'ne': LowLevelILFlagCondition.LLFC_NE, # not equal + 'eq': LowLevelILFlagCondition.LLFC_E, # equal + 'vc': LowLevelILFlagCondition.LLFC_NO, # no overflow + 'vs': LowLevelILFlagCondition.LLFC_O, # overflow + 'pl': LowLevelILFlagCondition.LLFC_POS, # positive + 'mi': LowLevelILFlagCondition.LLFC_NEG, # negative + 'ge': LowLevelILFlagCondition.LLFC_SGE, # signed greater than or equal + 'lt': LowLevelILFlagCondition.LLFC_SLT, # signed less than + 'gt': LowLevelILFlagCondition.LLFC_SGT, # signed greater than + 'le': LowLevelILFlagCondition.LLFC_SLE, # signed less than or equal +} +FlagsRequiredForFlagCondition = { + LowLevelILFlagCondition.LLFC_UGT: ['c', 'z'], # hi +
LowLevelILFlagCondition.LLFC_ULE: ['c', 'z'], # ls + LowLevelILFlagCondition.LLFC_UGE: ['c'], # cc + LowLevelILFlagCondition.LLFC_ULT: ['c'], # cs + LowLevelILFlagCondition.LLFC_NE: ['z'], # ne + LowLevelILFlagCondition.LLFC_E: ['z'], # eq + LowLevelILFlagCondition.LLFC_NO: ['v'], # vc + LowLevelILFlagCondition.LLFC_O: ['v'], # vs + LowLevelILFlagCondition.LLFC_POS: ['n'], # pl + LowLevelILFlagCondition.LLFC_NEG: ['n'], # mi + LowLevelILFlagCondition.LLFC_SGE: ['n', 'v'], # ge + LowLevelILFlagCondition.LLFC_SLT: ['n', 'v'], # lt + LowLevelILFlagCondition.LLFC_SGT: ['n', 'v', 'z'], # gt + LowLevelILFlagCondition.LLFC_SLE: ['n', 'v', 'z'], # le +} + +# hack for programs that rely on flags not being modified after `rts`. +RTS_PASS_FLAGS = False + class M68000(Architecture): name = "M68000" address_size = 4 @@ -106,6 +142,12 @@ class M68000(Architecture): 'sr': RegisterInfo('sr', 2), 'ccr': RegisterInfo('sr', 1), + # fake registers to return flags from subroutines + 'rn': RegisterInfo('rn', 1), + 'rz': RegisterInfo('rz', 1), + 'rv': RegisterInfo('rv', 1), + 'rc': RegisterInfo('rc', 1), + # control registers # MC68010/MC68020/MC68030/MC68040/CPU32 'sfc': RegisterInfo('sfc', 4), @@ -147,22 +189,24 @@ class M68000(Architecture): 'v': FlagRole.OverflowFlagRole, 'c': FlagRole.CarryFlagRole, } - flags_required_for_flag_condition = { - LowLevelILFlagCondition.LLFC_UGT: ['c', 'z'], # hi - LowLevelILFlagCondition.LLFC_ULE: ['c', 'z'], # ls - LowLevelILFlagCondition.LLFC_UGE: ['c'], # cs - LowLevelILFlagCondition.LLFC_ULT: ['c'], # cs - LowLevelILFlagCondition.LLFC_NE: ['z'], # ne - LowLevelILFlagCondition.LLFC_E: ['z'], # eq - LowLevelILFlagCondition.LLFC_NO: ['v'], # vc - LowLevelILFlagCondition.LLFC_O: ['v'], # vs - LowLevelILFlagCondition.LLFC_POS: ['n'], # pl - LowLevelILFlagCondition.LLFC_NEG: ['n'], # mi - LowLevelILFlagCondition.LLFC_SGE: ['n', 'v'], # ge - LowLevelILFlagCondition.LLFC_SLT: ['n', 'v'], # lt - LowLevelILFlagCondition.LLFC_SGT: ['n', 'v', 'z'], # gt -
LowLevelILFlagCondition.LLFC_SLE: ['n', 'v', 'z'], # le + # condition mapping to LLIL flag conditions + ConditionMapping = { + 'hi': LowLevelILFlagCondition.LLFC_UGT, + 'ls': LowLevelILFlagCondition.LLFC_ULE, + 'cc': LowLevelILFlagCondition.LLFC_UGE, + 'cs': LowLevelILFlagCondition.LLFC_ULT, + 'ne': LowLevelILFlagCondition.LLFC_NE, + 'eq': LowLevelILFlagCondition.LLFC_E, + 'vc': LowLevelILFlagCondition.LLFC_NO, + 'vs': LowLevelILFlagCondition.LLFC_O, + 'pl': LowLevelILFlagCondition.LLFC_POS, + 'mi': LowLevelILFlagCondition.LLFC_NEG, + 'ge': LowLevelILFlagCondition.LLFC_SGE, + 'lt': LowLevelILFlagCondition.LLFC_SLT, + 'gt': LowLevelILFlagCondition.LLFC_SGT, + 'le': LowLevelILFlagCondition.LLFC_SLE, } + flags_required_for_flag_condition = FlagsRequiredForFlagCondition control_registers = { } memory_indirect = False @@ -1290,9 +1334,12 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in il.jump(dest.get_address_il(il)) ) elif instr in ('jsr', 'bsr'): + # TODO: need to save 'sr' to stack? 
il.append( il.call(dest.get_address_il(il)) ) + if RTS_PASS_FLAGS: + il.append(il.set_flag('c', il.reg(1, 'rc'))) elif instr == 'callm': # TODO il.append(il.unimplemented()) @@ -1301,48 +1348,43 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in il.append(il.unimplemented()) elif instr in ('bhi', 'bls', 'bcc', 'bcs', 'bne', 'beq', 'bvc', 'bvs', 'bpl', 'bmi', 'bge', 'blt', 'bgt', 'ble'): - flag_cond = ConditionMapping.get(instr[1:], None) + flag_cond = ConditionMapping[instr[1:]] + tmpil = LowLevelILFunction(il.arch) _dest_il = dest.get_address_il2(tmpil) dest_il = _dest_il[0] for i in _dest_il[1]: tmpil.append(i) - cond_il = None - if flag_cond is not None: - cond_il = il.flag_condition(flag_cond) + t = None + if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST_PTR: + t = il.get_label_for_address(il.arch, tmpil[dest_il].constant) - if cond_il is None: - il.append(il.unimplemented()) - else: - t = None - if tmpil[dest_il].operation == LowLevelILOperation.LLIL_CONST_PTR: - t = il.get_label_for_address(il.arch, tmpil[dest_il].constant) + indirect = False - indirect = False - - if t is None: - t = LowLevelILLabel() - indirect = True + if t is None: + t = LowLevelILLabel() + indirect = True - f_label_found = True + f_label_found = True - f = il.get_label_for_address(il.arch, il.current_address+length) + f = il.get_label_for_address(il.arch, il.current_address+length) - if f is None: - f = LowLevelILLabel() - f_label_found = False + if f is None: + f = LowLevelILLabel() + f_label_found = False - il.append( - il.if_expr(cond_il, t, f) - ) + cond_il = il.flag_condition(flag_cond) + il.append( + il.if_expr(cond_il, t, f) + ) - if indirect: - il.mark_label(t) - il.append(il.jump(dest.get_address_il(il))) + if indirect: + il.mark_label(t) + il.append(il.jump(dest.get_address_il(il))) - if not f_label_found: - il.mark_label(f) + if not f_label_found: + il.mark_label(f) elif instr in ('dbt', 'dbf', 'dbhi', 'dbls', 'dbcc', 'dbcs', 
'dbne', 'dbeq', 'dbvc', 'dbvs', 'dbpl', 'dbmi', 'dbge', 'dblt', 'dbgt', 'dble'): @@ -1514,7 +1556,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in elif instr == 'rtr': il.append( il.set_reg(2, - "ccr", + 'ccr', il.pop(2) ) ) @@ -1524,6 +1566,9 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in ) ) elif instr == 'rts': + if RTS_PASS_FLAGS: + il.append(il.set_reg(1, 'rc', il.flag('c'))) + il.append( il.ret( il.pop(4) diff --git a/m68k_ops.py b/m68k_ops.py index 61f48a9..cb84a29 100644 --- a/m68k_ops.py +++ b/m68k_ops.py @@ -228,7 +228,7 @@ def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: return il.reg(1 << self.size, self.reg) def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: - if self.reg == 'ccr': + if self.reg in ['ccr', 'sr']: return il.unimplemented() if self.size == SIZE_BYTE: @@ -915,22 +915,3 @@ def get_source_il(self, il: LowLevelILFunction) -> ExpressionIndex: def get_dest_il(self, il: LowLevelILFunction, value, flags=0) -> ExpressionIndex: return il.unimplemented() - - -# condition mapping to LLIL flag conditions -ConditionMapping = { - 'hi': LowLevelILFlagCondition.LLFC_UGT, - 'ls': LowLevelILFlagCondition.LLFC_ULE, - 'cc': LowLevelILFlagCondition.LLFC_UGE, - 'cs': LowLevelILFlagCondition.LLFC_ULT, - 'ne': LowLevelILFlagCondition.LLFC_NE, - 'eq': LowLevelILFlagCondition.LLFC_E, - 'vc': LowLevelILFlagCondition.LLFC_NO, - 'vs': LowLevelILFlagCondition.LLFC_O, - 'pl': LowLevelILFlagCondition.LLFC_POS, - 'mi': LowLevelILFlagCondition.LLFC_NEG, - 'ge': LowLevelILFlagCondition.LLFC_SGE, - 'lt': LowLevelILFlagCondition.LLFC_SLT, - 'gt': LowLevelILFlagCondition.LLFC_SGT, - 'le': LowLevelILFlagCondition.LLFC_SLE, -} diff --git a/test.py b/test.py index df81fac..d51c6e6 100644 --- a/test.py +++ b/test.py @@ -1,18 +1,28 @@ # inspired by https://github.com/Vector35/arch-arm64/blob/staging/arm64test.py from .m68k import * +SAVE_FLAGS = '' 
+RESTORE_FLAGS = '' +FINALIZER = 'LLIL_RET(LLIL_POP.d())' + +if RTS_PASS_FLAGS: + SAVE_FLAGS = 'LLIL_SET_REG.b(rc,LLIL_FLAG(c))' + RESTORE_FLAGS = '; LLIL_SET_FLAG(c,LLIL_REG.b(rc))' + FINALIZER = SAVE_FLAGS + '; ' + FINALIZER + test_cases = [ # moveq #$0000,d0 (b'\x70\x00', 'LLIL_SET_REG.d{nzvc}(d0,LLIL_CONST.d(0x0))'), + # subq.b #$1,d0 # FIXME: Generate flag 'x' (b'\x53\x00', 'LLIL_SET_REG.b(d0.b,LLIL_SUB.b{*}(LLIL_REG.b(d0),LLIL_CONST.b(0x1)))'), # jsr 0x5dc1c, no arguments for this call - (b'\x4e\xb9\x00\x05\xdc\x1c', 'LLIL_CALL(LLIL_CONST_PTR.d(0x5DC1C))'), + (b'\x4e\xb9\x00\x05\xdc\x1c', 'LLIL_CALL(LLIL_CONST_PTR.d(0x5DC1C))' + RESTORE_FLAGS), # at 0x53a, jsr 0x546, seems to be correctly interpreted as a call - (b'\x4e\xba\x00\x0a', 'LLIL_CALL(LLIL_CONST_PTR.d(0xC))'), + (b'\x4e\xba\x00\x0a', 'LLIL_CALL(LLIL_CONST_PTR.d(0xC))' + RESTORE_FLAGS), # lea (data_7a9ee[2]),a1 (b'\x43\xf9\x00\x07\xa9\xf0', 'LLIL_SET_REG.d(a1,LLIL_CONST_PTR.d(0x7A9F0))'), @@ -34,9 +44,22 @@ (b'\x60\x00\x00\x26', 'LLIL_JUMP(LLIL_CONST_PTR.d(0x28))'), # dbf d7,(data_-2c) - (b'\x51\xcf\xff\xd4', 'LLIL_IF(LLIL_CONST.b(0x0),6,1); LLIL_RET(LLIL_POP.d()); LLIL_SET_REG.w(temp0,LLIL_SUB.w(LLIL_REG.w(d7),LLIL_CONST.w(0x1))); LLIL_SET_REG.w(d7.w,LLIL_REG.w(temp0)); LLIL_IF(LLIL_CMP_E.w(LLIL_REG.w(temp0),LLIL_CONST.w(0xFFFF)),6,4); LLIL_JUMP(LLIL_CONST_PTR.d(0xFFFFFFD6))'), + (b'\x51\xcf\xff\xd4','; '.join(['LLIL_IF(LLIL_CONST.b(0x0),6,1); ' + FINALIZER + '; LLIL_SET_REG.w(temp0,LLIL_SUB.w(LLIL_REG.w(d7),LLIL_CONST.w(0x1))); LLIL_SET_REG.w(d7.w,LLIL_REG.w(temp0)); LLIL_IF(LLIL_CMP_E.w(LLIL_REG.w(temp0),LLIL_CONST.w(0xFFFF)),6,4); LLIL_JUMP(LLIL_CONST_PTR.d(0xFFFFFFD6))'])), + + # bcc (data_5a) + (b'\x64\x00\x00\x58', 'LLIL_IF(LLIL_FLAG_COND(LowLevelILFlagCondition.LLFC_UGE,None),1,3); LLIL_JUMP(LLIL_CONST_PTR.d(0x5A))'), + + # rts + (b'\x4e\x75', ''), ] +if RTS_PASS_FLAGS: + # rtr + test_cases.append((b'\x4e\x77', 'LLIL_SET_REG.w(ccr,LLIL_POP.w()); LLIL_RET(LLIL_POP.d())')) +else: + # rtr + 
test_cases.append((b'\x4e\x77', 'LLIL_SET_REG.w(ccr,LLIL_POP.w())')) + import re import sys import binaryninja @@ -64,7 +87,7 @@ def il2str(il): return str(il) def instr_to_il(data): - RETURN = b'\x4e\x75' + RETURN = b'\x4e\x75' # rts platform = binaryninja.Platform['M68000'] # make a pretend function that returns @@ -78,7 +101,8 @@ def instr_to_il(data): for il in block: result.append(il2str(il)) result = '; '.join(result) - ret = 'LLIL_RET(LLIL_POP.d())' + + ret = FINALIZER if result.endswith(ret): result = result[0:result.index(ret)] if result.endswith('; '): @@ -100,10 +124,20 @@ def il_str_to_tree(ilstr): result += '\n' result += ' '*depth pass + elif c == ';': + result += '\n' + depth = 0 + result += ' '*depth + elif c == ' ': + pass else: result += c return result +# print(il_str_to_tree('foo(bar)')) +# print(il_str_to_tree('a(b(c,d(z),e));d(e(f))')) +# print(il_str_to_tree('LLIL_RET(LLIL_POP.d())' + ';LLIL_RET(LLIL_POP.d())')) + def test_all(): ret = True for (test_i, (data, expected)) in enumerate(test_cases): @@ -112,9 +146,11 @@ def test_all(): print('MISMATCH AT TEST %d!' % test_i) print('\t input: %s' % data.hex()) print('\texpected: %s' % expected) + # print(il_str_to_tree(expected)) print('\t actual: ') print(actual) print('\t tree:') print(il_str_to_tree(actual)) + print('\n\n') ret = False return ret From 48c66a219859666cd2bdc5bec9a346bf67cf5449 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Fri, 23 Dec 2022 18:05:50 +1100 Subject: [PATCH 29/46] Add tests that check direct `ccr` modification. 
--- test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test.py b/test.py index d51c6e6..094814f 100644 --- a/test.py +++ b/test.py @@ -51,6 +51,12 @@ # rts (b'\x4e\x75', ''), + + # andi.b #$-2,ccr + (b'\x02\x3c\x00\xfe', 'LLIL_SET_FLAG(c,LLIL_CONST.b(0x0))'), + + # ori.b #$1,ccr + (b'\x00\x3c\x00\x01', 'LLIL_SET_FLAG(c,LLIL_CONST.b(0x1)); LLIL_SET_FLAG(x,LLIL_CONST.b(0x1))'), ] if RTS_PASS_FLAGS: From fe7739144bc9c1c4a1478cb955c80730da94d7b1 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 26 Jan 2023 23:20:39 +1100 Subject: [PATCH 30/46] Make tests pass on latest dev binary ninja. --- test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test.py b/test.py index 094814f..b14f97d 100644 --- a/test.py +++ b/test.py @@ -35,7 +35,7 @@ (b'\x41\xf9\x00\x27\x9d\x2e\x2d\x68\xff\xfc\x00\x74', 'LLIL_SET_REG.d(a0,LLIL_CONST_PTR.d(0x279D2E)); LLIL_STORE.d{nzvc}(LLIL_ADD.d(LLIL_REG.d(a6),LLIL_CONST.w(0x74)),LLIL_LOAD.d(LLIL_SUB.d(LLIL_REG.d(a0),LLIL_CONST.w(0x4))))'), # beq (data_10) - (b'\x67\x00\x00\x0e', 'LLIL_IF(LLIL_FLAG_COND(LowLevelILFlagCondition.LLFC_E,None),1,3); LLIL_JUMP(LLIL_CONST_PTR.d(0x10))'), + (b'\x67\x00\x00\x0e', 'LLIL_IF(LLIL_FLAG_COND(0,None),1,3); LLIL_JUMP(LLIL_CONST_PTR.d(0x10))'), # jmp ($4c862) (b'\x4e\xf9\x00\x04\xc8\x62', 'LLIL_JUMP(LLIL_CONST_PTR.d(0x4C862))'), @@ -47,7 +47,7 @@ (b'\x51\xcf\xff\xd4','; '.join(['LLIL_IF(LLIL_CONST.b(0x0),6,1); ' + FINALIZER + '; LLIL_SET_REG.w(temp0,LLIL_SUB.w(LLIL_REG.w(d7),LLIL_CONST.w(0x1))); LLIL_SET_REG.w(d7.w,LLIL_REG.w(temp0)); LLIL_IF(LLIL_CMP_E.w(LLIL_REG.w(temp0),LLIL_CONST.w(0xFFFF)),6,4); LLIL_JUMP(LLIL_CONST_PTR.d(0xFFFFFFD6))'])), # bcc (data_5a) - (b'\x64\x00\x00\x58', 'LLIL_IF(LLIL_FLAG_COND(LowLevelILFlagCondition.LLFC_UGE,None),1,3); LLIL_JUMP(LLIL_CONST_PTR.d(0x5A))'), + (b'\x64\x00\x00\x58', 'LLIL_IF(LLIL_FLAG_COND(7,None),1,3); LLIL_JUMP(LLIL_CONST_PTR.d(0x5A))'), # rts (b'\x4e\x75', ''), @@ -57,6 +57,9 @@ # ori.b #$1,ccr (b'\x00\x3c\x00\x01', 
'LLIL_SET_FLAG(c,LLIL_CONST.b(0x1)); LLIL_SET_FLAG(x,LLIL_CONST.b(0x1))'), + + # scs.b d1 + (b'\x55\xc1', 'LLIL_IF(LLIL_FLAG_COND(3,None),1,3); LLIL_SET_REG.b(d1.b,LLIL_CONST.b(0x1)); LLIL_GOTO(5); LLIL_SET_REG.b(d1.b,LLIL_CONST.b(0x0)); LLIL_GOTO(5)'), ] if RTS_PASS_FLAGS: From f668b4121489beb634a7b5dd069587d26fdf7912 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 26 Jan 2023 23:21:46 +1100 Subject: [PATCH 31/46] Add test for current 'swap' behaviour. --- test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test.py b/test.py index b14f97d..3812f29 100644 --- a/test.py +++ b/test.py @@ -60,6 +60,9 @@ # scs.b d1 (b'\x55\xc1', 'LLIL_IF(LLIL_FLAG_COND(3,None),1,3); LLIL_SET_REG.b(d1.b,LLIL_CONST.b(0x1)); LLIL_GOTO(5); LLIL_SET_REG.b(d1.b,LLIL_CONST.b(0x0)); LLIL_GOTO(5)'), + + # swap d6 + (b'\x48\x46', 'LLIL_SET_REG.d(d6,LLIL_ROR.d(LLIL_REG.d(d6),LLIL_CONST.b(0x10)))'), ] if RTS_PASS_FLAGS: From cbace4463ee9b2f8fc9bafe98c7bc5b8a10e1c97 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 12:29:52 +1100 Subject: [PATCH 32/46] Adopt uv, CI, and mock-based tests --- .github/workflows/ci.yml | 56 +++++++++++ .gitignore | 12 +++ AGENTS.md | 47 ++++++++++ logging.py | 55 +++++++++++ m68k_disasm.py | 4 +- m68k_ops.py | 3 +- pyproject.toml | 44 +++++++++ tests/conftest.py | 194 +++++++++++++++++++++++++++++++++++++++ tests/test_llil.py | 99 ++++++++++++++++++++ uv.lock | 121 ++++++++++++++++++++++++ 10 files changed, 632 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 .gitignore create mode 100644 AGENTS.md create mode 100644 logging.py create mode 100644 pyproject.toml create mode 100644 tests/conftest.py create mode 100644 tests/test_llil.py create mode 100644 uv.lock diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..ceffaf4 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,56 @@ +name: CI + +on: + push: + branches: [main, master] + 
pull_request: + +jobs: + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Install dependencies + run: uv sync --frozen --extra dev + + - name: Ruff + run: uv run ruff check . + + pytest: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Install dependencies + run: uv sync --frozen --extra dev + + - name: Pytest + env: + FORCE_BINJA_MOCK: 1 + run: uv run pytest diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fba8d76 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# Python / tooling +.venv/ +__pycache__/ +*.py[cod] +.pytest_cache/ +.ruff_cache/ +.mypy_cache/ +.DS_Store + +# Local reference checkout (not part of this repo) +binja-esr/ + diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..2a543d8 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,47 @@ +# Repository Guidelines + +## Project Structure & Module Organization + +- `__init__.py`: Binary Ninja plugin entrypoint (registers architectures and commands). +- `m68k.py`: core Architecture + lifter (LLIL generation). +- `m68k_disasm.py` / `m68k_ops.py`: instruction decoding and operand helpers. +- `plugin.json`: Plugin Manager metadata. +- `test.py`: LLIL regression tests (requires Binary Ninja’s Python API). +- `binja-esr/`: newer, more fully-tooled variant; follow `binja-esr/AGENTS.md` when contributing there. + +## Build, Test, and Development Commands + +Tooling: use `uv` for dependency management and running commands. 
+ +- Install dev deps: `uv sync --extra dev` +- Lint: `uv run ruff check .` +- Unit tests (uses mocks, no Binary Ninja required): `uv run pytest` + +- Load locally: place this folder in your Binary Ninja plugins directory and restart Binary Ninja. +- Syntax-only check (no Binary Ninja required): `python3 -m compileall .` +- Run tests inside Binary Ninja’s Python environment: + - In the Binary Ninja scripting console: `from m68k.test import test_all; assert test_all()` + +## Coding Style & Naming Conventions + +- Python 3, 4-space indentation; keep changes small and readable. +- Prefer type hints where practical (matches the existing `m68k.py` style). +- Naming: `snake_case` for functions/variables, `CamelCase` for classes, `UPPER_SNAKE_CASE` for constants. +- Use `logging.py` (`log_debug`, `log_info`, …) instead of `print` so output goes to the Binary Ninja log. + +## Testing Guidelines + +- Add/adjust cases in `test.py` when changing decode or lifting behavior. +- Tests compare LLIL string output; update expected strings only for intentional behavior changes (note the Binary Ninja version if the IL printer changed). +- For mock-based tests, use the shared helper dependency (`binja-test-mocks` / “binja-test-helpers”) instead of duplicating Binary Ninja API stubs in this repo; if an API surface is missing, expand it upstream and bump the dependency. + +## Commit & Pull Request Guidelines + +- Commits use short, imperative summaries (e.g., “Fix TST flags”, “Add tests for calling functions”). +- PRs should include: what instructions/flags changed, how to reproduce, and confirmation that `test_all()` passes (or why it cannot). + +## CI & Git Tips + +- Watch required checks (use `--watch`; there is no `--wait`): `gh pr checks --watch --interval 5 --required` (add `--fail-fast` if desired). 
+- Continue a rebase without an editor prompt: `GIT_EDITOR=true git rebase --continue` +- Scripted interactive rebase todo editing: `GIT_SEQUENCE_EDITOR=true git rebase -i ` (keep these env vars per-command; don’t export globally). diff --git a/logging.py b/logging.py new file mode 100644 index 0000000..447f2a6 --- /dev/null +++ b/logging.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import sys + +# This repository includes `logging.py` for Binary Ninja logging helpers. When the +# repo root is on `sys.path`, this filename can shadow the stdlib `logging` +# module. If imported as top-level `logging`, delegate to the real stdlib module +# so tools like `pytest` keep working. +if __name__ == "logging": + import importlib.util + import sysconfig + from pathlib import Path + + stdlib_path = sysconfig.get_path("stdlib") + if not stdlib_path: + raise ImportError("Unable to locate stdlib path for `logging`") + + stdlib_logging = Path(stdlib_path) / "logging" / "__init__.py" + spec = importlib.util.spec_from_file_location("logging", stdlib_logging) + if spec is None or spec.loader is None: + raise ImportError("Unable to load stdlib `logging` module spec") + + module = importlib.util.module_from_spec(spec) + sys.modules[__name__] = module + spec.loader.exec_module(module) + globals().update(module.__dict__) +else: + import logging as _py_logging + + try: + import binaryninja # type: ignore[import-not-found] + except ImportError: # pragma: no cover + binaryninja = None # type: ignore[assignment] + + __module__ = sys.modules[__name__] + + _bn_logger = getattr(binaryninja, "Logger", None) if binaryninja is not None else None + if _bn_logger is not None: + __logger = _bn_logger(0, __module__.__name__) + + log = __logger.log + log_debug = __logger.log_debug + log_info = __logger.log_info + log_warn = __logger.log_warn + log_error = __logger.log_error + log_alert = __logger.log_alert + else: + __logger = _py_logging.getLogger(__module__.__name__) + + log = 
__logger.log + log_debug = __logger.debug + log_info = __logger.info + log_warn = __logger.warning + log_error = __logger.error + log_alert = __logger.critical diff --git a/m68k_disasm.py b/m68k_disasm.py index 653034d..f1776fd 100644 --- a/m68k_disasm.py +++ b/m68k_disasm.py @@ -26,7 +26,8 @@ import traceback import os -from binaryninja.architecture import Architecture, RegisterInfo, InstructionInfo, InstructionTextToken +from binaryninja.architecture import Architecture +from binaryninja.function import RegisterInfo, InstructionInfo, InstructionTextToken from binaryninja.lowlevelil import LowLevelILFunction, LowLevelILLabel, LLIL_TEMP from binaryninja.binaryview import BinaryView from binaryninja.plugin import PluginCommand @@ -875,4 +876,3 @@ def decode_instruction(self, data: bytes, addr: int) -> Tuple[str, int, Optional #print((instr, length, size, source, dest, third)) return instr, length, size, source, dest, third - diff --git a/m68k_ops.py b/m68k_ops.py index cb84a29..a071ec6 100644 --- a/m68k_ops.py +++ b/m68k_ops.py @@ -28,7 +28,8 @@ import traceback import os -from binaryninja.architecture import Architecture, RegisterInfo, InstructionInfo, InstructionTextToken +from binaryninja.architecture import Architecture +from binaryninja.function import RegisterInfo, InstructionInfo, InstructionTextToken from binaryninja.lowlevelil import LowLevelILFunction, LowLevelILLabel, LLIL_TEMP, LowLevelILFunction, ExpressionIndex from binaryninja.binaryview import BinaryView from binaryninja.plugin import PluginCommand diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..3be334d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,44 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "binaryninja-m68k" +version = "0.5.0" +description = "Binary Ninja disassembler and lifter for the Motorola 68k architecture." 
+readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "binja-test-mocks>=0.1.5", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.0.0", + "ruff>=0.1.0", +] + +[tool.pytest.ini_options] +testpaths = ["tests"] +norecursedirs = ["binja-esr"] + +[tool.ruff] +line-length = 120 +target-version = "py311" +extend-exclude = ["binja-esr"] + +[tool.ruff.lint] +ignore = [ + "E402", # imports not at top of file (legacy plugin/test layout) + "E501", # line too long (legacy code) + "E701", # multiple statements on one line (legacy code) + "E711", # `== None` (legacy code) + "E713", # `not in` (legacy code) + "F401", # unused imports (plugin entrypoint relies on side effects) + "F403", # `import *` used (legacy plugin layout) + "F405", # names from star imports (legacy plugin layout) + "F811", # redefinition from imports (legacy code) +] + +[tool.uv] +package = false diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..25aae83 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,194 @@ +from __future__ import annotations + +import os +import sys +import types +from pathlib import Path + +os.environ.setdefault("FORCE_BINJA_MOCK", "1") + +# Installs a stubbed `binaryninja` module into `sys.modules`. 
+from binja_test_mocks import binja_api # noqa: F401 # pyright: ignore + + +def _install_llil_test_helpers() -> None: + import binaryninja + from binja_test_mocks import mock_llil + from binja_test_mocks.mock_llil import MockFlag, MockLLIL, MockLowLevelILFunction, MockReg, mreg + from binaryninja.enums import LowLevelILOperation + + mock_llil.set_size_lookup( + {1: ".b", 2: ".w", 4: ".d", 8: ".q", 16: ".o"}, + {"b": 1, "w": 2, "d": 4, "q": 8, "o": 16}, + ) + + def _name_str(self) -> str: # type: ignore[no-untyped-def] + return self.name + + MockReg.__str__ = _name_str # type: ignore[assignment] + MockFlag.__str__ = _name_str # type: ignore[assignment] + + if not isinstance(MockLLIL.__dict__.get("flags"), property): + _flags_method = MockLLIL.flags + + @property + def flags(self) -> str | None: # type: ignore[no-redef] + return _flags_method(self) + + MockLLIL.flags = flags # type: ignore[assignment] + + if not hasattr(MockLLIL, "operation"): + + @property + def operation(self) -> object: # type: ignore[no-redef] + return getattr(LowLevelILOperation, f"LLIL_{self.bare_op()}") + + @property + def operands(self) -> list[object]: # type: ignore[no-redef] + return self.ops + + @property + def size(self) -> int | None: # type: ignore[no-redef] + return self.width() + + @property + def constant(self) -> int: # type: ignore[no-redef] + if self.operation in (LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR): + return int(self.ops[0]) + raise AttributeError("Instruction has no constant") + + MockLLIL.operation = operation # type: ignore[attr-defined] + MockLLIL.operands = operands # type: ignore[attr-defined] + MockLLIL.size = size # type: ignore[attr-defined] + MockLLIL.constant = constant # type: ignore[attr-defined] + + class LowLevelILFunction(MockLowLevelILFunction): # noqa: N801 + _default_current_address: int = 0 + + def __init__(self, *_args: object, **_kwargs: object) -> None: + super().__init__() + self.current_address = 
self._default_current_address + self._label_ids: dict[object, int] = {} + self._next_label_id = 1 + + def __iter__(self): + return iter(self.ils) + + def __getitem__(self, idx: object) -> object: + if isinstance(idx, int): + return self.ils[idx] + return idx + + def _label_id(self, label: object) -> int: + existing = self._label_ids.get(label) + if existing is not None: + return existing + assigned = self._next_label_id + self._label_ids[label] = assigned + self._next_label_id += 2 + return assigned + + def expr(self, *args: object, **kwargs: object) -> object: # type: ignore[override] + flags = kwargs.get("flags") + if flags in (0, "0"): + kwargs["flags"] = None + return super().expr(*args, **kwargs) + + def reg(self, size: int, reg_obj: object) -> object: # type: ignore[override] + ExpressionIndex = binaryninja.lowlevelil.ExpressionIndex # type: ignore[attr-defined] + if isinstance(reg_obj, ExpressionIndex): + reg_obj = mreg(f"temp{int(reg_obj) - 0x80000000}") + return super().reg(size, reg_obj) + + def set_reg(self, size: int, reg_obj: object, value: object, flags: object = 0) -> object: # type: ignore[override] + ExpressionIndex = binaryninja.lowlevelil.ExpressionIndex # type: ignore[attr-defined] + if isinstance(reg_obj, ExpressionIndex): + reg_obj = mreg(f"temp{int(reg_obj) - 0x80000000}") + if isinstance(reg_obj, str): + reg_obj = mreg(reg_obj) + flags_arg = None if flags in (0, "0") else flags + return self.expr(LowLevelILOperation.LLIL_SET_REG, reg_obj, value, size=size, flags=flags_arg) + + def goto(self, label, loc=None) -> object: # type: ignore[override] + return self.expr(LowLevelILOperation.LLIL_GOTO, self._label_id(label), size=None) + + def if_expr(self, cond, t, f) -> object: # type: ignore[override] + return self.expr( + LowLevelILOperation.LLIL_IF, + cond, + self._label_id(t), + self._label_id(f), + size=None, + ) + + def flag_condition(self, cond, loc=None) -> object: # type: ignore[override] + return 
self.expr(LowLevelILOperation.LLIL_FLAG_COND, int(cond), None, size=None) + + def mark_label(self, label) -> None: # type: ignore[override] + self._label_id(label) + return None + + def get_label_for_address(self, _arch: object, _addr: int): # type: ignore[override] + return None + + llil_mod = sys.modules.get("binaryninja.lowlevelil") + if llil_mod is not None: + llil_mod.LowLevelILFunction = LowLevelILFunction # type: ignore[attr-defined] + llil_mod.LowLevelILInstruction = MockLLIL # type: ignore[attr-defined] + binaryninja.lowlevelil.LowLevelILFunction = LowLevelILFunction # type: ignore[attr-defined] + binaryninja.lowlevelil.LowLevelILInstruction = MockLLIL # type: ignore[attr-defined] + + +def _patch_missing_binaryninja_submodules() -> None: + if "binaryninja.plugin" not in sys.modules: + plugin_mod = types.ModuleType("binaryninja.plugin") + + class PluginCommand: # noqa: N801 + @staticmethod + def register_for_address(*_args, **_kwargs) -> None: + return None + + @staticmethod + def register(*_args, **_kwargs) -> None: + return None + + plugin_mod.PluginCommand = PluginCommand + sys.modules["binaryninja.plugin"] = plugin_mod + + interaction_mod = sys.modules.get("binaryninja.interaction") + if interaction_mod is not None: + if not hasattr(interaction_mod, "AddressField"): + + class AddressField: # noqa: N801 + def __init__(self, *_args, **_kwargs) -> None: + return None + + interaction_mod.AddressField = AddressField + + if not hasattr(interaction_mod, "ChoiceField"): + + class ChoiceField: # noqa: N801 + def __init__(self, *_args, **_kwargs) -> None: + return None + + interaction_mod.ChoiceField = ChoiceField + + if not hasattr(interaction_mod, "get_form_input"): + + def get_form_input(*_args, **_kwargs) -> bool: # noqa: N802 + return False + + interaction_mod.get_form_input = get_form_input + + +def _install_repo_as_m68k_package() -> None: + repo_root = Path(__file__).resolve().parents[1] + pkg = types.ModuleType("m68k") + pkg.__path__ = [str(repo_root)] + 
pkg.__file__ = str(repo_root / "__init__.py") + sys.modules["m68k"] = pkg + + +_patch_missing_binaryninja_submodules() +_install_llil_test_helpers() +_install_repo_as_m68k_package() diff --git a/tests/test_llil.py b/tests/test_llil.py new file mode 100644 index 0000000..76f9da9 --- /dev/null +++ b/tests/test_llil.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +import importlib + +import pytest +from binaryninja import lowlevelil + +m68k_test = importlib.import_module("m68k.test") +m68k_arch = importlib.import_module("m68k.m68k") + + +def _strip_finalizer(il_str: str) -> str: + finalizer = m68k_test.FINALIZER + if il_str.endswith(finalizer): + il_str = il_str[: il_str.index(finalizer)] + if il_str.endswith("; "): + il_str = il_str[:-2] + return il_str + + +def _canonicalize_labels(il_str: str) -> str: + label_map: dict[str, int] = {} + next_label = 1 + + def _alloc(label_str: str) -> str: + nonlocal next_label + if label_str not in label_map: + label_map[label_str] = next_label + next_label += 2 + return str(label_map[label_str]) + + def _split_top_level_args(arg_str: str) -> list[str]: + parts: list[str] = [] + buf: list[str] = [] + depth = 0 + for ch in arg_str: + if ch == "(": + depth += 1 + elif ch == ")": + depth -= 1 + elif ch == "," and depth == 0: + parts.append("".join(buf).strip()) + buf = [] + continue + buf.append(ch) + parts.append("".join(buf).strip()) + return parts + + instructions = [] if not il_str else il_str.split("; ") + out: list[str] = [] + for instr in instructions: + if instr.startswith("LLIL_GOTO(") and instr.endswith(")"): + label = instr[len("LLIL_GOTO(") : -1].strip() + out.append(f"LLIL_GOTO({_alloc(label)})") + continue + + if instr.startswith("LLIL_IF(") and instr.endswith(")"): + inner = instr[len("LLIL_IF(") : -1] + args = _split_top_level_args(inner) + if len(args) == 3: + args[1] = _alloc(args[1]) + args[2] = _alloc(args[2]) + out.append(f"LLIL_IF({','.join(args)})") + continue + + out.append(instr) + + return "; 
".join(out) + + +def _normalize_il(il_str: str) -> str: + il_str = _strip_finalizer(il_str) + if not il_str: + return "" + + finalizer_parts = {p.strip() for p in m68k_test.FINALIZER.split("; ") if p.strip()} + instructions = [p for p in il_str.split("; ") if p and p not in finalizer_parts] + return _canonicalize_labels("; ".join(instructions)) + + +def _lift_to_il_str(data: bytes, *, start_addr: int = 0) -> str: + arch = m68k_arch.M68000() + il = lowlevelil.LowLevelILFunction(arch) + + offset = 0 + while offset < len(data): + il.current_address = start_addr + offset # type: ignore[attr-defined] + il.__class__._default_current_address = il.current_address # type: ignore[attr-defined] + length = arch.get_instruction_low_level_il(data[offset:], start_addr + offset, il) + assert length is not None and length > 0 + offset += length + + result = "; ".join(m68k_test.il2str(instr) for instr in il) + return _strip_finalizer(result) + + +@pytest.mark.parametrize("data, expected", m68k_test.test_cases) +def test_llil_regressions(data: bytes, expected: str) -> None: + assert _normalize_il(_lift_to_il_str(data)) == _normalize_il(expected) diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..247eecc --- /dev/null +++ b/uv.lock @@ -0,0 +1,121 @@ +version = 1 +revision = 1 +requires-python = ">=3.11" + +[[package]] +name = "binaryninja-m68k" +version = "0.5.0" +source = { virtual = "." 
} +dependencies = [ + { name = "binja-test-mocks" }, +] + +[package.optional-dependencies] +dev = [ + { name = "pytest" }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "binja-test-mocks", specifier = ">=0.1.5" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, +] +provides-extras = ["dev"] + +[[package]] +name = "binja-test-mocks" +version = "0.1.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/78/bd/7b47567914b7af03df8d691d7b7dafe4d0cde84d1c174ddb4780026960b0/binja_test_mocks-0.1.5.tar.gz", hash = "sha256:bb5b5a97d247f7d2c538016a94694f8c02364b5ff589a582d7dc97b53a398aea", size = 22544 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/d7/c42e6990a71894c157148acac88826f7882c3fdcd43bef4060eb15f39c81/binja_test_mocks-0.1.5-py3-none-any.whl", hash = "sha256:aac33480aabd16a81bdff4c2c5a8ef965a942214d1fba3fcf3233c2b2144f341", size = 26806 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = 
"sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 }, +] + +[[package]] +name = "packaging" +version = "25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217 }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801 }, +] + +[[package]] +name = "ruff" +version = "0.14.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/08/52232a877978dd8f9cf2aeddce3e611b40a63287dfca29b6b8da791f5e8d/ruff-0.14.10.tar.gz", hash = "sha256:9a2e830f075d1a42cd28420d7809ace390832a490ed0966fe373ba288e77aaf4", size = 5859763 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/01/933704d69f3f05ee16ef11406b78881733c186fe14b6a46b05cfcaf6d3b2/ruff-0.14.10-py3-none-linux_armv6l.whl", hash = "sha256:7a3ce585f2ade3e1f29ec1b92df13e3da262178df8c8bdf876f48fa0e8316c49", size = 13527080 }, + { url = "https://files.pythonhosted.org/packages/df/58/a0349197a7dfa603ffb7f5b0470391efa79ddc327c1e29c4851e85b09cc5/ruff-0.14.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:674f9be9372907f7257c51f1d4fc902cb7cf014b9980152b802794317941f08f", size = 13797320 }, + { url = 
"https://files.pythonhosted.org/packages/7b/82/36be59f00a6082e38c23536df4e71cdbc6af8d7c707eade97fcad5c98235/ruff-0.14.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d85713d522348837ef9df8efca33ccb8bd6fcfc86a2cde3ccb4bc9d28a18003d", size = 12918434 }, + { url = "https://files.pythonhosted.org/packages/a6/00/45c62a7f7e34da92a25804f813ebe05c88aa9e0c25e5cb5a7d23dd7450e3/ruff-0.14.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6987ebe0501ae4f4308d7d24e2d0fe3d7a98430f5adfd0f1fead050a740a3a77", size = 13371961 }, + { url = "https://files.pythonhosted.org/packages/40/31/a5906d60f0405f7e57045a70f2d57084a93ca7425f22e1d66904769d1628/ruff-0.14.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16a01dfb7b9e4eee556fbfd5392806b1b8550c9b4a9f6acd3dbe6812b193c70a", size = 13275629 }, + { url = "https://files.pythonhosted.org/packages/3e/60/61c0087df21894cf9d928dc04bcd4fb10e8b2e8dca7b1a276ba2155b2002/ruff-0.14.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7165d31a925b7a294465fa81be8c12a0e9b60fb02bf177e79067c867e71f8b1f", size = 14029234 }, + { url = "https://files.pythonhosted.org/packages/44/84/77d911bee3b92348b6e5dab5a0c898d87084ea03ac5dc708f46d88407def/ruff-0.14.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c561695675b972effb0c0a45db233f2c816ff3da8dcfbe7dfc7eed625f218935", size = 15449890 }, + { url = "https://files.pythonhosted.org/packages/e9/36/480206eaefa24a7ec321582dda580443a8f0671fdbf6b1c80e9c3e93a16a/ruff-0.14.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bb98fcbbc61725968893682fd4df8966a34611239c9fd07a1f6a07e7103d08e", size = 15123172 }, + { url = "https://files.pythonhosted.org/packages/5c/38/68e414156015ba80cef5473d57919d27dfb62ec804b96180bafdeaf0e090/ruff-0.14.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f24b47993a9d8cb858429e97bdf8544c78029f09b520af615c1d261bf827001d", size = 14460260 }, + { url 
= "https://files.pythonhosted.org/packages/b3/19/9e050c0dca8aba824d67cc0db69fb459c28d8cd3f6855b1405b3f29cc91d/ruff-0.14.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59aabd2e2c4fd614d2862e7939c34a532c04f1084476d6833dddef4afab87e9f", size = 14229978 }, + { url = "https://files.pythonhosted.org/packages/51/eb/e8dd1dd6e05b9e695aa9dd420f4577debdd0f87a5ff2fedda33c09e9be8c/ruff-0.14.10-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:213db2b2e44be8625002dbea33bb9c60c66ea2c07c084a00d55732689d697a7f", size = 14338036 }, + { url = "https://files.pythonhosted.org/packages/6a/12/f3e3a505db7c19303b70af370d137795fcfec136d670d5de5391e295c134/ruff-0.14.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b914c40ab64865a17a9a5b67911d14df72346a634527240039eb3bd650e5979d", size = 13264051 }, + { url = "https://files.pythonhosted.org/packages/08/64/8c3a47eaccfef8ac20e0484e68e0772013eb85802f8a9f7603ca751eb166/ruff-0.14.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1484983559f026788e3a5c07c81ef7d1e97c1c78ed03041a18f75df104c45405", size = 13283998 }, + { url = "https://files.pythonhosted.org/packages/12/84/534a5506f4074e5cc0529e5cd96cfc01bb480e460c7edf5af70d2bcae55e/ruff-0.14.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c70427132db492d25f982fffc8d6c7535cc2fd2c83fc8888f05caaa248521e60", size = 13601891 }, + { url = "https://files.pythonhosted.org/packages/0d/1e/14c916087d8598917dbad9b2921d340f7884824ad6e9c55de948a93b106d/ruff-0.14.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5bcf45b681e9f1ee6445d317ce1fa9d6cba9a6049542d1c3d5b5958986be8830", size = 14336660 }, + { url = "https://files.pythonhosted.org/packages/f2/1c/d7b67ab43f30013b47c12b42d1acd354c195351a3f7a1d67f59e54227ede/ruff-0.14.10-py3-none-win32.whl", hash = "sha256:104c49fc7ab73f3f3a758039adea978869a918f31b73280db175b43a2d9b51d6", size = 13196187 }, + { url = 
"https://files.pythonhosted.org/packages/fb/9c/896c862e13886fae2af961bef3e6312db9ebc6adc2b156fe95e615dee8c1/ruff-0.14.10-py3-none-win_amd64.whl", hash = "sha256:466297bd73638c6bdf06485683e812db1c00c7ac96d4ddd0294a338c62fdc154", size = 14661283 }, + { url = "https://files.pythonhosted.org/packages/74/31/b0e29d572670dca3674eeee78e418f20bdf97fa8aa9ea71380885e175ca0/ruff-0.14.10-py3-none-win_arm64.whl", hash = "sha256:e51d046cf6dda98a4633b8a8a771451107413b0f07183b2bef03f075599e44e6", size = 13729839 }, +] From adbc5a1151feed28975ee19c8b4704cfc2853fa4 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 12:31:32 +1100 Subject: [PATCH 33/46] Docs: note PR target repo --- AGENTS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/AGENTS.md b/AGENTS.md index 2a543d8..9a48df1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -39,6 +39,7 @@ Tooling: use `uv` for dependency management and running commands. - Commits use short, imperative summaries (e.g., “Fix TST flags”, “Add tests for calling functions”). - PRs should include: what instructions/flags changed, how to reproduce, and confirmation that `test_all()` passes (or why it cannot). +- Open PRs against `mblsha/binaryninja-m68k` (this fork) unless explicitly coordinating with upstream; with GitHub CLI use `gh pr create --repo mblsha/binaryninja-m68k ...` to avoid targeting `galenbwill/binaryninja-m68k` by default. 
## CI & Git Tips From 57ec1521328c4ed20b75230c30528cd232dc19b3 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 12:37:15 +1100 Subject: [PATCH 34/46] Make plugin __init__ safe for pytest --- __init__.py | 70 +++++++++++++++++++++++++++++++------------------- pyproject.toml | 1 + 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/__init__.py b/__init__.py index 906dcf2..04baa99 100644 --- a/__init__.py +++ b/__init__.py @@ -1,38 +1,54 @@ -import sys +from __future__ import annotations + import os -import binaryninja -from .logging import log_debug, __module__ -log_debug(f'm68k Plugin loaded from: {os.path.dirname(__module__.__loader__.path)}') -from .m68k import * -from .test import test_all -from binaryninja import Architecture, CallingConvention +def _should_register_plugin() -> bool: + # When this repo is checked out into a directory with a non-importable name + # (e.g. `binaryninja-m68k` on GitHub Actions), pytest may import this file as + # a standalone module named `__init__`. Avoid side effects and relative + # imports in that context. 
+ return bool(__package__) + + +if _should_register_plugin(): + import binaryninja + + from .logging import __module__, log_debug + + log_debug(f"m68k Plugin loaded from: {os.path.dirname(__module__.__loader__.path)}") -#PluginCommand.register("Create M68k vector table", "Create M68k vector table", prompt_create_vector_table) -PluginCommand.register_for_address("Create M68k vector table", "Create M68k vector table", prompt_create_vector_table) + from .m68k import * # noqa: F403 + from .test import test_all + from binaryninja import Architecture, CallingConvention -M68000.register() -M68008.register() -M68010.register() -M68020.register() -M68030.register() -M68040.register() -M68LC040.register() -M68EC040.register() -M68330.register() -M68340.register() + # PluginCommand.register("Create M68k vector table", "Create M68k vector table", prompt_create_vector_table) + PluginCommand.register_for_address( # type: ignore[name-defined] + "Create M68k vector table", + "Create M68k vector table", + prompt_create_vector_table, # type: ignore[name-defined] + ) -# BinaryViewType['ELF'].register_arch(4, Endianness.BigEndian, Architecture['M68030']) -BinaryViewType['ELF'].register_arch(4, Endianness.BigEndian, Architecture['M68030']) + M68000.register() + M68008.register() + M68010.register() + M68020.register() + M68030.register() + M68040.register() + M68LC040.register() + M68EC040.register() + M68330.register() + M68340.register() -class ParametersInRegistersCallingConvention(CallingConvention): - name = "ParametersInRegisters" + # BinaryViewType['ELF'].register_arch(4, Endianness.BigEndian, Architecture['M68030']) + BinaryViewType["ELF"].register_arch(4, Endianness.BigEndian, Architecture["M68030"]) # type: ignore[name-defined] + class ParametersInRegistersCallingConvention(CallingConvention): + name = "ParametersInRegisters" -arch = Architecture['M68000'] -arch.register_calling_convention(ParametersInRegistersCallingConvention(arch, 'default')) + arch = 
Architecture["M68000"] + arch.register_calling_convention(ParametersInRegistersCallingConvention(arch, "default")) -BinaryViewType['ELF'].register_arch(4, Endianness.BigEndian, Architecture['M68030']) + BinaryViewType["ELF"].register_arch(4, Endianness.BigEndian, Architecture["M68030"]) # type: ignore[name-defined] -test_all() + test_all() diff --git a/pyproject.toml b/pyproject.toml index 3be334d..6d7d0d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ dev = [ [tool.pytest.ini_options] testpaths = ["tests"] norecursedirs = ["binja-esr"] +addopts = ["--import-mode=importlib"] [tool.ruff] line-length = 120 From 41db1fa85057d7f4e70fdbfe9f29ef4392158a61 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 13:58:49 +1100 Subject: [PATCH 35/46] Switch LLIL tests to structural mocks --- pyproject.toml | 2 +- test.py | 286 +++++++++++++++++++++++++-------------------- tests/conftest.py | 177 +--------------------------- tests/test_llil.py | 134 ++++++++++----------- uv.lock | 8 +- 5 files changed, 228 insertions(+), 379 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6d7d0d5..7bdce13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ description = "Binary Ninja disassembler and lifter for the Motorola 68k archite readme = "README.md" requires-python = ">=3.11" dependencies = [ - "binja-test-mocks>=0.1.5", + "binja-test-mocks>=0.1.7", ] [project.optional-dependencies] diff --git a/test.py b/test.py index 3812f29..6cb8d08 100644 --- a/test.py +++ b/test.py @@ -1,168 +1,198 @@ -# inspired by https://github.com/Vector35/arch-arm64/blob/staging/arm64test.py -from .m68k import * +from __future__ import annotations -SAVE_FLAGS = '' -RESTORE_FLAGS = '' -FINALIZER = 'LLIL_RET(LLIL_POP.d())' +import os +from dataclasses import dataclass -if RTS_PASS_FLAGS: - SAVE_FLAGS = 'LLIL_SET_REG.b(rc,LLIL_FLAG(c))' - RESTORE_FLAGS = '; LLIL_SET_FLAG(c,LLIL_REG.b(rc))' - FINALIZER = SAVE_FLAGS + '; ' + FINALIZER 
+os.environ.setdefault("FORCE_BINJA_MOCK", "1") -test_cases = [ +# Installs a stubbed `binaryninja` module into `sys.modules`. +from binja_test_mocks import binja_api # noqa: F401 # pyright: ignore +from binja_test_mocks import mock_llil +from binja_test_mocks.mock_llil import MockFlag, MockLLIL, MockReg, mllil, mreg + +from .m68k import RTS_PASS_FLAGS + +mock_llil.set_size_lookup( + {1: ".b", 2: ".w", 4: ".d", 8: ".q", 16: ".o"}, + {"b": 1, "w": 2, "d": 4, "q": 8, "o": 16}, +) + + +@dataclass(frozen=True) +class LabelRef: + """Placeholder for a LowLevelILLabel (bound during assertion).""" + + name: str + + +def _l(name: str) -> LabelRef: + return LabelRef(name) + + +def _flag(name: str) -> MockFlag: + return MockFlag(name) + + +def _il(op: str, *ops: object) -> MockLLIL: + return mllil(op, list(ops)) + + +def _maybe_restore_flags() -> list[MockLLIL]: + if not RTS_PASS_FLAGS: + return [] + return [_il("SET_FLAG", _flag("c"), _il("REG.b", mreg("rc")))] + + +def _rts_expected() -> list[MockLLIL]: + out: list[MockLLIL] = [] + if RTS_PASS_FLAGS: + out.append(_il("SET_REG.b", mreg("rc"), _il("FLAG", _flag("c")))) + out.append(_il("RET", _il("POP.d"))) + return out + + +test_cases: list[tuple[bytes, list[MockLLIL]]] = [ # moveq #$0000,d0 - (b'\x70\x00', 'LLIL_SET_REG.d{nzvc}(d0,LLIL_CONST.d(0x0))'), + (b"\x70\x00", [_il("SET_REG.d{nzvc}", mreg("d0"), _il("CONST.d", 0))]), # subq.b #$1,d0 # FIXME: Generate flag 'x' - (b'\x53\x00', 'LLIL_SET_REG.b(d0.b,LLIL_SUB.b{*}(LLIL_REG.b(d0),LLIL_CONST.b(0x1)))'), + ( + b"\x53\x00", + [ + _il( + "SET_REG.b", + mreg("d0.b"), + _il("SUB.b{*}", _il("REG.b", mreg("d0")), _il("CONST.b", 1)), + ) + ], + ), # jsr 0x5dc1c, no arguments for this call - (b'\x4e\xb9\x00\x05\xdc\x1c', 'LLIL_CALL(LLIL_CONST_PTR.d(0x5DC1C))' + RESTORE_FLAGS), + (b"\x4e\xb9\x00\x05\xdc\x1c", [_il("CALL", _il("CONST_PTR.d", 0x5DC1C))] + _maybe_restore_flags()), # at 0x53a, jsr 0x546, seems to be correctly interpreted as a call - (b'\x4e\xba\x00\x0a', 
'LLIL_CALL(LLIL_CONST_PTR.d(0xC))' + RESTORE_FLAGS), + (b"\x4e\xba\x00\x0a", [_il("CALL", _il("CONST_PTR.d", 0xC))] + _maybe_restore_flags()), # lea (data_7a9ee[2]),a1 - (b'\x43\xf9\x00\x07\xa9\xf0', 'LLIL_SET_REG.d(a1,LLIL_CONST_PTR.d(0x7A9F0))'), + (b"\x43\xf9\x00\x07\xa9\xf0", [_il("SET_REG.d", mreg("a1"), _il("CONST_PTR.d", 0x7A9F0))]), # tst.w d1 - (b'\x4a\x41', 'LLIL_SUB.w{nz}(LLIL_REG.w(d1),LLIL_CONST.w(0x0)); LLIL_SET_FLAG(v,LLIL_CONST.b(0x0)); LLIL_SET_FLAG(c,LLIL_CONST.b(0x0))'), + ( + b"\x4a\x41", + [ + _il("SUB.w{nz}", _il("REG.w", mreg("d1")), _il("CONST.w", 0)), + _il("SET_FLAG", _flag("v"), _il("CONST.b", 0)), + _il("SET_FLAG", _flag("c"), _il("CONST.b", 0)), + ], + ), # lea ($279d2e),a0 # move $-004(a0),$0074(a6) - (b'\x41\xf9\x00\x27\x9d\x2e\x2d\x68\xff\xfc\x00\x74', 'LLIL_SET_REG.d(a0,LLIL_CONST_PTR.d(0x279D2E)); LLIL_STORE.d{nzvc}(LLIL_ADD.d(LLIL_REG.d(a6),LLIL_CONST.w(0x74)),LLIL_LOAD.d(LLIL_SUB.d(LLIL_REG.d(a0),LLIL_CONST.w(0x4))))'), + ( + b"\x41\xf9\x00\x27\x9d\x2e\x2d\x68\xff\xfc\x00\x74", + [ + _il("SET_REG.d", mreg("a0"), _il("CONST_PTR.d", 0x279D2E)), + _il( + "STORE.d{nzvc}", + _il("ADD.d", _il("REG.d", mreg("a6")), _il("CONST.w", 0x74)), + _il( + "LOAD.d", + _il("SUB.d", _il("REG.d", mreg("a0")), _il("CONST.w", 0x4)), + ), + ), + ], + ), # beq (data_10) - (b'\x67\x00\x00\x0e', 'LLIL_IF(LLIL_FLAG_COND(0,None),1,3); LLIL_JUMP(LLIL_CONST_PTR.d(0x10))'), + ( + b"\x67\x00\x00\x0e", + [ + _il("IF", _il("FLAG_COND", 0, None), _l("t"), _l("f")), + _il("JUMP", _il("CONST_PTR.d", 0x10)), + ], + ), # jmp ($4c862) - (b'\x4e\xf9\x00\x04\xc8\x62', 'LLIL_JUMP(LLIL_CONST_PTR.d(0x4C862))'), + (b"\x4e\xf9\x00\x04\xc8\x62", [_il("JUMP", _il("CONST_PTR.d", 0x4C862))]), # bra (data_28) - (b'\x60\x00\x00\x26', 'LLIL_JUMP(LLIL_CONST_PTR.d(0x28))'), + (b"\x60\x00\x00\x26", [_il("JUMP", _il("CONST_PTR.d", 0x28))]), # dbf d7,(data_-2c) - (b'\x51\xcf\xff\xd4','; '.join(['LLIL_IF(LLIL_CONST.b(0x0),6,1); ' + FINALIZER + '; 
LLIL_SET_REG.w(temp0,LLIL_SUB.w(LLIL_REG.w(d7),LLIL_CONST.w(0x1))); LLIL_SET_REG.w(d7.w,LLIL_REG.w(temp0)); LLIL_IF(LLIL_CMP_E.w(LLIL_REG.w(temp0),LLIL_CONST.w(0xFFFF)),6,4); LLIL_JUMP(LLIL_CONST_PTR.d(0xFFFFFFD6))'])), + ( + b"\x51\xcf\xff\xd4", + [ + _il("IF", _il("CONST.b", 0), _l("skip"), _l("decrement")), + _il( + "SET_REG.w", + mreg("TEMP0"), + _il("SUB.w", _il("REG.w", mreg("d7")), _il("CONST.w", 1)), + ), + _il("SET_REG.w", mreg("d7.w"), _il("REG.w", mreg("TEMP0"))), + _il( + "IF", + _il( + "CMP_E.w", + _il("REG.w", mreg("TEMP0")), + _il("CONST.w", -1), + ), + _l("skip"), + _l("branch"), + ), + _il("JUMP", _il("CONST_PTR.d", -42)), + ], + ), # bcc (data_5a) - (b'\x64\x00\x00\x58', 'LLIL_IF(LLIL_FLAG_COND(7,None),1,3); LLIL_JUMP(LLIL_CONST_PTR.d(0x5A))'), + ( + b"\x64\x00\x00\x58", + [ + _il("IF", _il("FLAG_COND", 7, None), _l("t"), _l("f")), + _il("JUMP", _il("CONST_PTR.d", 0x5A)), + ], + ), # rts - (b'\x4e\x75', ''), + (b"\x4e\x75", _rts_expected()), # andi.b #$-2,ccr - (b'\x02\x3c\x00\xfe', 'LLIL_SET_FLAG(c,LLIL_CONST.b(0x0))'), + (b"\x02\x3c\x00\xfe", [_il("SET_FLAG", _flag("c"), _il("CONST.b", 0))]), # ori.b #$1,ccr - (b'\x00\x3c\x00\x01', 'LLIL_SET_FLAG(c,LLIL_CONST.b(0x1)); LLIL_SET_FLAG(x,LLIL_CONST.b(0x1))'), + ( + b"\x00\x3c\x00\x01", + [ + _il("SET_FLAG", _flag("c"), _il("CONST.b", 1)), + _il("SET_FLAG", _flag("x"), _il("CONST.b", 1)), + ], + ), # scs.b d1 - (b'\x55\xc1', 'LLIL_IF(LLIL_FLAG_COND(3,None),1,3); LLIL_SET_REG.b(d1.b,LLIL_CONST.b(0x1)); LLIL_GOTO(5); LLIL_SET_REG.b(d1.b,LLIL_CONST.b(0x0)); LLIL_GOTO(5)'), + ( + b"\x55\xc1", + [ + _il("IF", _il("FLAG_COND", 3, None), _l("set"), _l("clear")), + _il("SET_REG.b", mreg("d1.b"), _il("CONST.b", 1)), + _il("GOTO", _l("skip")), + _il("SET_REG.b", mreg("d1.b"), _il("CONST.b", 0)), + _il("GOTO", _l("skip")), + ], + ), # swap d6 - (b'\x48\x46', 'LLIL_SET_REG.d(d6,LLIL_ROR.d(LLIL_REG.d(d6),LLIL_CONST.b(0x10)))'), -] + ( + b"\x48\x46", + [_il("SET_REG.d", mreg("d6"), _il("ROR.d", _il("REG.d", 
mreg("d6")), _il("CONST.b", 0x10)))], + ), -if RTS_PASS_FLAGS: - # rtr - test_cases.append((b'\x4e\x77', 'LLIL_SET_REG.w(ccr,LLIL_POP.w()); LLIL_RET(LLIL_POP.d())')) -else: # rtr - test_cases.append((b'\x4e\x77', 'LLIL_SET_REG.w(ccr,LLIL_POP.w())')) - -import re -import sys -import binaryninja -from binaryninja import binaryview -from binaryninja import lowlevelil -from binaryninja.enums import LowLevelILOperation - -def il2str(il): - sz_lookup = {1:'.b', 2:'.w', 4:'.d', 8:'.q', 16:'.o'} - if isinstance(il, lowlevelil.LowLevelILInstruction): - size_code = sz_lookup.get(il.size, '?') if il.size else '' - flags_code = '' if not hasattr(il, 'flags') or not il.flags else '{%s}'%il.flags - - # print size-specified IL constants in hex - if il.operation in [LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR] and il.size: - tmp = il.operands[0] - if tmp < 0: tmp = (1<<(il.size*8))+tmp - tmp = '0x%X' % tmp if il.size else '%d' % il.size - return '%s%s(%s)' % (il.operation.name, size_code, tmp) - else: - return '%s%s%s(%s)' % (il.operation.name, size_code, flags_code, ','.join([il2str(o) for o in il.operands])) - elif isinstance(il, list): - return '[' + ','.join([il2str(x) for x in il]) + ']' - else: - return str(il) - -def instr_to_il(data): - RETURN = b'\x4e\x75' # rts - - platform = binaryninja.Platform['M68000'] - # make a pretend function that returns - bv = binaryview.BinaryView.new(data + RETURN) - bv.add_function(0, plat=platform) - assert len(bv.functions) == 1 - - result = [] - #for block in bv.functions[0].low_level_il: - for block in bv.functions[0].lifted_il: - for il in block: - result.append(il2str(il)) - result = '; '.join(result) - - ret = FINALIZER - if result.endswith(ret): - result = result[0:result.index(ret)] - if result.endswith('; '): - result = result[0:-2] - - return result - -def il_str_to_tree(ilstr): - result = '' - depth = 0 - for c in ilstr: - if c == '(': - result += '\n' - depth += 1 - result += ' '*depth - elif c == ')': - 
depth -= 1 - elif c == ',': - result += '\n' - result += ' '*depth - pass - elif c == ';': - result += '\n' - depth = 0 - result += ' '*depth - elif c == ' ': - pass - else: - result += c - return result - -# print(il_str_to_tree('foo(bar)')) -# print(il_str_to_tree('a(b(c,d(z),e));d(e(f))')) -# print(il_str_to_tree('LLIL_RET(LLIL_POP.d())' + ';LLIL_RET(LLIL_POP.d())')) - -def test_all(): - ret = True - for (test_i, (data, expected)) in enumerate(test_cases): - actual = instr_to_il(data) - if actual != expected: - print('MISMATCH AT TEST %d!' % test_i) - print('\t input: %s' % data.hex()) - print('\texpected: %s' % expected) - # print(il_str_to_tree(expected)) - print('\t actual: ') - print(actual) - print('\t tree:') - print(il_str_to_tree(actual)) - print('\n\n') - ret = False - return ret + ( + b"\x4e\x77", + [ + _il("SET_REG.w", mreg("ccr"), _il("POP.w")), + _il("RET", _il("POP.d")), + ], + ), +] + diff --git a/tests/conftest.py b/tests/conftest.py index 25aae83..0f59a49 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,176 +9,12 @@ # Installs a stubbed `binaryninja` module into `sys.modules`. 
from binja_test_mocks import binja_api # noqa: F401 # pyright: ignore +from binja_test_mocks import mock_llil - -def _install_llil_test_helpers() -> None: - import binaryninja - from binja_test_mocks import mock_llil - from binja_test_mocks.mock_llil import MockFlag, MockLLIL, MockLowLevelILFunction, MockReg, mreg - from binaryninja.enums import LowLevelILOperation - - mock_llil.set_size_lookup( - {1: ".b", 2: ".w", 4: ".d", 8: ".q", 16: ".o"}, - {"b": 1, "w": 2, "d": 4, "q": 8, "o": 16}, - ) - - def _name_str(self) -> str: # type: ignore[no-untyped-def] - return self.name - - MockReg.__str__ = _name_str # type: ignore[assignment] - MockFlag.__str__ = _name_str # type: ignore[assignment] - - if not isinstance(MockLLIL.__dict__.get("flags"), property): - _flags_method = MockLLIL.flags - - @property - def flags(self) -> str | None: # type: ignore[no-redef] - return _flags_method(self) - - MockLLIL.flags = flags # type: ignore[assignment] - - if not hasattr(MockLLIL, "operation"): - - @property - def operation(self) -> object: # type: ignore[no-redef] - return getattr(LowLevelILOperation, f"LLIL_{self.bare_op()}") - - @property - def operands(self) -> list[object]: # type: ignore[no-redef] - return self.ops - - @property - def size(self) -> int | None: # type: ignore[no-redef] - return self.width() - - @property - def constant(self) -> int: # type: ignore[no-redef] - if self.operation in (LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR): - return int(self.ops[0]) - raise AttributeError("Instruction has no constant") - - MockLLIL.operation = operation # type: ignore[attr-defined] - MockLLIL.operands = operands # type: ignore[attr-defined] - MockLLIL.size = size # type: ignore[attr-defined] - MockLLIL.constant = constant # type: ignore[attr-defined] - - class LowLevelILFunction(MockLowLevelILFunction): # noqa: N801 - _default_current_address: int = 0 - - def __init__(self, *_args: object, **_kwargs: object) -> None: - super().__init__() - 
self.current_address = self._default_current_address - self._label_ids: dict[object, int] = {} - self._next_label_id = 1 - - def __iter__(self): - return iter(self.ils) - - def __getitem__(self, idx: object) -> object: - if isinstance(idx, int): - return self.ils[idx] - return idx - - def _label_id(self, label: object) -> int: - existing = self._label_ids.get(label) - if existing is not None: - return existing - assigned = self._next_label_id - self._label_ids[label] = assigned - self._next_label_id += 2 - return assigned - - def expr(self, *args: object, **kwargs: object) -> object: # type: ignore[override] - flags = kwargs.get("flags") - if flags in (0, "0"): - kwargs["flags"] = None - return super().expr(*args, **kwargs) - - def reg(self, size: int, reg_obj: object) -> object: # type: ignore[override] - ExpressionIndex = binaryninja.lowlevelil.ExpressionIndex # type: ignore[attr-defined] - if isinstance(reg_obj, ExpressionIndex): - reg_obj = mreg(f"temp{int(reg_obj) - 0x80000000}") - return super().reg(size, reg_obj) - - def set_reg(self, size: int, reg_obj: object, value: object, flags: object = 0) -> object: # type: ignore[override] - ExpressionIndex = binaryninja.lowlevelil.ExpressionIndex # type: ignore[attr-defined] - if isinstance(reg_obj, ExpressionIndex): - reg_obj = mreg(f"temp{int(reg_obj) - 0x80000000}") - if isinstance(reg_obj, str): - reg_obj = mreg(reg_obj) - flags_arg = None if flags in (0, "0") else flags - return self.expr(LowLevelILOperation.LLIL_SET_REG, reg_obj, value, size=size, flags=flags_arg) - - def goto(self, label, loc=None) -> object: # type: ignore[override] - return self.expr(LowLevelILOperation.LLIL_GOTO, self._label_id(label), size=None) - - def if_expr(self, cond, t, f) -> object: # type: ignore[override] - return self.expr( - LowLevelILOperation.LLIL_IF, - cond, - self._label_id(t), - self._label_id(f), - size=None, - ) - - def flag_condition(self, cond, loc=None) -> object: # type: ignore[override] - return 
self.expr(LowLevelILOperation.LLIL_FLAG_COND, int(cond), None, size=None) - - def mark_label(self, label) -> None: # type: ignore[override] - self._label_id(label) - return None - - def get_label_for_address(self, _arch: object, _addr: int): # type: ignore[override] - return None - - llil_mod = sys.modules.get("binaryninja.lowlevelil") - if llil_mod is not None: - llil_mod.LowLevelILFunction = LowLevelILFunction # type: ignore[attr-defined] - llil_mod.LowLevelILInstruction = MockLLIL # type: ignore[attr-defined] - binaryninja.lowlevelil.LowLevelILFunction = LowLevelILFunction # type: ignore[attr-defined] - binaryninja.lowlevelil.LowLevelILInstruction = MockLLIL # type: ignore[attr-defined] - - -def _patch_missing_binaryninja_submodules() -> None: - if "binaryninja.plugin" not in sys.modules: - plugin_mod = types.ModuleType("binaryninja.plugin") - - class PluginCommand: # noqa: N801 - @staticmethod - def register_for_address(*_args, **_kwargs) -> None: - return None - - @staticmethod - def register(*_args, **_kwargs) -> None: - return None - - plugin_mod.PluginCommand = PluginCommand - sys.modules["binaryninja.plugin"] = plugin_mod - - interaction_mod = sys.modules.get("binaryninja.interaction") - if interaction_mod is not None: - if not hasattr(interaction_mod, "AddressField"): - - class AddressField: # noqa: N801 - def __init__(self, *_args, **_kwargs) -> None: - return None - - interaction_mod.AddressField = AddressField - - if not hasattr(interaction_mod, "ChoiceField"): - - class ChoiceField: # noqa: N801 - def __init__(self, *_args, **_kwargs) -> None: - return None - - interaction_mod.ChoiceField = ChoiceField - - if not hasattr(interaction_mod, "get_form_input"): - - def get_form_input(*_args, **_kwargs) -> bool: # noqa: N802 - return False - - interaction_mod.get_form_input = get_form_input +mock_llil.set_size_lookup( + {1: ".b", 2: ".w", 4: ".d", 8: ".q", 16: ".o"}, + {"b": 1, "w": 2, "d": 4, "q": 8, "o": 16}, +) def _install_repo_as_m68k_package() -> 
None: @@ -189,6 +25,5 @@ def _install_repo_as_m68k_package() -> None: sys.modules["m68k"] = pkg -_patch_missing_binaryninja_submodules() -_install_llil_test_helpers() _install_repo_as_m68k_package() + diff --git a/tests/test_llil.py b/tests/test_llil.py index 76f9da9..4b83b3d 100644 --- a/tests/test_llil.py +++ b/tests/test_llil.py @@ -1,99 +1,83 @@ from __future__ import annotations import importlib +from typing import Any import pytest from binaryninja import lowlevelil +from binja_test_mocks.mock_llil import MockFlag, MockLLIL, MockReg m68k_test = importlib.import_module("m68k.test") m68k_arch = importlib.import_module("m68k.m68k") -def _strip_finalizer(il_str: str) -> str: - finalizer = m68k_test.FINALIZER - if il_str.endswith(finalizer): - il_str = il_str[: il_str.index(finalizer)] - if il_str.endswith("; "): - il_str = il_str[:-2] - return il_str - - -def _canonicalize_labels(il_str: str) -> str: - label_map: dict[str, int] = {} - next_label = 1 - - def _alloc(label_str: str) -> str: - nonlocal next_label - if label_str not in label_map: - label_map[label_str] = next_label - next_label += 2 - return str(label_map[label_str]) - - def _split_top_level_args(arg_str: str) -> list[str]: - parts: list[str] = [] - buf: list[str] = [] - depth = 0 - for ch in arg_str: - if ch == "(": - depth += 1 - elif ch == ")": - depth -= 1 - elif ch == "," and depth == 0: - parts.append("".join(buf).strip()) - buf = [] - continue - buf.append(ch) - parts.append("".join(buf).strip()) - return parts - - instructions = [] if not il_str else il_str.split("; ") - out: list[str] = [] - for instr in instructions: - if instr.startswith("LLIL_GOTO(") and instr.endswith(")"): - label = instr[len("LLIL_GOTO(") : -1].strip() - out.append(f"LLIL_GOTO({_alloc(label)})") - continue - - if instr.startswith("LLIL_IF(") and instr.endswith(")"): - inner = instr[len("LLIL_IF(") : -1] - args = _split_top_level_args(inner) - if len(args) == 3: - args[1] = _alloc(args[1]) - args[2] = _alloc(args[2]) - 
out.append(f"LLIL_IF({','.join(args)})") - continue - - out.append(instr) - - return "; ".join(out) - - -def _normalize_il(il_str: str) -> str: - il_str = _strip_finalizer(il_str) - if not il_str: - return "" - - finalizer_parts = {p.strip() for p in m68k_test.FINALIZER.split("; ") if p.strip()} - instructions = [p for p in il_str.split("; ") if p and p not in finalizer_parts] - return _canonicalize_labels("; ".join(instructions)) - - -def _lift_to_il_str(data: bytes, *, start_addr: int = 0) -> str: +def _lift_to_llil(data: bytes, *, start_addr: int = 0) -> list[MockLLIL]: arch = m68k_arch.M68000() il = lowlevelil.LowLevelILFunction(arch) offset = 0 while offset < len(data): il.current_address = start_addr + offset # type: ignore[attr-defined] - il.__class__._default_current_address = il.current_address # type: ignore[attr-defined] length = arch.get_instruction_low_level_il(data[offset:], start_addr + offset, il) assert length is not None and length > 0 offset += length - result = "; ".join(m68k_test.il2str(instr) for instr in il) - return _strip_finalizer(result) + return list(il) + + +def _mask_for_size(size_bytes: int) -> int: + return (1 << (size_bytes * 8)) - 1 + + +def _match_node(actual: Any, expected: Any, labels: dict[str, object]) -> None: + if isinstance(expected, m68k_test.LabelRef): + bound = labels.get(expected.name) + if bound is None: + labels[expected.name] = actual + return + assert actual is bound + return + + if isinstance(expected, MockLLIL): + assert isinstance(actual, MockLLIL) + assert actual.op == expected.op + + if expected.bare_op() in ("CONST", "CONST_PTR"): + expected_size = expected.width() + actual_size = actual.width() + assert expected_size == actual_size + assert len(actual.ops) == 1 and len(expected.ops) == 1 + if expected_size is None: + assert actual.ops[0] == expected.ops[0] + else: + mask = _mask_for_size(expected_size) + assert (int(actual.ops[0]) & mask) == (int(expected.ops[0]) & mask) + return + + assert len(actual.ops) == 
len(expected.ops) + for act_op, exp_op in zip(actual.ops, expected.ops, strict=True): + _match_node(act_op, exp_op, labels) + return + + if isinstance(expected, MockReg): + assert getattr(actual, "name", None) == expected.name + return + + if isinstance(expected, MockFlag): + assert getattr(actual, "name", None) == expected.name + return + + assert actual == expected + + +def assert_llil(actual: list[MockLLIL], expected: list[MockLLIL]) -> None: + assert len(actual) == len(expected) + label_bindings: dict[str, object] = {} + for act, exp in zip(actual, expected, strict=True): + _match_node(act, exp, label_bindings) @pytest.mark.parametrize("data, expected", m68k_test.test_cases) -def test_llil_regressions(data: bytes, expected: str) -> None: - assert _normalize_il(_lift_to_il_str(data)) == _normalize_il(expected) +def test_llil_regressions(data: bytes, expected: list[MockLLIL]) -> None: + assert_llil(_lift_to_llil(data), expected) + diff --git a/uv.lock b/uv.lock index 247eecc..865b6d0 100644 --- a/uv.lock +++ b/uv.lock @@ -18,7 +18,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "binja-test-mocks", specifier = ">=0.1.5" }, + { name = "binja-test-mocks", specifier = ">=0.1.7" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, ] @@ -26,11 +26,11 @@ provides-extras = ["dev"] [[package]] name = "binja-test-mocks" -version = "0.1.5" +version = "0.1.7" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/78/bd/7b47567914b7af03df8d691d7b7dafe4d0cde84d1c174ddb4780026960b0/binja_test_mocks-0.1.5.tar.gz", hash = "sha256:bb5b5a97d247f7d2c538016a94694f8c02364b5ff589a582d7dc97b53a398aea", size = 22544 } +sdist = { url = "https://files.pythonhosted.org/packages/de/7e/e3f12cbfbf2f170fa06d23d346cccf830f72cc11a7632b3d81c84942c160/binja_test_mocks-0.1.7.tar.gz", hash = 
"sha256:91a5d15659e1de2aed83f061029ee176f9d3196e9a9e38952a8ae9733ec6158c", size = 23579 } wheels = [ - { url = "https://files.pythonhosted.org/packages/79/d7/c42e6990a71894c157148acac88826f7882c3fdcd43bef4060eb15f39c81/binja_test_mocks-0.1.5-py3-none-any.whl", hash = "sha256:aac33480aabd16a81bdff4c2c5a8ef965a942214d1fba3fcf3233c2b2144f341", size = 26806 }, + { url = "https://files.pythonhosted.org/packages/6a/85/7e28f778265b89abc5656fb7a7449ff1197523b7aae466482f63b7a1601e/binja_test_mocks-0.1.7-py3-none-any.whl", hash = "sha256:90478540b78c747eb7f3e26fcc31711840086cfe5d65f066cb64e57bcab01395", size = 27825 }, ] [[package]] From 1047a6e079e0f9970d6e5a20d86dfe770921a107 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 14:28:06 +1100 Subject: [PATCH 36/46] Add disasm assertions and align plugin init --- __init__.py | 54 ++++++---- pyproject.toml | 2 +- test.py | 256 +++++++++++++++++++++++++++++++-------------- tests/test_llil.py | 24 ++++- uv.lock | 8 +- 5 files changed, 238 insertions(+), 106 deletions(-) diff --git a/__init__.py b/__init__.py index 04baa99..c7e36e7 100644 --- a/__init__.py +++ b/__init__.py @@ -1,32 +1,52 @@ from __future__ import annotations +import importlib.util import os +import sys +from pathlib import Path -def _should_register_plugin() -> bool: - # When this repo is checked out into a directory with a non-importable name - # (e.g. `binaryninja-m68k` on GitHub Actions), pytest may import this file as - # a standalone module named `__init__`. Avoid side effects and relative - # imports in that context. - return bool(__package__) +# Ensure the plugin directory is available on `sys.path` so that absolute +# imports work when the plugin is loaded directly by Binary Ninja. 
+_plugin_dir = str(Path(__file__).resolve().parent) +if _plugin_dir not in sys.path: + sys.path.insert(0, _plugin_dir) +def module_exists(module_name: str) -> bool: + if module_name in sys.modules: + return True + try: + return importlib.util.find_spec(module_name) is not None + except (ValueError, ImportError): + return False -if _should_register_plugin(): - import binaryninja +if module_exists("binaryninja") and __package__: + from binaryninja import Architecture, BinaryViewType, CallingConvention + from binaryninja.enums import Endianness + from binaryninja.plugin import PluginCommand from .logging import __module__, log_debug + from .m68k import ( + M68000, + M68008, + M68010, + M68020, + M68030, + M68040, + M68EC040, + M68LC040, + M68330, + M68340, + prompt_create_vector_table, + ) log_debug(f"m68k Plugin loaded from: {os.path.dirname(__module__.__loader__.path)}") - from .m68k import * # noqa: F403 - from .test import test_all - from binaryninja import Architecture, CallingConvention - # PluginCommand.register("Create M68k vector table", "Create M68k vector table", prompt_create_vector_table) - PluginCommand.register_for_address( # type: ignore[name-defined] + PluginCommand.register_for_address( "Create M68k vector table", "Create M68k vector table", - prompt_create_vector_table, # type: ignore[name-defined] + prompt_create_vector_table, ) M68000.register() @@ -41,7 +61,7 @@ def _should_register_plugin() -> bool: M68340.register() # BinaryViewType['ELF'].register_arch(4, Endianness.BigEndian, Architecture['M68030']) - BinaryViewType["ELF"].register_arch(4, Endianness.BigEndian, Architecture["M68030"]) # type: ignore[name-defined] + BinaryViewType["ELF"].register_arch(4, Endianness.BigEndian, Architecture["M68030"]) class ParametersInRegistersCallingConvention(CallingConvention): name = "ParametersInRegisters" @@ -49,6 +69,4 @@ class ParametersInRegistersCallingConvention(CallingConvention): arch = Architecture["M68000"] 
arch.register_calling_convention(ParametersInRegistersCallingConvention(arch, "default")) - BinaryViewType["ELF"].register_arch(4, Endianness.BigEndian, Architecture["M68030"]) # type: ignore[name-defined] - - test_all() + BinaryViewType["ELF"].register_arch(4, Endianness.BigEndian, Architecture["M68030"]) diff --git a/pyproject.toml b/pyproject.toml index 7bdce13..d876d72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ description = "Binary Ninja disassembler and lifter for the Motorola 68k archite readme = "README.md" requires-python = ">=3.11" dependencies = [ - "binja-test-mocks>=0.1.7", + "binja-test-mocks>=0.1.8", ] [project.optional-dependencies] diff --git a/test.py b/test.py index 6cb8d08..9fa5f07 100644 --- a/test.py +++ b/test.py @@ -1,22 +1,11 @@ from __future__ import annotations -import os from dataclasses import dataclass -os.environ.setdefault("FORCE_BINJA_MOCK", "1") - -# Installs a stubbed `binaryninja` module into `sys.modules`. -from binja_test_mocks import binja_api # noqa: F401 # pyright: ignore -from binja_test_mocks import mock_llil -from binja_test_mocks.mock_llil import MockFlag, MockLLIL, MockReg, mllil, mreg +from binja_test_mocks.mock_llil import MockFlag, MockLLIL, mllil, mreg from .m68k import RTS_PASS_FLAGS -mock_llil.set_size_lookup( - {1: ".b", 2: ".w", 4: ".d", 8: ".q", 16: ".o"}, - {"b": 1, "w": 2, "d": 4, "q": 8, "o": 16}, -) - @dataclass(frozen=True) class LabelRef: @@ -25,65 +14,85 @@ class LabelRef: name: str -def _l(name: str) -> LabelRef: - return LabelRef(name) - - -def _flag(name: str) -> MockFlag: - return MockFlag(name) - - -def _il(op: str, *ops: object) -> MockLLIL: - return mllil(op, list(ops)) - - def _maybe_restore_flags() -> list[MockLLIL]: if not RTS_PASS_FLAGS: return [] - return [_il("SET_FLAG", _flag("c"), _il("REG.b", mreg("rc")))] + return [mllil("SET_FLAG", [MockFlag("c"), mllil("REG.b", [mreg("rc")])])] def _rts_expected() -> list[MockLLIL]: out: list[MockLLIL] = [] if RTS_PASS_FLAGS: - 
out.append(_il("SET_REG.b", mreg("rc"), _il("FLAG", _flag("c")))) - out.append(_il("RET", _il("POP.d"))) + out.append(mllil("SET_REG.b", [mreg("rc"), mllil("FLAG", [MockFlag("c")])])) + out.append(mllil("RET", [mllil("POP.d", [])])) return out -test_cases: list[tuple[bytes, list[MockLLIL]]] = [ +test_cases: list[tuple[bytes, str, list[MockLLIL]]] = [ # moveq #$0000,d0 - (b"\x70\x00", [_il("SET_REG.d{nzvc}", mreg("d0"), _il("CONST.d", 0))]), + ( + b"\x70\x00", + "moveq #$0000,d0", + [mllil("SET_REG.d{nzvc}", [mreg("d0"), mllil("CONST.d", [0])])], + ), # subq.b #$1,d0 # FIXME: Generate flag 'x' ( b"\x53\x00", + "subq.b #$1,d0", [ - _il( + mllil( "SET_REG.b", - mreg("d0.b"), - _il("SUB.b{*}", _il("REG.b", mreg("d0")), _il("CONST.b", 1)), - ) + [ + mreg("d0.b"), + mllil( + "SUB.b{*}", + [ + mllil("REG.b", [mreg("d0")]), + mllil("CONST.b", [1]), + ], + ), + ], + ), ], ), # jsr 0x5dc1c, no arguments for this call - (b"\x4e\xb9\x00\x05\xdc\x1c", [_il("CALL", _il("CONST_PTR.d", 0x5DC1C))] + _maybe_restore_flags()), + ( + b"\x4e\xb9\x00\x05\xdc\x1c", + "jsr ($5dc1c)", + [mllil("CALL", [mllil("CONST_PTR.d", [0x5DC1C])])] + _maybe_restore_flags(), + ), # at 0x53a, jsr 0x546, seems to be correctly interpreted as a call - (b"\x4e\xba\x00\x0a", [_il("CALL", _il("CONST_PTR.d", 0xC))] + _maybe_restore_flags()), + ( + b"\x4e\xba\x00\x0a", + "jsr ($0000000c)", + [mllil("CALL", [mllil("CONST_PTR.d", [0xC])])] + _maybe_restore_flags(), + ), # lea (data_7a9ee[2]),a1 - (b"\x43\xf9\x00\x07\xa9\xf0", [_il("SET_REG.d", mreg("a1"), _il("CONST_PTR.d", 0x7A9F0))]), + ( + b"\x43\xf9\x00\x07\xa9\xf0", + "lea ($7a9f0),a1", + [mllil("SET_REG.d", [mreg("a1"), mllil("CONST_PTR.d", [0x7A9F0])])], + ), # tst.w d1 ( b"\x4a\x41", + "tst.w d1", [ - _il("SUB.w{nz}", _il("REG.w", mreg("d1")), _il("CONST.w", 0)), - _il("SET_FLAG", _flag("v"), _il("CONST.b", 0)), - _il("SET_FLAG", _flag("c"), _il("CONST.b", 0)), + mllil( + "SUB.w{nz}", + [ + mllil("REG.w", [mreg("d1")]), + mllil("CONST.w", [0]), + ], + ), + 
mllil("SET_FLAG", [MockFlag("v"), mllil("CONST.b", [0])]), + mllil("SET_FLAG", [MockFlag("c"), mllil("CONST.b", [0])]), ], ), @@ -91,15 +100,32 @@ def _rts_expected() -> list[MockLLIL]: # move $-004(a0),$0074(a6) ( b"\x41\xf9\x00\x27\x9d\x2e\x2d\x68\xff\xfc\x00\x74", + "lea ($279d2e),a0\nmove $-004(a0),$0074(a6)", [ - _il("SET_REG.d", mreg("a0"), _il("CONST_PTR.d", 0x279D2E)), - _il( + mllil("SET_REG.d", [mreg("a0"), mllil("CONST_PTR.d", [0x279D2E])]), + mllil( "STORE.d{nzvc}", - _il("ADD.d", _il("REG.d", mreg("a6")), _il("CONST.w", 0x74)), - _il( - "LOAD.d", - _il("SUB.d", _il("REG.d", mreg("a0")), _il("CONST.w", 0x4)), - ), + [ + mllil( + "ADD.d", + [ + mllil("REG.d", [mreg("a6")]), + mllil("CONST.w", [0x74]), + ], + ), + mllil( + "LOAD.d", + [ + mllil( + "SUB.d", + [ + mllil("REG.d", [mreg("a0")]), + mllil("CONST.w", [0x4]), + ], + ) + ], + ), + ], ), ], ), @@ -107,92 +133,164 @@ def _rts_expected() -> list[MockLLIL]: # beq (data_10) ( b"\x67\x00\x00\x0e", + "beq ($00000010)", [ - _il("IF", _il("FLAG_COND", 0, None), _l("t"), _l("f")), - _il("JUMP", _il("CONST_PTR.d", 0x10)), + mllil( + "IF", + [ + mllil("FLAG_COND", [0, None]), + LabelRef("t"), + LabelRef("f"), + ], + ), + mllil("JUMP", [mllil("CONST_PTR.d", [0x10])]), ], ), # jmp ($4c862) - (b"\x4e\xf9\x00\x04\xc8\x62", [_il("JUMP", _il("CONST_PTR.d", 0x4C862))]), + ( + b"\x4e\xf9\x00\x04\xc8\x62", + "jmp ($4c862)", + [mllil("JUMP", [mllil("CONST_PTR.d", [0x4C862])])], + ), # bra (data_28) - (b"\x60\x00\x00\x26", [_il("JUMP", _il("CONST_PTR.d", 0x28))]), + ( + b"\x60\x00\x00\x26", + "bra ($00000028)", + [mllil("JUMP", [mllil("CONST_PTR.d", [0x28])])], + ), # dbf d7,(data_-2c) ( b"\x51\xcf\xff\xd4", + "dbf d7,($-000002a)", [ - _il("IF", _il("CONST.b", 0), _l("skip"), _l("decrement")), - _il( + mllil( + "IF", + [ + mllil("CONST.b", [0]), + LabelRef("skip"), + LabelRef("decrement"), + ], + ), + mllil( "SET_REG.w", - mreg("TEMP0"), - _il("SUB.w", _il("REG.w", mreg("d7")), _il("CONST.w", 1)), + [ + mreg("TEMP0"), + 
mllil( + "SUB.w", + [ + mllil("REG.w", [mreg("d7")]), + mllil("CONST.w", [1]), + ], + ), + ], ), - _il("SET_REG.w", mreg("d7.w"), _il("REG.w", mreg("TEMP0"))), - _il( + mllil("SET_REG.w", [mreg("d7.w"), mllil("REG.w", [mreg("TEMP0")])]), + mllil( "IF", - _il( - "CMP_E.w", - _il("REG.w", mreg("TEMP0")), - _il("CONST.w", -1), - ), - _l("skip"), - _l("branch"), + [ + mllil( + "CMP_E.w", + [ + mllil("REG.w", [mreg("TEMP0")]), + mllil("CONST.w", [-1]), + ], + ), + LabelRef("skip"), + LabelRef("branch"), + ], ), - _il("JUMP", _il("CONST_PTR.d", -42)), + mllil("JUMP", [mllil("CONST_PTR.d", [-42])]), ], ), # bcc (data_5a) ( b"\x64\x00\x00\x58", + "bcc ($0000005a)", [ - _il("IF", _il("FLAG_COND", 7, None), _l("t"), _l("f")), - _il("JUMP", _il("CONST_PTR.d", 0x5A)), + mllil( + "IF", + [ + mllil("FLAG_COND", [7, None]), + LabelRef("t"), + LabelRef("f"), + ], + ), + mllil("JUMP", [mllil("CONST_PTR.d", [0x5A])]), ], ), # rts - (b"\x4e\x75", _rts_expected()), + (b"\x4e\x75", "rts", _rts_expected()), # andi.b #$-2,ccr - (b"\x02\x3c\x00\xfe", [_il("SET_FLAG", _flag("c"), _il("CONST.b", 0))]), + ( + b"\x02\x3c\x00\xfe", + "andi.b #$-2,ccr", + [mllil("SET_FLAG", [MockFlag("c"), mllil("CONST.b", [0])])], + ), # ori.b #$1,ccr ( b"\x00\x3c\x00\x01", + "ori.b #$1,ccr", [ - _il("SET_FLAG", _flag("c"), _il("CONST.b", 1)), - _il("SET_FLAG", _flag("x"), _il("CONST.b", 1)), + mllil("SET_FLAG", [MockFlag("c"), mllil("CONST.b", [1])]), + mllil("SET_FLAG", [MockFlag("x"), mllil("CONST.b", [1])]), ], ), # scs.b d1 ( b"\x55\xc1", + "scs.b d1", [ - _il("IF", _il("FLAG_COND", 3, None), _l("set"), _l("clear")), - _il("SET_REG.b", mreg("d1.b"), _il("CONST.b", 1)), - _il("GOTO", _l("skip")), - _il("SET_REG.b", mreg("d1.b"), _il("CONST.b", 0)), - _il("GOTO", _l("skip")), + mllil( + "IF", + [ + mllil("FLAG_COND", [3, None]), + LabelRef("set"), + LabelRef("clear"), + ], + ), + mllil("SET_REG.b", [mreg("d1.b"), mllil("CONST.b", [1])]), + mllil("GOTO", [LabelRef("skip")]), + mllil("SET_REG.b", 
[mreg("d1.b"), mllil("CONST.b", [0])]), + mllil("GOTO", [LabelRef("skip")]), ], ), # swap d6 ( b"\x48\x46", - [_il("SET_REG.d", mreg("d6"), _il("ROR.d", _il("REG.d", mreg("d6")), _il("CONST.b", 0x10)))], + "swap d6", + [ + mllil( + "SET_REG.d", + [ + mreg("d6"), + mllil( + "ROR.d", + [ + mllil("REG.d", [mreg("d6")]), + mllil("CONST.b", [0x10]), + ], + ), + ], + ) + ], ), # rtr ( b"\x4e\x77", + "rtr", [ - _il("SET_REG.w", mreg("ccr"), _il("POP.w")), - _il("RET", _il("POP.d")), + mllil("SET_REG.w", [mreg("ccr"), mllil("POP.w", [])]), + mllil("RET", [mllil("POP.d", [])]), ], ), ] - diff --git a/tests/test_llil.py b/tests/test_llil.py index 4b83b3d..48e07d6 100644 --- a/tests/test_llil.py +++ b/tests/test_llil.py @@ -25,6 +25,22 @@ def _lift_to_llil(data: bytes, *, start_addr: int = 0) -> list[MockLLIL]: return list(il) +def _disasm(data: bytes, *, start_addr: int = 0) -> str: + arch = m68k_arch.M68000() + + offset = 0 + lines: list[str] = [] + while offset < len(data): + result = arch.get_instruction_text(data[offset:], start_addr + offset) + assert result is not None + tokens, length = result + assert length is not None and length > 0 + lines.append("".join(token.text for token in tokens).rstrip()) + offset += length + + return "\n".join(lines) + + def _mask_for_size(size_bytes: int) -> int: return (1 << (size_bytes * 8)) - 1 @@ -77,7 +93,7 @@ def assert_llil(actual: list[MockLLIL], expected: list[MockLLIL]) -> None: _match_node(act, exp, label_bindings) -@pytest.mark.parametrize("data, expected", m68k_test.test_cases) -def test_llil_regressions(data: bytes, expected: list[MockLLIL]) -> None: - assert_llil(_lift_to_llil(data), expected) - +@pytest.mark.parametrize("data, expected_disasm, expected_llil", m68k_test.test_cases) +def test_llil_regressions(data: bytes, expected_disasm: str, expected_llil: list[MockLLIL]) -> None: + assert _disasm(data) == expected_disasm + assert_llil(_lift_to_llil(data), expected_llil) diff --git a/uv.lock b/uv.lock index 
865b6d0..b3069de 100644 --- a/uv.lock +++ b/uv.lock @@ -18,7 +18,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "binja-test-mocks", specifier = ">=0.1.7" }, + { name = "binja-test-mocks", specifier = ">=0.1.8" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, ] @@ -26,11 +26,11 @@ provides-extras = ["dev"] [[package]] name = "binja-test-mocks" -version = "0.1.7" +version = "0.1.8" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/de/7e/e3f12cbfbf2f170fa06d23d346cccf830f72cc11a7632b3d81c84942c160/binja_test_mocks-0.1.7.tar.gz", hash = "sha256:91a5d15659e1de2aed83f061029ee176f9d3196e9a9e38952a8ae9733ec6158c", size = 23579 } +sdist = { url = "https://files.pythonhosted.org/packages/51/26/82a0b6878bfb91cc8b08b46c3eea363a7c49f8acd263f52c08b634d465d1/binja_test_mocks-0.1.8.tar.gz", hash = "sha256:7cccbe50cd2c64ca45aeb672feae761f3fd82d704de21418b0e76ad6003b5c83", size = 23704 } wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/85/7e28f778265b89abc5656fb7a7449ff1197523b7aae466482f63b7a1601e/binja_test_mocks-0.1.7-py3-none-any.whl", hash = "sha256:90478540b78c747eb7f3e26fcc31711840086cfe5d65f066cb64e57bcab01395", size = 27825 }, + { url = "https://files.pythonhosted.org/packages/31/7e/b24badd4cce9e126f4436b474a1951f0dbe58f7fa755e6d4f927cc457d7c/binja_test_mocks-0.1.8-py3-none-any.whl", hash = "sha256:d953a273d5d23f557f7ab3248fbd1ca0edd13c3b3722dcb3d9825e53dc83a021", size = 27878 }, ] [[package]] From c2992ab3413e0833828048b35bbac5106ac980ea Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 14:53:09 +1100 Subject: [PATCH 37/46] Guard test mocks from plugin load --- AGENTS.md | 6 +- test.py | 583 ++++++++++++++++++++++++---------------------- tests/conftest.py | 40 ++-- 3 files changed, 326 insertions(+), 303 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 
9a48df1..622e3b8 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -6,7 +6,7 @@ - `m68k.py`: core Architecture + lifter (LLIL generation). - `m68k_disasm.py` / `m68k_ops.py`: instruction decoding and operand helpers. - `plugin.json`: Plugin Manager metadata. -- `test.py`: LLIL regression tests (requires Binary Ninja’s Python API). +- `test.py`: regression case data for unit tests (disassembly + LLIL expectations). - `binja-esr/`: newer, more fully-tooled variant; follow `binja-esr/AGENTS.md` when contributing there. ## Build, Test, and Development Commands @@ -19,8 +19,6 @@ Tooling: use `uv` for dependency management and running commands. - Load locally: place this folder in your Binary Ninja plugins directory and restart Binary Ninja. - Syntax-only check (no Binary Ninja required): `python3 -m compileall .` -- Run tests inside Binary Ninja’s Python environment: - - In the Binary Ninja scripting console: `from m68k.test import test_all; assert test_all()` ## Coding Style & Naming Conventions @@ -32,7 +30,7 @@ Tooling: use `uv` for dependency management and running commands. ## Testing Guidelines - Add/adjust cases in `test.py` when changing decode or lifting behavior. -- Tests compare LLIL string output; update expected strings only for intentional behavior changes (note the Binary Ninja version if the IL printer changed). +- Tests validate both disassembly output (string) and lifted LLIL (structural `MockLLIL` trees). - For mock-based tests, use the shared helper dependency (`binja-test-mocks` / “binja-test-helpers”) instead of duplicating Binary Ninja API stubs in this repo; if an API surface is missing, expand it upstream and bump the dependency. 
## Commit & Pull Request Guidelines diff --git a/test.py b/test.py index 9fa5f07..ebd569f 100644 --- a/test.py +++ b/test.py @@ -1,10 +1,12 @@ from __future__ import annotations from dataclasses import dataclass +import importlib.util +import sys +from typing import TYPE_CHECKING -from binja_test_mocks.mock_llil import MockFlag, MockLLIL, mllil, mreg - -from .m68k import RTS_PASS_FLAGS +if TYPE_CHECKING: + from binja_test_mocks.mock_llil import MockLLIL @dataclass(frozen=True) @@ -14,283 +16,298 @@ class LabelRef: name: str -def _maybe_restore_flags() -> list[MockLLIL]: - if not RTS_PASS_FLAGS: - return [] - return [mllil("SET_FLAG", [MockFlag("c"), mllil("REG.b", [mreg("rc")])])] - - -def _rts_expected() -> list[MockLLIL]: - out: list[MockLLIL] = [] - if RTS_PASS_FLAGS: - out.append(mllil("SET_REG.b", [mreg("rc"), mllil("FLAG", [MockFlag("c")])])) - out.append(mllil("RET", [mllil("POP.d", [])])) - return out - - -test_cases: list[tuple[bytes, str, list[MockLLIL]]] = [ - # moveq #$0000,d0 - ( - b"\x70\x00", - "moveq #$0000,d0", - [mllil("SET_REG.d{nzvc}", [mreg("d0"), mllil("CONST.d", [0])])], - ), - - # subq.b #$1,d0 - # FIXME: Generate flag 'x' - ( - b"\x53\x00", - "subq.b #$1,d0", - [ - mllil( - "SET_REG.b", - [ - mreg("d0.b"), - mllil( - "SUB.b{*}", - [ - mllil("REG.b", [mreg("d0")]), - mllil("CONST.b", [1]), - ], - ), - ], - ), - ], - ), - - # jsr 0x5dc1c, no arguments for this call - ( - b"\x4e\xb9\x00\x05\xdc\x1c", - "jsr ($5dc1c)", - [mllil("CALL", [mllil("CONST_PTR.d", [0x5DC1C])])] + _maybe_restore_flags(), - ), - - # at 0x53a, jsr 0x546, seems to be correctly interpreted as a call - ( - b"\x4e\xba\x00\x0a", - "jsr ($0000000c)", - [mllil("CALL", [mllil("CONST_PTR.d", [0xC])])] + _maybe_restore_flags(), - ), - - # lea (data_7a9ee[2]),a1 - ( - b"\x43\xf9\x00\x07\xa9\xf0", - "lea ($7a9f0),a1", - [mllil("SET_REG.d", [mreg("a1"), mllil("CONST_PTR.d", [0x7A9F0])])], - ), - - # tst.w d1 - ( - b"\x4a\x41", - "tst.w d1", - [ - mllil( - "SUB.w{nz}", - [ - 
mllil("REG.w", [mreg("d1")]), - mllil("CONST.w", [0]), - ], - ), - mllil("SET_FLAG", [MockFlag("v"), mllil("CONST.b", [0])]), - mllil("SET_FLAG", [MockFlag("c"), mllil("CONST.b", [0])]), - ], - ), - - # lea ($279d2e),a0 - # move $-004(a0),$0074(a6) - ( - b"\x41\xf9\x00\x27\x9d\x2e\x2d\x68\xff\xfc\x00\x74", - "lea ($279d2e),a0\nmove $-004(a0),$0074(a6)", - [ - mllil("SET_REG.d", [mreg("a0"), mllil("CONST_PTR.d", [0x279D2E])]), - mllil( - "STORE.d{nzvc}", - [ - mllil( - "ADD.d", - [ - mllil("REG.d", [mreg("a6")]), - mllil("CONST.w", [0x74]), - ], - ), - mllil( - "LOAD.d", - [ - mllil( - "SUB.d", - [ - mllil("REG.d", [mreg("a0")]), - mllil("CONST.w", [0x4]), - ], - ) - ], - ), - ], - ), - ], - ), - - # beq (data_10) - ( - b"\x67\x00\x00\x0e", - "beq ($00000010)", - [ - mllil( - "IF", - [ - mllil("FLAG_COND", [0, None]), - LabelRef("t"), - LabelRef("f"), - ], - ), - mllil("JUMP", [mllil("CONST_PTR.d", [0x10])]), - ], - ), - - # jmp ($4c862) - ( - b"\x4e\xf9\x00\x04\xc8\x62", - "jmp ($4c862)", - [mllil("JUMP", [mllil("CONST_PTR.d", [0x4C862])])], - ), - - # bra (data_28) - ( - b"\x60\x00\x00\x26", - "bra ($00000028)", - [mllil("JUMP", [mllil("CONST_PTR.d", [0x28])])], - ), - - # dbf d7,(data_-2c) - ( - b"\x51\xcf\xff\xd4", - "dbf d7,($-000002a)", - [ - mllil( - "IF", - [ - mllil("CONST.b", [0]), - LabelRef("skip"), - LabelRef("decrement"), - ], - ), - mllil( - "SET_REG.w", - [ - mreg("TEMP0"), - mllil( - "SUB.w", - [ - mllil("REG.w", [mreg("d7")]), - mllil("CONST.w", [1]), - ], - ), - ], - ), - mllil("SET_REG.w", [mreg("d7.w"), mllil("REG.w", [mreg("TEMP0")])]), - mllil( - "IF", - [ - mllil( - "CMP_E.w", - [ - mllil("REG.w", [mreg("TEMP0")]), - mllil("CONST.w", [-1]), - ], - ), - LabelRef("skip"), - LabelRef("branch"), - ], - ), - mllil("JUMP", [mllil("CONST_PTR.d", [-42])]), - ], - ), - - # bcc (data_5a) - ( - b"\x64\x00\x00\x58", - "bcc ($0000005a)", - [ - mllil( - "IF", - [ - mllil("FLAG_COND", [7, None]), - LabelRef("t"), - LabelRef("f"), - ], - ), - mllil("JUMP", 
[mllil("CONST_PTR.d", [0x5A])]), - ], - ), - - # rts - (b"\x4e\x75", "rts", _rts_expected()), - - # andi.b #$-2,ccr - ( - b"\x02\x3c\x00\xfe", - "andi.b #$-2,ccr", - [mllil("SET_FLAG", [MockFlag("c"), mllil("CONST.b", [0])])], - ), - - # ori.b #$1,ccr - ( - b"\x00\x3c\x00\x01", - "ori.b #$1,ccr", - [ - mllil("SET_FLAG", [MockFlag("c"), mllil("CONST.b", [1])]), - mllil("SET_FLAG", [MockFlag("x"), mllil("CONST.b", [1])]), - ], - ), - - # scs.b d1 - ( - b"\x55\xc1", - "scs.b d1", - [ - mllil( - "IF", - [ - mllil("FLAG_COND", [3, None]), - LabelRef("set"), - LabelRef("clear"), - ], - ), - mllil("SET_REG.b", [mreg("d1.b"), mllil("CONST.b", [1])]), - mllil("GOTO", [LabelRef("skip")]), - mllil("SET_REG.b", [mreg("d1.b"), mllil("CONST.b", [0])]), - mllil("GOTO", [LabelRef("skip")]), - ], - ), - - # swap d6 - ( - b"\x48\x46", - "swap d6", - [ - mllil( - "SET_REG.d", - [ - mreg("d6"), - mllil( - "ROR.d", - [ - mllil("REG.d", [mreg("d6")]), - mllil("CONST.b", [0x10]), - ], - ), - ], - ) - ], - ), - - # rtr - ( - b"\x4e\x77", - "rtr", - [ - mllil("SET_REG.w", [mreg("ccr"), mllil("POP.w", [])]), - mllil("RET", [mllil("POP.d", [])]), - ], - ), -] +def _running_under_pytest() -> bool: + return any(name == "pytest" or name.startswith("_pytest") for name in sys.modules) + +def _running_inside_binary_ninja() -> bool: + try: + return importlib.util.find_spec("binaryninjaui") is not None + except (ValueError, ImportError): + return False + + +test_cases: list[tuple[bytes, str, list[MockLLIL]]] = [] + +if _running_under_pytest() and not _running_inside_binary_ninja(): + from binja_test_mocks.mock_llil import MockFlag, MockLLIL, mllil, mreg + + from .m68k import RTS_PASS_FLAGS + + def _maybe_restore_flags() -> list[MockLLIL]: + if not RTS_PASS_FLAGS: + return [] + return [mllil("SET_FLAG", [MockFlag("c"), mllil("REG.b", [mreg("rc")])])] + + def _rts_expected() -> list[MockLLIL]: + out: list[MockLLIL] = [] + if RTS_PASS_FLAGS: + out.append(mllil("SET_REG.b", [mreg("rc"), mllil("FLAG", 
[MockFlag("c")])])) + out.append(mllil("RET", [mllil("POP.d", [])])) + return out + + test_cases = [ + # moveq #$0000,d0 + ( + b"\x70\x00", + "moveq #$0000,d0", + [mllil("SET_REG.d{nzvc}", [mreg("d0"), mllil("CONST.d", [0])])], + ), + + # subq.b #$1,d0 + # FIXME: Generate flag 'x' + ( + b"\x53\x00", + "subq.b #$1,d0", + [ + mllil( + "SET_REG.b", + [ + mreg("d0.b"), + mllil( + "SUB.b{*}", + [ + mllil("REG.b", [mreg("d0")]), + mllil("CONST.b", [1]), + ], + ), + ], + ), + ], + ), + + # jsr 0x5dc1c, no arguments for this call + ( + b"\x4e\xb9\x00\x05\xdc\x1c", + "jsr ($5dc1c)", + [mllil("CALL", [mllil("CONST_PTR.d", [0x5DC1C])])] + _maybe_restore_flags(), + ), + + # at 0x53a, jsr 0x546, seems to be correctly interpreted as a call + ( + b"\x4e\xba\x00\x0a", + "jsr ($0000000c)", + [mllil("CALL", [mllil("CONST_PTR.d", [0xC])])] + _maybe_restore_flags(), + ), + + # lea (data_7a9ee[2]),a1 + ( + b"\x43\xf9\x00\x07\xa9\xf0", + "lea ($7a9f0),a1", + [mllil("SET_REG.d", [mreg("a1"), mllil("CONST_PTR.d", [0x7A9F0])])], + ), + + # tst.w d1 + ( + b"\x4a\x41", + "tst.w d1", + [ + mllil( + "SUB.w{nz}", + [ + mllil("REG.w", [mreg("d1")]), + mllil("CONST.w", [0]), + ], + ), + mllil("SET_FLAG", [MockFlag("v"), mllil("CONST.b", [0])]), + mllil("SET_FLAG", [MockFlag("c"), mllil("CONST.b", [0])]), + ], + ), + + # lea ($279d2e),a0 + # move $-004(a0),$0074(a6) + ( + b"\x41\xf9\x00\x27\x9d\x2e\x2d\x68\xff\xfc\x00\x74", + "lea ($279d2e),a0\nmove $-004(a0),$0074(a6)", + [ + mllil("SET_REG.d", [mreg("a0"), mllil("CONST_PTR.d", [0x279D2E])]), + mllil( + "STORE.d{nzvc}", + [ + mllil( + "ADD.d", + [ + mllil("REG.d", [mreg("a6")]), + mllil("CONST.w", [0x74]), + ], + ), + mllil( + "LOAD.d", + [ + mllil( + "SUB.d", + [ + mllil("REG.d", [mreg("a0")]), + mllil("CONST.w", [0x4]), + ], + ) + ], + ), + ], + ), + ], + ), + + # beq (data_10) + ( + b"\x67\x00\x00\x0e", + "beq ($00000010)", + [ + mllil( + "IF", + [ + mllil("FLAG_COND", [0, None]), + LabelRef("t"), + LabelRef("f"), + ], + ), + mllil("JUMP", 
[mllil("CONST_PTR.d", [0x10])]), + ], + ), + + # jmp ($4c862) + ( + b"\x4e\xf9\x00\x04\xc8\x62", + "jmp ($4c862)", + [mllil("JUMP", [mllil("CONST_PTR.d", [0x4C862])])], + ), + + # bra (data_28) + ( + b"\x60\x00\x00\x26", + "bra ($00000028)", + [mllil("JUMP", [mllil("CONST_PTR.d", [0x28])])], + ), + + # dbf d7,(data_-2c) + ( + b"\x51\xcf\xff\xd4", + "dbf d7,($-000002a)", + [ + mllil( + "IF", + [ + mllil("CONST.b", [0]), + LabelRef("skip"), + LabelRef("decrement"), + ], + ), + mllil( + "SET_REG.w", + [ + mreg("TEMP0"), + mllil( + "SUB.w", + [ + mllil("REG.w", [mreg("d7")]), + mllil("CONST.w", [1]), + ], + ), + ], + ), + mllil("SET_REG.w", [mreg("d7.w"), mllil("REG.w", [mreg("TEMP0")])]), + mllil( + "IF", + [ + mllil( + "CMP_E.w", + [ + mllil("REG.w", [mreg("TEMP0")]), + mllil("CONST.w", [-1]), + ], + ), + LabelRef("skip"), + LabelRef("branch"), + ], + ), + mllil("JUMP", [mllil("CONST_PTR.d", [-42])]), + ], + ), + + # bcc (data_5a) + ( + b"\x64\x00\x00\x58", + "bcc ($0000005a)", + [ + mllil( + "IF", + [ + mllil("FLAG_COND", [7, None]), + LabelRef("t"), + LabelRef("f"), + ], + ), + mllil("JUMP", [mllil("CONST_PTR.d", [0x5A])]), + ], + ), + + # rts + (b"\x4e\x75", "rts", _rts_expected()), + + # andi.b #$-2,ccr + ( + b"\x02\x3c\x00\xfe", + "andi.b #$-2,ccr", + [mllil("SET_FLAG", [MockFlag("c"), mllil("CONST.b", [0])])], + ), + + # ori.b #$1,ccr + ( + b"\x00\x3c\x00\x01", + "ori.b #$1,ccr", + [ + mllil("SET_FLAG", [MockFlag("c"), mllil("CONST.b", [1])]), + mllil("SET_FLAG", [MockFlag("x"), mllil("CONST.b", [1])]), + ], + ), + + # scs.b d1 + ( + b"\x55\xc1", + "scs.b d1", + [ + mllil( + "IF", + [ + mllil("FLAG_COND", [3, None]), + LabelRef("set"), + LabelRef("clear"), + ], + ), + mllil("SET_REG.b", [mreg("d1.b"), mllil("CONST.b", [1])]), + mllil("GOTO", [LabelRef("skip")]), + mllil("SET_REG.b", [mreg("d1.b"), mllil("CONST.b", [0])]), + mllil("GOTO", [LabelRef("skip")]), + ], + ), + + # swap d6 + ( + b"\x48\x46", + "swap d6", + [ + mllil( + "SET_REG.d", + [ + mreg("d6"), + 
mllil( + "ROR.d", + [ + mllil("REG.d", [mreg("d6")]), + mllil("CONST.b", [0x10]), + ], + ), + ], + ) + ], + ), + + # rtr + ( + b"\x4e\x77", + "rtr", + [ + mllil("SET_REG.w", [mreg("ccr"), mllil("POP.w", [])]), + mllil("RET", [mllil("POP.d", [])]), + ], + ), + ] diff --git a/tests/conftest.py b/tests/conftest.py index 0f59a49..3620186 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,29 +1,37 @@ from __future__ import annotations +import importlib.util import os import sys import types from pathlib import Path -os.environ.setdefault("FORCE_BINJA_MOCK", "1") +def _running_under_pytest() -> bool: + return any(name == "pytest" or name.startswith("_pytest") for name in sys.modules) -# Installs a stubbed `binaryninja` module into `sys.modules`. -from binja_test_mocks import binja_api # noqa: F401 # pyright: ignore -from binja_test_mocks import mock_llil +def _running_inside_binary_ninja() -> bool: + try: + return importlib.util.find_spec("binaryninjaui") is not None + except (ValueError, ImportError): + return False -mock_llil.set_size_lookup( - {1: ".b", 2: ".w", 4: ".d", 8: ".q", 16: ".o"}, - {"b": 1, "w": 2, "d": 4, "q": 8, "o": 16}, -) +if _running_under_pytest() and not _running_inside_binary_ninja(): + os.environ.setdefault("FORCE_BINJA_MOCK", "1") + # Installs a stubbed `binaryninja` module into `sys.modules`. 
+ from binja_test_mocks import binja_api # noqa: F401 # pyright: ignore + from binja_test_mocks import mock_llil -def _install_repo_as_m68k_package() -> None: - repo_root = Path(__file__).resolve().parents[1] - pkg = types.ModuleType("m68k") - pkg.__path__ = [str(repo_root)] - pkg.__file__ = str(repo_root / "__init__.py") - sys.modules["m68k"] = pkg + mock_llil.set_size_lookup( + {1: ".b", 2: ".w", 4: ".d", 8: ".q", 16: ".o"}, + {"b": 1, "w": 2, "d": 4, "q": 8, "o": 16}, + ) + def _install_repo_as_m68k_package() -> None: + repo_root = Path(__file__).resolve().parents[1] + pkg = types.ModuleType("m68k") + pkg.__path__ = [str(repo_root)] + pkg.__file__ = str(repo_root / "__init__.py") + sys.modules["m68k"] = pkg -_install_repo_as_m68k_package() - + _install_repo_as_m68k_package() From 209448756bdc9c4d10641e80d4d67f98d40c6bb8 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 15:51:27 +1100 Subject: [PATCH 38/46] Use binja-esr init pattern --- __init__.py | 9 ++------- pyproject.toml | 2 +- uv.lock | 8 ++++---- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/__init__.py b/__init__.py index c7e36e7..59a3c33 100644 --- a/__init__.py +++ b/__init__.py @@ -1,7 +1,6 @@ from __future__ import annotations import importlib.util -import os import sys from pathlib import Path @@ -21,11 +20,9 @@ def module_exists(module_name: str) -> bool: return False if module_exists("binaryninja") and __package__: - from binaryninja import Architecture, BinaryViewType, CallingConvention + from binaryninja import Architecture, BinaryViewType, CallingConvention, PluginCommand from binaryninja.enums import Endianness - from binaryninja.plugin import PluginCommand - from .logging import __module__, log_debug from .m68k import ( M68000, M68008, @@ -40,7 +37,7 @@ def module_exists(module_name: str) -> bool: prompt_create_vector_table, ) - log_debug(f"m68k Plugin loaded from: {os.path.dirname(__module__.__loader__.path)}") + print(f"m68k Plugin loaded from: 
{_plugin_dir}") # PluginCommand.register("Create M68k vector table", "Create M68k vector table", prompt_create_vector_table) PluginCommand.register_for_address( @@ -68,5 +65,3 @@ class ParametersInRegistersCallingConvention(CallingConvention): arch = Architecture["M68000"] arch.register_calling_convention(ParametersInRegistersCallingConvention(arch, "default")) - - BinaryViewType["ELF"].register_arch(4, Endianness.BigEndian, Architecture["M68030"]) diff --git a/pyproject.toml b/pyproject.toml index d876d72..7b2ed89 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ description = "Binary Ninja disassembler and lifter for the Motorola 68k archite readme = "README.md" requires-python = ">=3.11" dependencies = [ - "binja-test-mocks>=0.1.8", + "binja-test-mocks>=0.1.9", ] [project.optional-dependencies] diff --git a/uv.lock b/uv.lock index b3069de..8978c2f 100644 --- a/uv.lock +++ b/uv.lock @@ -18,7 +18,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "binja-test-mocks", specifier = ">=0.1.8" }, + { name = "binja-test-mocks", specifier = ">=0.1.9" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, ] @@ -26,11 +26,11 @@ provides-extras = ["dev"] [[package]] name = "binja-test-mocks" -version = "0.1.8" +version = "0.1.9" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/51/26/82a0b6878bfb91cc8b08b46c3eea363a7c49f8acd263f52c08b634d465d1/binja_test_mocks-0.1.8.tar.gz", hash = "sha256:7cccbe50cd2c64ca45aeb672feae761f3fd82d704de21418b0e76ad6003b5c83", size = 23704 } +sdist = { url = "https://files.pythonhosted.org/packages/85/24/9b6d535159103a8f1fe0ad9b05fcee4663495904ea1f0b9735d0cbba7dfa/binja_test_mocks-0.1.9.tar.gz", hash = "sha256:96a9ffb24e5ecc71cb105ad2de7876a729379555153c46b3e52ae32d80ad4b8f", size = 24445 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/31/7e/b24badd4cce9e126f4436b474a1951f0dbe58f7fa755e6d4f927cc457d7c/binja_test_mocks-0.1.8-py3-none-any.whl", hash = "sha256:d953a273d5d23f557f7ab3248fbd1ca0edd13c3b3722dcb3d9825e53dc83a021", size = 27878 }, + { url = "https://files.pythonhosted.org/packages/30/08/8ae648e805b5689f62e410a44b7c91ac03a9cf72b351349371ad86fe801d/binja_test_mocks-0.1.9-py3-none-any.whl", hash = "sha256:4bf47313d216f5110444233c4be0a9689a8eeeb70442d96ea6740a986619d464", size = 28251 }, ] [[package]] From 783a7ff66ed2899a59acade09984781ad527a96b Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 16:07:22 +1100 Subject: [PATCH 39/46] Put binja-test-mocks in dev deps --- AGENTS.md | 4 ++-- pyproject.toml | 5 ++--- uv.lock | 6 ++---- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 622e3b8..eef1a0b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -25,7 +25,7 @@ Tooling: use `uv` for dependency management and running commands. - Python 3, 4-space indentation; keep changes small and readable. - Prefer type hints where practical (matches the existing `m68k.py` style). - Naming: `snake_case` for functions/variables, `CamelCase` for classes, `UPPER_SNAKE_CASE` for constants. -- Use `logging.py` (`log_debug`, `log_info`, …) instead of `print` so output goes to the Binary Ninja log. +- Prefer plain `print(...)` for lightweight diagnostics; avoid importing `binaryninja.log` at import time so unit tests (and mocks) stay simple. ## Testing Guidelines @@ -36,7 +36,7 @@ Tooling: use `uv` for dependency management and running commands. ## Commit & Pull Request Guidelines - Commits use short, imperative summaries (e.g., “Fix TST flags”, “Add tests for calling functions”). -- PRs should include: what instructions/flags changed, how to reproduce, and confirmation that `test_all()` passes (or why it cannot). 
+- PRs should include: what instructions/flags changed, how to reproduce, and confirmation that `uv run pytest` passes (or why it cannot). - Open PRs against `mblsha/binaryninja-m68k` (this fork) unless explicitly coordinating with upstream; with GitHub CLI use `gh pr create --repo mblsha/binaryninja-m68k ...` to avoid targeting `galenbwill/binaryninja-m68k` by default. ## CI & Git Tips diff --git a/pyproject.toml b/pyproject.toml index 7b2ed89..546b361 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,12 +8,11 @@ version = "0.5.0" description = "Binary Ninja disassembler and lifter for the Motorola 68k architecture." readme = "README.md" requires-python = ">=3.11" -dependencies = [ - "binja-test-mocks>=0.1.9", -] +dependencies = [] [project.optional-dependencies] dev = [ + "binja-test-mocks>=0.1.9", "pytest>=7.0.0", "ruff>=0.1.0", ] diff --git a/uv.lock b/uv.lock index 8978c2f..befea80 100644 --- a/uv.lock +++ b/uv.lock @@ -6,19 +6,17 @@ requires-python = ">=3.11" name = "binaryninja-m68k" version = "0.5.0" source = { virtual = "." 
} -dependencies = [ - { name = "binja-test-mocks" }, -] [package.optional-dependencies] dev = [ + { name = "binja-test-mocks" }, { name = "pytest" }, { name = "ruff" }, ] [package.metadata] requires-dist = [ - { name = "binja-test-mocks", specifier = ">=0.1.9" }, + { name = "binja-test-mocks", marker = "extra == 'dev'", specifier = ">=0.1.9" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, ] From 0efc1905aa4131ea50d4201978672b32f6b202d1 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 17:00:03 +1100 Subject: [PATCH 40/46] Adopt src layout --- AGENTS.md | 10 ++-- __init__.py | 62 ++++++----------------- pyproject.toml | 7 ++- logging.py => src/m68k/logging.py | 0 m68k.py => src/m68k/m68k.py | 0 m68k_disasm.py => src/m68k/m68k_disasm.py | 0 m68k_ops.py => src/m68k/m68k_ops.py | 0 test.py => src/m68k/test.py | 0 tests/conftest.py | 17 +------ tests/test_llil.py | 6 ++- uv.lock | 10 ++-- 11 files changed, 35 insertions(+), 77 deletions(-) rename logging.py => src/m68k/logging.py (100%) rename m68k.py => src/m68k/m68k.py (100%) rename m68k_disasm.py => src/m68k/m68k_disasm.py (100%) rename m68k_ops.py => src/m68k/m68k_ops.py (100%) rename test.py => src/m68k/test.py (100%) diff --git a/AGENTS.md b/AGENTS.md index eef1a0b..77a78dc 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -2,11 +2,11 @@ ## Project Structure & Module Organization -- `__init__.py`: Binary Ninja plugin entrypoint (registers architectures and commands). -- `m68k.py`: core Architecture + lifter (LLIL generation). -- `m68k_disasm.py` / `m68k_ops.py`: instruction decoding and operand helpers. +- `__init__.py`: Binary Ninja plugin shim (registers architectures/commands when loaded by Binary Ninja). +- `src/m68k/m68k.py`: core Architecture + lifter (LLIL generation). +- `src/m68k/m68k_disasm.py` / `src/m68k/m68k_ops.py`: instruction decoding and operand helpers. 
- `plugin.json`: Plugin Manager metadata. -- `test.py`: regression case data for unit tests (disassembly + LLIL expectations). +- `src/m68k/test.py`: regression case data for unit tests (disassembly + LLIL expectations). - `binja-esr/`: newer, more fully-tooled variant; follow `binja-esr/AGENTS.md` when contributing there. ## Build, Test, and Development Commands @@ -29,7 +29,7 @@ Tooling: use `uv` for dependency management and running commands. ## Testing Guidelines -- Add/adjust cases in `test.py` when changing decode or lifting behavior. +- Add/adjust cases in `src/m68k/test.py` when changing decode or lifting behavior. - Tests validate both disassembly output (string) and lifted LLIL (structural `MockLLIL` trees). - For mock-based tests, use the shared helper dependency (`binja-test-mocks` / “binja-test-helpers”) instead of duplicating Binary Ninja API stubs in this repo; if an API surface is missing, expand it upstream and bump the dependency. diff --git a/__init__.py b/__init__.py index 59a3c33..e827960 100644 --- a/__init__.py +++ b/__init__.py @@ -7,9 +7,19 @@ # Ensure the plugin directory is available on `sys.path` so that absolute # imports work when the plugin is loaded directly by Binary Ninja. -_plugin_dir = str(Path(__file__).resolve().parent) -if _plugin_dir not in sys.path: - sys.path.insert(0, _plugin_dir) +_plugin_dir = Path(__file__).resolve().parent +_plugin_dir_str = str(_plugin_dir) +if _plugin_dir_str not in sys.path: + sys.path.insert(0, _plugin_dir_str) + +# This repository uses a `src/` layout for the implementation package. When the +# plugin is loaded directly by Binary Ninja, extend the package search path so +# `from .m68k import ...` resolves to `src/m68k/...`. 
+_src_pkg_dir = _plugin_dir / "src" / "m68k" +if _src_pkg_dir.is_dir(): + _src_pkg_dir_str = str(_src_pkg_dir) + if _src_pkg_dir_str not in __path__: + __path__.append(_src_pkg_dir_str) def module_exists(module_name: str) -> bool: if module_name in sys.modules: @@ -20,48 +30,6 @@ def module_exists(module_name: str) -> bool: return False if module_exists("binaryninja") and __package__: - from binaryninja import Architecture, BinaryViewType, CallingConvention, PluginCommand - from binaryninja.enums import Endianness + from ._bn_plugin import register - from .m68k import ( - M68000, - M68008, - M68010, - M68020, - M68030, - M68040, - M68EC040, - M68LC040, - M68330, - M68340, - prompt_create_vector_table, - ) - - print(f"m68k Plugin loaded from: {_plugin_dir}") - - # PluginCommand.register("Create M68k vector table", "Create M68k vector table", prompt_create_vector_table) - PluginCommand.register_for_address( - "Create M68k vector table", - "Create M68k vector table", - prompt_create_vector_table, - ) - - M68000.register() - M68008.register() - M68010.register() - M68020.register() - M68030.register() - M68040.register() - M68LC040.register() - M68EC040.register() - M68330.register() - M68340.register() - - # BinaryViewType['ELF'].register_arch(4, Endianness.BigEndian, Architecture['M68030']) - BinaryViewType["ELF"].register_arch(4, Endianness.BigEndian, Architecture["M68030"]) - - class ParametersInRegistersCallingConvention(CallingConvention): - name = "ParametersInRegisters" - - arch = Architecture["M68000"] - arch.register_calling_convention(ParametersInRegistersCallingConvention(arch, "default")) + register(plugin_dir=_plugin_dir) diff --git a/pyproject.toml b/pyproject.toml index 546b361..92e3f2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,11 +12,14 @@ dependencies = [] [project.optional-dependencies] dev = [ - "binja-test-mocks>=0.1.9", + "binja-test-mocks>=0.1.10", "pytest>=7.0.0", "ruff>=0.1.0", ] +[tool.hatch.build.targets.wheel] +packages = 
["src/m68k"] + [tool.pytest.ini_options] testpaths = ["tests"] norecursedirs = ["binja-esr"] @@ -41,4 +44,4 @@ ignore = [ ] [tool.uv] -package = false +package = true diff --git a/logging.py b/src/m68k/logging.py similarity index 100% rename from logging.py rename to src/m68k/logging.py diff --git a/m68k.py b/src/m68k/m68k.py similarity index 100% rename from m68k.py rename to src/m68k/m68k.py diff --git a/m68k_disasm.py b/src/m68k/m68k_disasm.py similarity index 100% rename from m68k_disasm.py rename to src/m68k/m68k_disasm.py diff --git a/m68k_ops.py b/src/m68k/m68k_ops.py similarity index 100% rename from m68k_ops.py rename to src/m68k/m68k_ops.py diff --git a/test.py b/src/m68k/test.py similarity index 100% rename from test.py rename to src/m68k/test.py diff --git a/tests/conftest.py b/tests/conftest.py index 3620186..6fd83bc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,12 +2,6 @@ import importlib.util import os -import sys -import types -from pathlib import Path - -def _running_under_pytest() -> bool: - return any(name == "pytest" or name.startswith("_pytest") for name in sys.modules) def _running_inside_binary_ninja() -> bool: try: @@ -15,7 +9,7 @@ def _running_inside_binary_ninja() -> bool: except (ValueError, ImportError): return False -if _running_under_pytest() and not _running_inside_binary_ninja(): +if not _running_inside_binary_ninja(): os.environ.setdefault("FORCE_BINJA_MOCK", "1") # Installs a stubbed `binaryninja` module into `sys.modules`. 
@@ -26,12 +20,3 @@ def _running_inside_binary_ninja() -> bool: {1: ".b", 2: ".w", 4: ".d", 8: ".q", 16: ".o"}, {"b": 1, "w": 2, "d": 4, "q": 8, "o": 16}, ) - - def _install_repo_as_m68k_package() -> None: - repo_root = Path(__file__).resolve().parents[1] - pkg = types.ModuleType("m68k") - pkg.__path__ = [str(repo_root)] - pkg.__file__ = str(repo_root / "__init__.py") - sys.modules["m68k"] = pkg - - _install_repo_as_m68k_package() diff --git a/tests/test_llil.py b/tests/test_llil.py index 48e07d6..c154b9d 100644 --- a/tests/test_llil.py +++ b/tests/test_llil.py @@ -5,7 +5,7 @@ import pytest from binaryninja import lowlevelil -from binja_test_mocks.mock_llil import MockFlag, MockLLIL, MockReg +from binja_test_mocks.mock_llil import MockFlag, MockLabel, MockLLIL, MockReg m68k_test = importlib.import_module("m68k.test") m68k_arch = importlib.import_module("m68k.m68k") @@ -22,7 +22,9 @@ def _lift_to_llil(data: bytes, *, start_addr: int = 0) -> list[MockLLIL]: assert length is not None and length > 0 offset += length - return list(il) + # The mock IL appends LABEL pseudo-nodes for control-flow; ignore those so + # test cases can focus on the executable LLIL operations. + return [node for node in il if not isinstance(node, MockLabel)] def _disasm(data: bytes, *, start_addr: int = 0) -> str: diff --git a/uv.lock b/uv.lock index befea80..be6bdd3 100644 --- a/uv.lock +++ b/uv.lock @@ -5,7 +5,7 @@ requires-python = ">=3.11" [[package]] name = "binaryninja-m68k" version = "0.5.0" -source = { virtual = "." } +source = { editable = "." 
} [package.optional-dependencies] dev = [ @@ -16,7 +16,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "binja-test-mocks", marker = "extra == 'dev'", specifier = ">=0.1.9" }, + { name = "binja-test-mocks", marker = "extra == 'dev'", specifier = ">=0.1.10" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, ] @@ -24,11 +24,11 @@ provides-extras = ["dev"] [[package]] name = "binja-test-mocks" -version = "0.1.9" +version = "0.1.10" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/85/24/9b6d535159103a8f1fe0ad9b05fcee4663495904ea1f0b9735d0cbba7dfa/binja_test_mocks-0.1.9.tar.gz", hash = "sha256:96a9ffb24e5ecc71cb105ad2de7876a729379555153c46b3e52ae32d80ad4b8f", size = 24445 } +sdist = { url = "https://files.pythonhosted.org/packages/9d/7c/e06ce363829d2ad44fbe606a06ce4e4eb853acad4b24e4c2ab358fc5b18c/binja_test_mocks-0.1.10.tar.gz", hash = "sha256:7e2c9336db57c48dd6cb053737694f367b4baa7b9fdfa4f66c93a5f64498ad1b", size = 24845 } wheels = [ - { url = "https://files.pythonhosted.org/packages/30/08/8ae648e805b5689f62e410a44b7c91ac03a9cf72b351349371ad86fe801d/binja_test_mocks-0.1.9-py3-none-any.whl", hash = "sha256:4bf47313d216f5110444233c4be0a9689a8eeeb70442d96ea6740a986619d464", size = 28251 }, + { url = "https://files.pythonhosted.org/packages/81/e8/292b63f73a8cd3fef51d228950ee6d95485e53f44d899ee8713dda546acc/binja_test_mocks-0.1.10-py3-none-any.whl", hash = "sha256:cfb1d54723d86dca490c15fca486e0854c8e9af46b562f78278028d23a633af7", size = 28278 }, ] [[package]] From 968380fc19e4cc3687e1e108cbceed478aae52bc Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 17:00:15 +1100 Subject: [PATCH 41/46] Add m68k package entrypoints --- src/m68k/__init__.py | 7 ++++++ src/m68k/_bn_plugin.py | 50 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 
src/m68k/__init__.py create mode 100644 src/m68k/_bn_plugin.py diff --git a/src/m68k/__init__.py b/src/m68k/__init__.py new file mode 100644 index 0000000..abca709 --- /dev/null +++ b/src/m68k/__init__.py @@ -0,0 +1,7 @@ +"""Motorola 68k Binary Ninja plugin implementation. + +The Binary Ninja plugin entrypoint lives at the repository root `__init__.py`. +This package exists to support a `src/` layout and normal imports during unit +testing (via `uv run pytest`). +""" + diff --git a/src/m68k/_bn_plugin.py b/src/m68k/_bn_plugin.py new file mode 100644 index 0000000..e2a9311 --- /dev/null +++ b/src/m68k/_bn_plugin.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from pathlib import Path + + +def register(*, plugin_dir: Path) -> None: + from binaryninja import Architecture, BinaryViewType, CallingConvention, PluginCommand + from binaryninja.enums import Endianness + + from .m68k import ( + M68000, + M68008, + M68010, + M68020, + M68030, + M68040, + M68EC040, + M68LC040, + M68330, + M68340, + prompt_create_vector_table, + ) + + print(f"m68k Plugin loaded from: {plugin_dir}") + + PluginCommand.register_for_address( + "Create M68k vector table", + "Create M68k vector table", + prompt_create_vector_table, + ) + + M68000.register() + M68008.register() + M68010.register() + M68020.register() + M68030.register() + M68040.register() + M68LC040.register() + M68EC040.register() + M68330.register() + M68340.register() + + BinaryViewType["ELF"].register_arch(4, Endianness.BigEndian, Architecture["M68030"]) + + class ParametersInRegistersCallingConvention(CallingConvention): + name = "ParametersInRegisters" + + arch = Architecture["M68000"] + arch.register_calling_convention(ParametersInRegistersCallingConvention(arch, "default")) + From d8f725a24f29efd9e8cb8fdc35d5bc247efa16b3 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 17:07:52 +1100 Subject: [PATCH 42/46] Make plugin shim safe under mocks --- __init__.py | 16 +++++++++++++--- 1 file 
changed, 13 insertions(+), 3 deletions(-) diff --git a/__init__.py b/__init__.py index e827960..cbbf4fb 100644 --- a/__init__.py +++ b/__init__.py @@ -1,6 +1,7 @@ from __future__ import annotations import importlib.util +import os import sys from pathlib import Path @@ -12,14 +13,21 @@ if _plugin_dir_str not in sys.path: sys.path.insert(0, _plugin_dir_str) +# Some tooling can import this file as a plain module (not a package). Make the +# module behave like a package so `m68k.*` submodules remain importable. +_package_paths = globals().get("__path__") +if _package_paths is None: + __path__ = [_plugin_dir_str] # type: ignore[var-annotated] + _package_paths = __path__ + # This repository uses a `src/` layout for the implementation package. When the # plugin is loaded directly by Binary Ninja, extend the package search path so # `from .m68k import ...` resolves to `src/m68k/...`. _src_pkg_dir = _plugin_dir / "src" / "m68k" if _src_pkg_dir.is_dir(): _src_pkg_dir_str = str(_src_pkg_dir) - if _src_pkg_dir_str not in __path__: - __path__.append(_src_pkg_dir_str) + if _src_pkg_dir_str not in _package_paths: + _package_paths.append(_src_pkg_dir_str) def module_exists(module_name: str) -> bool: if module_name in sys.modules: @@ -29,7 +37,9 @@ def module_exists(module_name: str) -> bool: except (ValueError, ImportError): return False -if module_exists("binaryninja") and __package__: +_force_mock = os.environ.get("FORCE_BINJA_MOCK", "").lower() in ("1", "true", "yes") + +if module_exists("binaryninja") and __package__ and not _force_mock: from ._bn_plugin import register register(plugin_dir=_plugin_dir) From ec66c9d6f05f9c05cc36678ff7a8c35280704e77 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 17:35:25 +1100 Subject: [PATCH 43/46] Add optional debug prints to plugin register --- AGENTS.md | 1 + src/m68k/_bn_plugin.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index 77a78dc..cb5ed14 100644 --- a/AGENTS.md 
+++ b/AGENTS.md @@ -16,6 +16,7 @@ Tooling: use `uv` for dependency management and running commands. - Install dev deps: `uv sync --extra dev` - Lint: `uv run ruff check .` - Unit tests (uses mocks, no Binary Ninja required): `uv run pytest` +- Optional plugin init debug prints: `M68K_DEBUG=1` - Load locally: place this folder in your Binary Ninja plugins directory and restart Binary Ninja. - Syntax-only check (no Binary Ninja required): `python3 -m compileall .` diff --git a/src/m68k/_bn_plugin.py b/src/m68k/_bn_plugin.py index e2a9311..1337023 100644 --- a/src/m68k/_bn_plugin.py +++ b/src/m68k/_bn_plugin.py @@ -1,9 +1,18 @@ from __future__ import annotations +import os from pathlib import Path def register(*, plugin_dir: Path) -> None: + debug = os.environ.get("M68K_DEBUG", "").lower() in ("1", "true", "yes") + + def _debug(msg: str) -> None: + if debug: + print(f"m68k[debug] {msg}") + + _debug("Starting plugin registration") + from binaryninja import Architecture, BinaryViewType, CallingConvention, PluginCommand from binaryninja.enums import Endianness @@ -22,6 +31,7 @@ def register(*, plugin_dir: Path) -> None: ) print(f"m68k Plugin loaded from: {plugin_dir}") + _debug("Registering PluginCommand: Create M68k vector table") PluginCommand.register_for_address( "Create M68k vector table", @@ -29,6 +39,7 @@ def register(*, plugin_dir: Path) -> None: prompt_create_vector_table, ) + _debug("Registering architectures: M68000/M68008/M68010/M68020/M68030/M68040/M68LC040/M68EC040/M68330/M68340") M68000.register() M68008.register() M68010.register() @@ -40,11 +51,14 @@ def register(*, plugin_dir: Path) -> None: M68330.register() M68340.register() + _debug("Registering ELF arch mapping: (4, BigEndian) -> M68030") BinaryViewType["ELF"].register_arch(4, Endianness.BigEndian, Architecture["M68030"]) class ParametersInRegistersCallingConvention(CallingConvention): name = "ParametersInRegisters" + _debug("Registering calling convention: ParametersInRegisters (M68000 default)") 
arch = Architecture["M68000"] arch.register_calling_convention(ParametersInRegistersCallingConvention(arch, "default")) + _debug("Finished plugin registration") From 54993fb9040a667fc32aaa5d9ff40364ffd3e8a6 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 17:40:52 +1100 Subject: [PATCH 44/46] Always print plugin registration debug --- AGENTS.md | 2 +- __init__.py | 46 +++++++++++++++++++++++++++++++++++++++--- src/m68k/_bn_plugin.py | 8 ++------ 3 files changed, 46 insertions(+), 10 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index cb5ed14..eecd224 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -16,7 +16,7 @@ Tooling: use `uv` for dependency management and running commands. - Install dev deps: `uv sync --extra dev` - Lint: `uv run ruff check .` - Unit tests (uses mocks, no Binary Ninja required): `uv run pytest` -- Optional plugin init debug prints: `M68K_DEBUG=1` +- Plugin init always prints debug output (useful when diagnosing load/registration issues). - Load locally: place this folder in your Binary Ninja plugins directory and restart Binary Ninja. 
- Syntax-only check (no Binary Ninja required): `python3 -m compileall .` diff --git a/__init__.py b/__init__.py index cbbf4fb..033f918 100644 --- a/__init__.py +++ b/__init__.py @@ -3,6 +3,7 @@ import importlib.util import os import sys +import traceback from pathlib import Path @@ -37,9 +38,48 @@ def module_exists(module_name: str) -> bool: except (ValueError, ImportError): return False -_force_mock = os.environ.get("FORCE_BINJA_MOCK", "").lower() in ("1", "true", "yes") +def _running_inside_binary_ninja() -> bool: + try: + if module_exists("binaryninjaui"): + return True + except Exception: + pass + + exe = (sys.executable or "").lower() + if "binary ninja.app" in exe: + return True + return os.path.basename(exe) in ("binaryninja", "binaryninja.exe") + -if module_exists("binaryninja") and __package__ and not _force_mock: +_force_mock_requested = os.environ.get("FORCE_BINJA_MOCK", "").lower() in ("1", "true", "yes") +_running_binja = _running_inside_binary_ninja() +_skip_registration = _force_mock_requested and not _running_binja + +print( + "m68k[debug] shim loaded " + f"(name={__name__!r}, package={__package__!r}, plugin_dir={_plugin_dir}, " + f"force_mock={_force_mock_requested}, running_binja={_running_binja})" +) +print(f"m68k[debug] implementation_dir={_src_pkg_dir} exists={_src_pkg_dir.is_dir()}") + +_has_binaryninja = module_exists("binaryninja") +print(f"m68k[debug] binaryninja_available={_has_binaryninja} will_register={bool(_has_binaryninja and __package__ and not _skip_registration)}") + +if _has_binaryninja and __package__ and not _skip_registration: from ._bn_plugin import register - register(plugin_dir=_plugin_dir) + print("m68k[debug] calling _bn_plugin.register()") + try: + register(plugin_dir=_plugin_dir) + except Exception: + print("m68k[debug] _bn_plugin.register() raised:") + traceback.print_exc() + raise + print("m68k[debug] _bn_plugin.register() completed") +else: + if not __package__: + print("m68k[debug] skipping registration (not 
imported as a package)") + elif _skip_registration: + print("m68k[debug] skipping registration (FORCE_BINJA_MOCK set and not running inside Binary Ninja)") + elif not _has_binaryninja: + print("m68k[debug] skipping registration (binaryninja module not available)") diff --git a/src/m68k/_bn_plugin.py b/src/m68k/_bn_plugin.py index 1337023..7a813d5 100644 --- a/src/m68k/_bn_plugin.py +++ b/src/m68k/_bn_plugin.py @@ -1,17 +1,13 @@ from __future__ import annotations -import os from pathlib import Path def register(*, plugin_dir: Path) -> None: - debug = os.environ.get("M68K_DEBUG", "").lower() in ("1", "true", "yes") - def _debug(msg: str) -> None: - if debug: - print(f"m68k[debug] {msg}") + print(f"m68k[debug] {msg}") - _debug("Starting plugin registration") + _debug(f"Starting plugin registration (plugin_dir={plugin_dir})") from binaryninja import Architecture, BinaryViewType, CallingConvention, PluginCommand from binaryninja.enums import Endianness From b52b7ddd8217ca2316a83a6a5a0d9fc0546da124 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 17:46:41 +1100 Subject: [PATCH 45/46] Remove temporary debug prints --- AGENTS.md | 1 - __init__.py | 25 +------------------------ src/m68k/_bn_plugin.py | 11 ----------- 3 files changed, 1 insertion(+), 36 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index eecd224..77a78dc 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -16,7 +16,6 @@ Tooling: use `uv` for dependency management and running commands. - Install dev deps: `uv sync --extra dev` - Lint: `uv run ruff check .` - Unit tests (uses mocks, no Binary Ninja required): `uv run pytest` -- Plugin init always prints debug output (useful when diagnosing load/registration issues). - Load locally: place this folder in your Binary Ninja plugins directory and restart Binary Ninja. 
- Syntax-only check (no Binary Ninja required): `python3 -m compileall .` diff --git a/__init__.py b/__init__.py index 033f918..7c7a886 100644 --- a/__init__.py +++ b/__init__.py @@ -3,7 +3,6 @@ import importlib.util import os import sys -import traceback from pathlib import Path @@ -55,31 +54,9 @@ def _running_inside_binary_ninja() -> bool: _running_binja = _running_inside_binary_ninja() _skip_registration = _force_mock_requested and not _running_binja -print( - "m68k[debug] shim loaded " - f"(name={__name__!r}, package={__package__!r}, plugin_dir={_plugin_dir}, " - f"force_mock={_force_mock_requested}, running_binja={_running_binja})" -) -print(f"m68k[debug] implementation_dir={_src_pkg_dir} exists={_src_pkg_dir.is_dir()}") - _has_binaryninja = module_exists("binaryninja") -print(f"m68k[debug] binaryninja_available={_has_binaryninja} will_register={bool(_has_binaryninja and __package__ and not _skip_registration)}") if _has_binaryninja and __package__ and not _skip_registration: from ._bn_plugin import register - print("m68k[debug] calling _bn_plugin.register()") - try: - register(plugin_dir=_plugin_dir) - except Exception: - print("m68k[debug] _bn_plugin.register() raised:") - traceback.print_exc() - raise - print("m68k[debug] _bn_plugin.register() completed") -else: - if not __package__: - print("m68k[debug] skipping registration (not imported as a package)") - elif _skip_registration: - print("m68k[debug] skipping registration (FORCE_BINJA_MOCK set and not running inside Binary Ninja)") - elif not _has_binaryninja: - print("m68k[debug] skipping registration (binaryninja module not available)") + register(plugin_dir=_plugin_dir) diff --git a/src/m68k/_bn_plugin.py b/src/m68k/_bn_plugin.py index 7a813d5..3d34763 100644 --- a/src/m68k/_bn_plugin.py +++ b/src/m68k/_bn_plugin.py @@ -4,11 +4,6 @@ def register(*, plugin_dir: Path) -> None: - def _debug(msg: str) -> None: - print(f"m68k[debug] {msg}") - - _debug(f"Starting plugin registration 
(plugin_dir={plugin_dir})") - from binaryninja import Architecture, BinaryViewType, CallingConvention, PluginCommand from binaryninja.enums import Endianness @@ -27,7 +22,6 @@ def _debug(msg: str) -> None: ) print(f"m68k Plugin loaded from: {plugin_dir}") - _debug("Registering PluginCommand: Create M68k vector table") PluginCommand.register_for_address( "Create M68k vector table", @@ -35,7 +29,6 @@ def _debug(msg: str) -> None: prompt_create_vector_table, ) - _debug("Registering architectures: M68000/M68008/M68010/M68020/M68030/M68040/M68LC040/M68EC040/M68330/M68340") M68000.register() M68008.register() M68010.register() @@ -47,14 +40,10 @@ def _debug(msg: str) -> None: M68330.register() M68340.register() - _debug("Registering ELF arch mapping: (4, BigEndian) -> M68030") BinaryViewType["ELF"].register_arch(4, Endianness.BigEndian, Architecture["M68030"]) class ParametersInRegistersCallingConvention(CallingConvention): name = "ParametersInRegisters" - _debug("Registering calling convention: ParametersInRegisters (M68000 default)") arch = Architecture["M68000"] arch.register_calling_convention(ParametersInRegistersCallingConvention(arch, "default")) - - _debug("Finished plugin registration") From 3fb33f6b1f2574de8b96487791a6d4976af92121 Mon Sep 17 00:00:00 2001 From: Michael Pishchagin Date: Thu, 25 Dec 2025 18:12:29 +1100 Subject: [PATCH 46/46] Make RTS flag passing arch-specific --- src/m68k/m68k.py | 16 ++++++++---- src/m68k/test.py | 8 +++--- tests/test_rts_pass_flags.py | 49 ++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 8 deletions(-) create mode 100644 tests/test_rts_pass_flags.py diff --git a/src/m68k/m68k.py b/src/m68k/m68k.py index d3de481..8397c77 100644 --- a/src/m68k/m68k.py +++ b/src/m68k/m68k.py @@ -79,8 +79,13 @@ LowLevelILFlagCondition.LLFC_SLE: ['n', 'v', 'z'], # le } -# hack for programs that rely on flags not being modified after `rts`. 
-RTS_PASS_FLAGS = False +# Hack for binaries that use flags as a return value from subroutines. +# +# When enabled, `rts` writes the carry flag to a fake register (`rc`) and +# `jsr`/`bsr` restores the carry flag from that register after the call. +RTS_PASS_FLAGS_BY_ARCH = { + "M68000": True, +} class M68000(Architecture): name = "M68000" @@ -211,10 +216,12 @@ class M68000(Architecture): } memory_indirect = False movem_store_decremented = False + rts_pass_flags = False def __init__(self): Architecture.__init__(self) self.disasm = M68KDisasm(self.address_size, self.control_registers) + self.rts_pass_flags = RTS_PASS_FLAGS_BY_ARCH.get(self.name, False) def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: int, size: int, source: Optional[Operand], dest: Optional[Operand], third: Optional[Operand]): size_bytes = None @@ -1338,7 +1345,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in il.append( il.call(dest.get_address_il(il)) ) - if RTS_PASS_FLAGS: + if self.rts_pass_flags: il.append(il.set_flag('c', il.reg(1, 'rc'))) elif instr == 'callm': # TODO @@ -1566,7 +1573,7 @@ def generate_instruction_il(self, il: LowLevelILFunction, instr: str, length: in ) ) elif instr == 'rts': - if RTS_PASS_FLAGS: + if self.rts_pass_flags: il.append(il.set_reg(1, 'rc', il.flag('c'))) il.append( @@ -2108,4 +2115,3 @@ def prompt_create_vector_table(view, addr=None): view.platform = Architecture[arch].standalone_platform create_vector_table(view, address, size) - diff --git a/src/m68k/test.py b/src/m68k/test.py index ebd569f..d4e2759 100644 --- a/src/m68k/test.py +++ b/src/m68k/test.py @@ -31,16 +31,18 @@ def _running_inside_binary_ninja() -> bool: if _running_under_pytest() and not _running_inside_binary_ninja(): from binja_test_mocks.mock_llil import MockFlag, MockLLIL, mllil, mreg - from .m68k import RTS_PASS_FLAGS + from .m68k import M68000 + + _RTS_PASS_FLAGS = M68000().rts_pass_flags def _maybe_restore_flags() -> 
list[MockLLIL]: - if not RTS_PASS_FLAGS: + if not _RTS_PASS_FLAGS: return [] return [mllil("SET_FLAG", [MockFlag("c"), mllil("REG.b", [mreg("rc")])])] def _rts_expected() -> list[MockLLIL]: out: list[MockLLIL] = [] - if RTS_PASS_FLAGS: + if _RTS_PASS_FLAGS: out.append(mllil("SET_REG.b", [mreg("rc"), mllil("FLAG", [MockFlag("c")])])) out.append(mllil("RET", [mllil("POP.d", [])])) return out diff --git a/tests/test_rts_pass_flags.py b/tests/test_rts_pass_flags.py new file mode 100644 index 0000000..fbaed47 --- /dev/null +++ b/tests/test_rts_pass_flags.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +import importlib + +from binaryninja import lowlevelil +from binja_test_mocks.mock_llil import MockFlag, MockLabel, MockLLIL, mllil, mreg + +m68k_arch = importlib.import_module("m68k.m68k") + + +def _lift_to_llil(arch: object, data: bytes) -> list[MockLLIL]: + il = lowlevelil.LowLevelILFunction(arch) + + offset = 0 + while offset < len(data): + il.current_address = offset # type: ignore[attr-defined] + length = arch.get_instruction_low_level_il(data[offset:], offset, il) # type: ignore[attr-defined] + assert length is not None and length > 0 + offset += length + + return [node for node in il if not isinstance(node, MockLabel)] + + +def test_rts_pass_flags_is_arch_specific() -> None: + assert m68k_arch.M68000().rts_pass_flags is True + assert m68k_arch.M68010().rts_pass_flags is False + + +def test_rts_pass_flags_affects_call_and_rts_llil() -> None: + jsr_abs = b"\x4e\xb9\x00\x05\xdc\x1c" # jsr ($5dc1c) + rts = b"\x4e\x75" + + expected_jsr_disabled = [mllil("CALL", [mllil("CONST_PTR.d", [0x5DC1C])])] + expected_jsr_enabled = expected_jsr_disabled + [ + mllil("SET_FLAG", [MockFlag("c"), mllil("REG.b", [mreg("rc")])]) + ] + + expected_rts_disabled = [mllil("RET", [mllil("POP.d", [])])] + expected_rts_enabled = [ + mllil("SET_REG.b", [mreg("rc"), mllil("FLAG", [MockFlag("c")])]), + *expected_rts_disabled, + ] + + assert _lift_to_llil(m68k_arch.M68010(), jsr_abs) == 
expected_jsr_disabled + assert _lift_to_llil(m68k_arch.M68000(), jsr_abs) == expected_jsr_enabled + + assert _lift_to_llil(m68k_arch.M68010(), rts) == expected_rts_disabled + assert _lift_to_llil(m68k_arch.M68000(), rts) == expected_rts_enabled +