Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 35 additions & 3 deletions arch/x86/il.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2100,15 +2100,47 @@ bool GetLowLevelILForInstruction(Architecture* arch, const uint64_t addr, LowLev
}

case XED_ICLASS_MOV:
il.AddInstruction(
WriteILOperand(il, xedd, addr, 0, 0,
ReadILOperand(il, xedd, addr, 1, 1)));
break;

case XED_ICLASS_MOVD:
case XED_ICLASS_MOVQ:
case XED_ICLASS_VMOVD:
case XED_ICLASS_VMOVQ:
case XED_ICLASS_MOVDIRI:
if (opOneLen > opTwoLen)
{
// This may add unneeded zero-extends, but MLIL will optimize them out
il.AddInstruction(
WriteILOperand(il, xedd, addr, 0, 0,
il.ZeroExtend(opOneLen,
ReadILOperand(il, xedd, addr, 1, 1, opTwoLen)),
opOneLen));
}
else
{
il.AddInstruction(
WriteILOperand(il, xedd, addr, 0, 0,
ReadILOperand(il, xedd, addr, 1, 1, opTwoLen),
opOneLen));
}
break;

case XED_ICLASS_MOVDIR64B:
il.AddInstruction(
WriteILOperand(il, xedd, addr, 0, 0,
ReadILOperand(il, xedd, addr, 1, 1)));
// Special case:
// Moves 64-bytes as direct-store with 64-byte write atomicity from source memory address to destination memory address.
// The source operand is a normal memory operand. The destination operand is a memory location specified in a general-purpose register.
// ...
// MOVDIR64B requires the destination address to be 64-byte aligned. No alignment restriction is enforced for source operand.
//
// Lifting to the intrinsic is still semantically inaccurate, but we don't currently have a way to represent atomic writes:
// movdir64b rax, zmmword [rbx]
//
// temp0 = _movdir64b(rax, [rbx].64, rax)
// [rax].64 = temp0.64
LiftAsIntrinsic();
break;

case XED_ICLASS_MOVSX:
Expand Down
200 changes: 146 additions & 54 deletions arch/x86/test_lifting.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,110 @@
#!/usr/bin/env python

# Software-interrupt cases: each entry pairs the encoded bytes of one
# `int N` instruction with the lifted IL string expected for it.
# Every case is listed exactly once so a mismatch index printed by the
# runner maps to a single entry.
tests_interrupts = [
    (b"\xcd\x00", "LLIL_TRAP(0)"),  # int 0
    (b"\xcd\x01", "LLIL_TRAP(1)"),  # int 1
    (b"\xcd\x02", "LLIL_TRAP(2)"),  # int 2
    (b"\xcd\x03", "LLIL_TRAP(3)"),  # int 3
    (b"\xcd\x04", "LLIL_TRAP(4)"),  # int 4
    (b"\xcd\x05", "LLIL_TRAP(5)"),  # int 5
    (b"\xcd\x06", "LLIL_TRAP(6)"),  # int 6
    (b"\xcd\x07", "LLIL_TRAP(7)"),  # int 7
    (b"\xcd\x08", "LLIL_TRAP(8)"),  # int 8
    (b"\xcd\x09", "LLIL_TRAP(9)"),  # int 9
    (b"\xcd\x0A", "LLIL_TRAP(10)"),  # int 10
    (b"\xcd\x0B", "LLIL_TRAP(11)"),  # int 11
    (b"\xcd\x0C", "LLIL_TRAP(12)"),  # int 12
    (b"\xcd\x0D", "LLIL_TRAP(13)"),  # int 13
    (b"\xcd\x0E", "LLIL_TRAP(14)"),  # int 14
    (b"\xcd\x0F", "LLIL_TRAP(15)"),  # int 15
    (b"\xcd\x29", "LLIL_TRAP(13)"),  # int 0x29 is lifted as TRAP_GPF
    (b"\xcd\x80", "LLIL_SYSCALL()"),  # int 0x80 is syscall on Linux
]

# Smallest sanity case: a lone one-byte instruction.
# The entry ends with a comma so further tuples can be appended without two
# adjacent tuples being parsed as a call expression.
tests_basics = [
    # nop
    (b"\x90", "LLIL_NOP()"),
]

# NOTE(review): test_cases is assigned again further down, after tests_movd
# is defined, and that later value also includes tests_movd. This earlier
# two-list concatenation is therefore overwritten before test_all() reads
# test_cases -- confirm whether it is still needed.
test_cases = \
tests_interrupts + \
tests_basics
# Data-movement cases: (encoded instruction bytes, expected lifted IL string).
# The comment above each entry gives the disassembly of its bytes. Where the
# destination is wider than the source the expected IL wraps the source in
# LLIL_ZX; otherwise the value is moved as-is.
tests_movd = [
    # vmovd eax, xmm0
    (b"\xC5\xF9\x7E\xC0", "LLIL_SET_REG.d(eax,LLIL_REG.d(xmm0))"),

    # --- MOVD, MMX register forms ---
    # MOVD mm0, ecx
    (b"\x0F\x6E\xC1", "LLIL_SET_REG.q(mm0,LLIL_ZX.q(LLIL_REG.d(ecx)))"),
    # MOVD mm0, [ebx]
    (
        b"\x67\x0F\x6E\x03",
        "LLIL_SET_REG.q(mm0,LLIL_ZX.q(LLIL_LOAD.d(LLIL_REG.q(ebx))))",
    ),
    # MOVD ecx, mm0
    (b"\x0F\x7E\xC1", "LLIL_SET_REG.d(ecx,LLIL_REG.d(mm0))"),
    # MOVD [ebx], mm0
    (b"\x67\x0F\x7E\x03", "LLIL_STORE.d(LLIL_REG.q(ebx),LLIL_REG.d(mm0))"),

    # --- MOVD, XMM register forms ---
    # MOVD xmm0, ecx
    (b"\x66\x0F\x6E\xC1", "LLIL_SET_REG.o(xmm0,LLIL_ZX.o(LLIL_REG.d(ecx)))"),
    # MOVD xmm0, [ebx]
    (
        b"\x67\x66\x0F\x6E\x03",
        "LLIL_SET_REG.o(xmm0,LLIL_ZX.o(LLIL_LOAD.d(LLIL_REG.q(ebx))))",
    ),
    # MOVD ecx, xmm0
    (b"\x66\x0F\x7E\xC1", "LLIL_SET_REG.d(ecx,LLIL_REG.d(xmm0))"),
    # MOVD [ebx], xmm0
    (b"\x67\x66\x0F\x7E\x03", "LLIL_STORE.d(LLIL_REG.q(ebx),LLIL_REG.d(xmm0))"),

    # --- MOVQ, MMX register forms ---
    # MOVQ mm0, rcx
    (b"\x48\x0F\x6E\xC1", "LLIL_SET_REG.q(mm0,LLIL_REG.q(rcx))"),
    # MOVQ mm0, [rbx]
    (b"\x0F\x6F\x03", "LLIL_SET_REG.q(mm0,LLIL_LOAD.q(LLIL_REG.q(rbx)))"),
    # MOVQ rcx, mm0
    (b"\x48\x0F\x7E\xC1", "LLIL_SET_REG.q(rcx,LLIL_REG.q(mm0))"),
    # MOVQ [rbx], mm0
    (b"\x0F\x7F\x03", "LLIL_STORE.q(LLIL_REG.q(rbx),LLIL_REG.q(mm0))"),

    # --- MOVQ, XMM register forms ---
    # MOVQ xmm0, rcx
    (b"\x66\x48\x0F\x6E\xC1", "LLIL_SET_REG.o(xmm0,LLIL_ZX.o(LLIL_REG.q(rcx)))"),
    # MOVQ xmm0, [rbx]
    (
        b"\xF3\x0F\x7E\x03",
        "LLIL_SET_REG.o(xmm0,LLIL_ZX.o(LLIL_LOAD.q(LLIL_REG.q(rbx))))",
    ),
    # MOVQ rcx, xmm0
    (b"\x66\x48\x0F\x7E\xC1", "LLIL_SET_REG.q(rcx,LLIL_REG.q(xmm0))"),
    # MOVQ [rbx], xmm0
    (b"\x66\x0F\xD6\x03", "LLIL_STORE.q(LLIL_REG.q(rbx),LLIL_REG.q(xmm0))"),

    # --- VMOVD ---
    # VMOVD xmm0, ecx
    (b"\xC5\xF9\x6E\xC1", "LLIL_SET_REG.o(xmm0,LLIL_ZX.o(LLIL_REG.d(ecx)))"),
    # VMOVD xmm0, [ebx]
    (
        b"\x67\xC5\xF9\x6E\x03",
        "LLIL_SET_REG.o(xmm0,LLIL_ZX.o(LLIL_LOAD.d(LLIL_REG.q(ebx))))",
    ),
    # VMOVD ecx, xmm0
    (b"\xC5\xF9\x7E\xC1", "LLIL_SET_REG.d(ecx,LLIL_REG.d(xmm0))"),
    # VMOVD [ebx], xmm0
    (b"\x67\xC5\xF9\x7E\x03", "LLIL_STORE.d(LLIL_REG.q(ebx),LLIL_REG.d(xmm0))"),

    # --- VMOVQ ---
    # VMOVQ xmm0, rcx
    (b"\xC4\xE1\xF9\x6E\xC1", "LLIL_SET_REG.o(xmm0,LLIL_ZX.o(LLIL_REG.q(rcx)))"),
    # VMOVQ xmm0, [rbx]
    (
        b"\xC5\xFA\x7E\x03",
        "LLIL_SET_REG.o(xmm0,LLIL_ZX.o(LLIL_LOAD.q(LLIL_REG.q(rbx))))",
    ),
    # VMOVQ rcx, xmm0
    (b"\xC4\xE1\xF9\x7E\xC1", "LLIL_SET_REG.q(rcx,LLIL_REG.q(xmm0))"),
    # VMOVQ [rbx], xmm0
    (b"\xC5\xF9\xD6\x03", "LLIL_STORE.q(LLIL_REG.q(rbx),LLIL_REG.q(xmm0))"),

    # --- Direct stores ---
    # MOVDIRI [ebx], ecx
    (b"\x67\x0F\x38\xF9\x0B", "LLIL_STORE.d(LLIL_REG.q(ebx),LLIL_REG.d(ecx))"),
    # MOVDIRI [rbx], rcx
    (b"\x48\x0F\x38\xF9\x0B", "LLIL_STORE.q(LLIL_REG.q(rbx),LLIL_REG.q(rcx))"),
    # MOVDIR64B rcx, [rbx]
    # Expected as an intrinsic followed by a store of its temporary; the "?"
    # suffixes are what the runner prints for sizes outside its lookup table.
    (
        b"\x66\x0F\x38\xF8\x0B",
        "LLIL_INTRINSIC([temp0],_movdir64b,[LLIL_REG.q(rcx),LLIL_LOAD?(LLIL_REG.q(rbx)),LLIL_REG.q(rcx)]); LLIL_STORE?(LLIL_REG.q(rcx),LLIL_REG?(temp0))",
    ),
]


# Full suite in execution order: interrupts, then basics, then the moves.
test_cases = [*tests_interrupts, *tests_basics, *tests_movd]

import re
import sys
Expand All @@ -37,28 +113,41 @@
from binaryninja import lowlevelil
from binaryninja.enums import LowLevelILOperation


def il2str(il):
    """Render a lifted IL value as a compact string for comparison.

    Args:
        il: a ``LowLevelILInstruction``, a list of operands, or any other
            operand value.

    Returns:
        For an instruction: ``NAME<size>{flags}(operand,operand,...)`` with
        operands rendered recursively. ``<size>`` is ``.b/.w/.d/.q/.o`` for
        1/2/4/8/16 bytes, ``?`` for any other non-zero size, and empty when
        the instruction has no size. ``{flags}`` appears only when the
        instruction has a non-empty ``flags`` attribute.
        For a list: the rendered items, comma separated, inside ``[...]``.
        For anything else: ``str(il)``.
    """
    sz_lookup = {1: ".b", 2: ".w", 4: ".d", 8: ".q", 16: ".o"}

    if isinstance(il, lowlevelil.LowLevelILInstruction):
        size_code = sz_lookup.get(il.size, "?") if il.size else ""
        flags_code = "{%s}" % il.flags if getattr(il, "flags", None) else ""

        # Print size-specified IL constants in hex. Both constant kinds are
        # emitted under the LLIL_CONST name.
        is_const = il.operation in (
            LowLevelILOperation.LLIL_CONST,
            LowLevelILOperation.LLIL_CONST_PTR,
        )
        if is_const and il.size:
            value = il.operands[0]
            if value < 0:
                # Show negatives as their unsigned value at the operand width.
                value += 1 << (il.size * 8)
            # il.size is known to be non-zero here, so hex is the only format.
            return "LLIL_CONST%s(%s)" % (size_code, "0x%X" % value)

        return "%s%s%s(%s)" % (
            il.operation.name,
            size_code,
            flags_code,
            ",".join(il2str(o) for o in il.operands),
        )

    if isinstance(il, list):
        return "[" + ",".join(il2str(x) for x in il) + "]"

    return str(il)


# TODO: make this less hacky
def instr_to_il(data):
platform = binaryninja.Platform['linux-x86']
platform = binaryninja.Platform["linux-x86_64"]
# make a pretend function that returns
bv = binaryview.BinaryView.new(data)
bv.add_function(0, plat=platform)
Expand All @@ -68,60 +157,63 @@ def instr_to_il(data):
for block in bv.functions[0].lifted_il:
for il in block:
result.append(il2str(il))
result = '; '.join(result)
result = "; ".join(result)

try:
result = result[0:result.rindex('; LLIL_UNDEF{none}()')]
result = result[0 : result.rindex("; LLIL_UNDEF{none}()")]
except:
pass

try:
result = result[0:result.rindex('; LLIL_UNDEF()')]
result = result[0 : result.rindex("; LLIL_UNDEF()")]
except:
pass

return result


def il_str_to_tree(ilstr):
    """Reformat a flat IL string as an indented tree for easier reading.

    Args:
        ilstr: IL text as produced by il2str, e.g. ``"A(B,C)"``.

    Returns:
        The same text with structure characters replaced by layout:
        ``(`` starts a new line one level deeper, ``,`` starts a new line at
        the current level, and ``)`` only closes the level and emits nothing.
        Each nesting level is indented by one space; every other character is
        copied through unchanged.
    """
    pieces = []
    depth = 0
    for c in ilstr:
        if c == "(":
            depth += 1
            pieces.append("\n" + " " * depth)
        elif c == ")":
            depth -= 1
        elif c == ",":
            pieces.append("\n" + " " * depth)
        else:
            pieces.append(c)
    # Join once at the end instead of growing a string character by character.
    return "".join(pieces)


def test_all():
    """Lift every entry of test_cases and compare it with its expected IL.

    Stops at the first mismatch, prints the test index, the input bytes in
    hex, the expected and actual IL strings and a tree view of the actual
    IL, then returns False. Returns True when every case matches.
    """
    for index, (encoding, wanted) in enumerate(test_cases):
        lifted = instr_to_il(encoding)
        if lifted == wanted:
            continue

        print("MISMATCH AT TEST %d!" % index)
        print("\t input: %s" % encoding.hex())
        print("\texpected: %s" % wanted)
        print("\t actual: %s" % lifted)
        print("\t tree:")
        print(il_str_to_tree(lifted))
        return False

    return True

# Script entry point: run the suite and report the outcome through the
# process exit status (0 when every case passes, -1 otherwise).
if __name__ == "__main__":
    suite_passed = test_all()
    if suite_passed:
        print("success!")
    sys.exit(0 if suite_passed else -1)

# Also run the suite when this file is loaded under the module name
# "test_lifting"; in that case nothing exits, the result is only printed.
if __name__ == "test_lifting":
    if test_all():
        print("success!")