# binaryninja-api/python/examples/wf_test_copy_expr.py at dev · Vector35/binaryninja-api
#
# 322 lines (287 loc) · 15 KB
import functools
import json
import math
from binaryninja import Workflow, Activity, AnalysisContext, ReportCollection, \
    FlowGraphReport, show_report_collection, DisassemblySettings, DisassemblyOption
from binaryninja.lowlevelil import *
from binaryninja.mediumlevelil import *
"""
This workflow copies every instruction in an IL function to a new IL function and then
verifies that they are exactly the same.
"""
def assert_llil_eq(old_insn: LowLevelILInstruction, new_insn: LowLevelILInstruction):
    """
    Make sure that these two instructions are the same (probably correct). Asserts otherwise.

    Note: This ignores when instructions reference other instructions by index directly,
    as IL indices are not guaranteed to be consistent. So things like goto/if/jump_to
    will check that the target of the branch is the same, but allow the target to have
    a different instruction index.

    :param old_insn: instruction taken from the original IL function
    :param new_insn: the corresponding instruction taken from the copied IL function
    :raises AssertionError: if any compared property or operand differs; the assertion
        payload is a tuple with the address and the offending instructions/operands
    """
    err_msg = (hex(old_insn.address), old_insn, new_insn)
    assert old_insn.operation == new_insn.operation, err_msg
    # assert old_insn.attributes == new_insn.attributes, err_msg
    assert old_insn.size == new_insn.size, err_msg
    assert old_insn.raw_flags == new_insn.raw_flags, err_msg
    assert old_insn.source_location == new_insn.source_location, err_msg
    assert len(old_insn.operands) == len(new_insn.operands), err_msg

    # Can't compare operands directly since IL expression indices might change when
    # copying an instruction to another function
    for i, (old_op, new_op) in enumerate(zip(old_insn.detailed_operands, new_insn.detailed_operands)):
        # Each detailed operand is indexed as (name, value, type name)
        err_msg = (hex(old_insn.address), f'op {i}', old_insn, new_insn, old_op, new_op)
        assert old_op[0] == new_op[0], err_msg  # op name
        assert old_op[2] == new_op[2], err_msg  # op type

        op_type = old_op[2]
        if op_type == 'LowLevelILInstruction':
            assert_llil_eq(old_op[1], new_op[1])
        elif op_type == 'InstructionIndex' or \
                (old_insn.operation == LowLevelILOperation.LLIL_GOTO and old_op[0] == 'dest') or \
                (old_insn.operation == LowLevelILOperation.LLIL_IF and old_op[0] == 'true') or \
                (old_insn.operation == LowLevelILOperation.LLIL_IF and old_op[0] == 'false'):
            # These aren't consistent if the old function has instructions outside BBs
            # (they are not copied), so just make sure the target instruction looks the same
            assert old_insn.function[old_op[1]].operation == new_insn.function[new_op[1]].operation, err_msg
        elif op_type in [
            'List[LowLevelILInstruction]',
            'List[\'LowLevelILInstruction\']'  # compat (ew)
        ]:
            # zip() stops at the shorter list, so check the lengths explicitly first
            assert len(old_op[1]) == len(new_op[1]), err_msg
            for old_sub, new_sub in zip(old_op[1], new_op[1]):
                assert_llil_eq(old_sub, new_sub)
        elif op_type == 'float':
            # Two NaNs never compare equal, so treat "both NaN" as a match
            if not (math.isnan(old_op[1]) and math.isnan(new_op[1])):
                assert old_op[1] == new_op[1], err_msg
        elif old_insn.operation == LowLevelILOperation.LLIL_JUMP_TO and old_op[0] == 'targets':
            # zip() stops at the shorter sequence, so check the target counts first
            assert len(old_op[1]) == len(new_op[1]), err_msg
            for old_target, new_target in zip(sorted(old_op[1].items()), sorted(new_op[1].items())):
                assert old_target[0] == new_target[0], err_msg
                # Same as with instruction index
                assert_llil_eq(old_insn.function[old_target[1]], new_insn.function[new_target[1]])
        else:
            # TODO: Any other types of ops need special behavior?
            assert old_op[1] == new_op[1], err_msg
@functools.lru_cache(maxsize=8)
def get_mlil_maps(mlil: MediumLevelILFunction, builders: bool) -> Tuple[LLILSSAToMLILInstructionMapping, LLILSSAToMLILExpressionMapping]:
    """
    Fetch the LLIL SSA -> MLIL instruction and expression mappings of an MLIL function.

    Results are memoized by ``functools.lru_cache`` (up to 8 distinct argument pairs),
    so repeated lookups for the same function do not query the mappings again.

    :param mlil: MLIL function whose mappings are requested
    :param builders: forwarded unchanged to both private mapping getters
    :return: ``(instruction mapping, expression mapping)``
    """
    return (
        mlil._get_llil_ssa_to_mlil_instr_map(builders),
        mlil._get_llil_ssa_to_mlil_expr_map(builders),
    )
def assert_mlil_eq(old_insn: MediumLevelILInstruction, new_insn: MediumLevelILInstruction):
    """
    Make sure that these two instructions are the same (probably correct). Asserts otherwise.

    Note: This ignores when instructions reference other instructions by index directly,
    as IL indices are not guaranteed to be consistent. So things like goto/if/jump_to
    will check that the target of the branch is the same, but allow the target to have
    a different instruction index.

    :param old_insn: instruction taken from the original IL function
    :param new_insn: the corresponding instruction taken from the copied IL function
    :raises AssertionError: if any compared property, LLIL SSA mapping or operand differs;
        the assertion payload is a tuple with the address and the offending values
    """
    err_msg = (hex(old_insn.address), old_insn, new_insn)
    assert old_insn.operation == new_insn.operation, err_msg
    assert old_insn.attributes == new_insn.attributes, err_msg
    assert old_insn.size == new_insn.size, err_msg
    assert old_insn.source_location == new_insn.source_location, err_msg
    assert len(old_insn.operands) == len(new_insn.operands), err_msg
    # Type only applies once we've generated SSA form (probably not consistent)
    # assert old_insn.expr_type == new_insn.expr_type, f"{err_msg} {old_insn.expr_type} {new_insn.expr_type}"

    # Only the instruction mapping is needed here; the expression mapping is ignored
    instr_map, _ = get_mlil_maps(new_insn.function, True)

    # Compare that the instruction's LLIL SSA map is the same as the old function.
    # Only done when old_insn's expression index is the one registered for its instruction.
    if old_insn.instr_index is not None and \
            old_insn.function.get_expr_index_for_instruction(old_insn.instr_index) == old_insn.expr_index:
        old_llil_ssa = old_insn.function.get_low_level_il_instruction_index(old_insn.instr_index)
        mapped = [mlil for (llil, mlil) in instr_map.items() if llil == old_llil_ssa]
        if old_llil_ssa is not None:
            assert mapped == [new_insn.instr_index], err_msg
        else:
            assert mapped == [], err_msg

    # Can't compare operands directly since IL expression indices might change when
    # copying an instruction to another function
    for i, (old_op, new_op) in enumerate(zip(old_insn.detailed_operands, new_insn.detailed_operands)):
        # Each detailed operand is indexed as (name, value, type name)
        err_msg = (hex(old_insn.address), f'op {i}', old_insn, new_insn, old_op, new_op)
        assert old_op[0] == new_op[0], err_msg  # op name
        assert old_op[2] == new_op[2], err_msg  # op type

        op_type = old_op[2]
        if op_type == 'MediumLevelILInstruction':
            assert_mlil_eq(old_op[1], new_op[1])
        elif op_type == 'InstructionIndex' or \
                (old_insn.operation == MediumLevelILOperation.MLIL_GOTO and old_op[0] == 'dest') or \
                (old_insn.operation == MediumLevelILOperation.MLIL_IF and old_op[0] == 'true') or \
                (old_insn.operation == MediumLevelILOperation.MLIL_IF and old_op[0] == 'false'):
            # These aren't consistent if the old function has instructions outside BBs
            # (they are not copied), so just make sure the target instruction looks the same
            assert old_insn.function[old_op[1]].operation == new_insn.function[new_op[1]].operation, err_msg
        elif op_type == 'List[MediumLevelILInstruction]':
            # zip() stops at the shorter list, so check the lengths explicitly first
            assert len(old_op[1]) == len(new_op[1]), err_msg
            for old_sub, new_sub in zip(old_op[1], new_op[1]):
                assert_mlil_eq(old_sub, new_sub)
        elif op_type == 'float':
            # Two NaNs never compare equal, so treat "both NaN" as a match
            if not (math.isnan(old_op[1]) and math.isnan(new_op[1])):
                assert old_op[1] == new_op[1], err_msg
        elif op_type == 'Variable':
            assert old_op[1].core_variable == new_op[1].core_variable, err_msg
        elif op_type == 'List[Variable]':
            assert len(old_op[1]) == len(new_op[1]), err_msg
            for old_sub, new_sub in zip(old_op[1], new_op[1]):
                err_msg = (hex(old_insn.address), f'op {i}', old_insn, new_insn, old_op, new_op, old_sub, new_sub)
                assert old_sub.core_variable == new_sub.core_variable, err_msg
        elif op_type == 'SSAVariable':
            assert old_op[1].var.core_variable == new_op[1].var.core_variable, err_msg
            assert old_op[1].version == new_op[1].version, err_msg
        elif old_insn.operation == MediumLevelILOperation.MLIL_JUMP_TO and old_op[0] == 'targets':
            # zip() stops at the shorter sequence, so check the target counts first
            assert len(old_op[1]) == len(new_op[1]), err_msg
            for old_target, new_target in zip(sorted(old_op[1].items()), sorted(new_op[1].items())):
                err_msg = (hex(old_insn.address), f'op {i}', old_insn, new_insn, old_op, new_op, old_target, new_target)
                assert old_target[0] == new_target[0], err_msg
                # Same as with instruction index
                assert_mlil_eq(old_insn.function[old_target[1]], new_insn.function[new_target[1]])
        else:
            # TODO: Any other types of ops need special behavior?
            assert old_op[1] == new_op[1], err_msg
def lil_action(context: AnalysisContext):
    """
    Workflow activity: copy the function's Lifted IL and verify the copy matches.

    Builds a new IL function by translating ``context.lifted_il`` with a callback that
    copies every instruction unchanged, finalizes it, and then asserts that every basic
    block of the copy holds the same instructions as the original. Does nothing when the
    context has no Lifted IL.

    :param context: analysis context of the function being analyzed
    :raises AssertionError: if the copied IL differs from the original
    """
    def translate_instr(
            new_func: LowLevelILFunction,
            old_block: LowLevelILBasicBlock,
            old_instr: LowLevelILInstruction,
    ):
        # no-op copy; sub-expressions are copied through the same callback
        return old_instr.copy_to(
            new_func,
            lambda sub_instr: translate_instr(new_func, old_block, sub_instr)
        )

    old_lil = context.lifted_il
    if old_lil is None:
        return

    new_lil = old_lil.translate(translate_instr)
    new_lil.finalize()

    if context.function.check_for_debug_report("copy_expr_test_lil"):
        # debug the test :)
        report = ReportCollection()
        settings = DisassemblySettings()
        settings.set_option(DisassemblyOption.ShowAddress, True)
        report.append(FlowGraphReport("old graph", old_lil.create_graph_immediate(settings)))
        report.append(FlowGraphReport("new graph", new_lil.create_graph_immediate(settings)))
        show_report_collection("copy expr test", report)

    # Check all BBs have all the same instructions
    # Technically, this misses any instructions outside a BB, but those are not
    # picked up by analysis anyway, and therefore don't matter.
    assert len(old_lil.basic_blocks) == len(new_lil.basic_blocks)
    for old_bb, new_bb in zip(old_lil.basic_blocks, new_lil.basic_blocks):
        assert len(old_bb) == len(new_bb)
        for old_insn, new_insn in zip(old_bb, new_bb):
            assert_llil_eq(old_insn, new_insn)
def llil_action(context: AnalysisContext):
    """
    Workflow activity: copy the function's Low Level IL and verify the copy matches.

    Builds a new IL function by translating ``context.llil`` with a callback that copies
    every instruction unchanged, finalizes it and generates its SSA form, and then asserts
    that every basic block of the copy holds the same instructions as the original. Does
    nothing when the context has no Low Level IL.

    :param context: analysis context of the function being analyzed
    :raises AssertionError: if the copied IL differs from the original
    """
    def translate_instr(
            new_func: LowLevelILFunction,
            old_block: LowLevelILBasicBlock,
            old_instr: LowLevelILInstruction,
    ):
        # no-op copy; sub-expressions are copied through the same callback
        return old_instr.copy_to(
            new_func,
            lambda sub_instr: translate_instr(new_func, old_block, sub_instr)
        )

    old_llil = context.llil
    if old_llil is None:
        return

    new_llil = old_llil.translate(translate_instr)
    new_llil.finalize()
    new_llil.generate_ssa_form()

    if context.function.check_for_debug_report("copy_expr_test_llil"):
        # debug the test :)
        report = ReportCollection()
        settings = DisassemblySettings()
        settings.set_option(DisassemblyOption.ShowAddress, True)
        report.append(FlowGraphReport("old graph", old_llil.create_graph_immediate(settings)))
        report.append(FlowGraphReport("new graph", new_llil.create_graph_immediate(settings)))
        show_report_collection("copy expr test", report)

    # Check all BBs have all the same instructions
    # Technically, this misses any instructions outside a BB, but those are not
    # picked up by analysis anyway, and therefore don't matter.
    assert len(old_llil.basic_blocks) == len(new_llil.basic_blocks)
    for old_bb, new_bb in zip(old_llil.basic_blocks, new_llil.basic_blocks):
        assert len(old_bb) == len(new_bb)
        for old_insn, new_insn in zip(old_bb, new_bb):
            assert_llil_eq(old_insn, new_insn)
def mlil_action(context: AnalysisContext):
    """
    Workflow activity: copy the function's Medium Level IL and verify the copy matches.

    Builds a new IL function by translating ``context.mlil`` with a callback that copies
    every instruction unchanged, finalizes it and generates its SSA form. It then asserts
    that the LLIL SSA -> MLIL expression mapping and every basic block of the copy match
    the original. Finally it assigns the copy to ``context.mlil`` and asserts that the
    expression mapping read back from the context is unchanged. Does nothing when the
    context has no Medium Level IL.

    :param context: analysis context of the function being analyzed
    :raises AssertionError: if the copied IL or its mappings differ from the original
    """
    def translate_instr(
            new_func: MediumLevelILFunction,
            old_block: MediumLevelILBasicBlock,
            old_instr: MediumLevelILInstruction,
    ):
        # no-op copy; sub-expressions are copied through the same callback
        return old_instr.copy_to(
            new_func,
            lambda sub_instr: translate_instr(new_func, old_block, sub_instr)
        )

    def sorted_expr_map(func: MediumLevelILFunction, builders: bool):
        # Order the mapping entries deterministically so two mappings can be compared
        return sorted(
            func._get_llil_ssa_to_mlil_expr_map(builders),
            key=lambda o: (o.lower_index, o.higher_index)
        )

    old_mlil = context.mlil
    if old_mlil is None:
        return

    new_mlil = old_mlil.translate(translate_instr)
    new_mlil.finalize()
    new_mlil.generate_ssa_form()

    if context.function.check_for_debug_report("copy_expr_test_mlil"):
        # debug the test :)
        report = ReportCollection()
        settings = DisassemblySettings()
        settings.set_option(DisassemblyOption.ShowAddress, True)
        report.append(FlowGraphReport("old graph", old_mlil.create_graph_immediate(settings)))
        report.append(FlowGraphReport("new graph", new_mlil.create_graph_immediate(settings)))
        show_report_collection("copy expr test", report)

    # Check expr mappings are the same
    new_map = sorted_expr_map(new_mlil, True)
    old_map = sorted_expr_map(old_mlil, False)
    assert old_map == new_map

    # Check all BBs have all the same instructions
    # Technically, this misses any instructions outside a BB, but those are not
    # picked up by analysis anyway, and therefore don't matter.
    assert len(old_mlil.basic_blocks) == len(new_mlil.basic_blocks)
    for old_bb, new_bb in zip(old_mlil.basic_blocks, new_mlil.basic_blocks):
        assert len(old_bb) == len(new_bb)
        for old_insn, new_insn in zip(old_bb, new_bb):
            assert_mlil_eq(old_insn, new_insn)

    # Make sure mappings update correctly following set
    new_map = sorted_expr_map(new_mlil, True)
    context.mlil = new_mlil
    newer_map = sorted_expr_map(context.mlil, False)
    assert new_map == newer_map
# Clone the core function meta-analysis workflow so the test activities can be added to it
wf = Workflow("core.function.metaAnalysis").clone("core.function.metaAnalysis")

# Define the custom activity configuration.
# Every activity defaults to not being auto-eligible ("default": False).
wf.register_activity(Activity(
    configuration=json.dumps({
        "name": "extension.test_copy_expr.lil_action",
        "title": "Lifted IL copy_expr Test",
        "description": "Makes sure copy_expr works on Lifted IL functions.",
        "eligibility": {
            "auto": {
                "default": False
            }
        }
    }),
    action=lil_action
))
wf.register_activity(Activity(
    configuration=json.dumps({
        "name": "extension.test_copy_expr.llil_action",
        "title": "Low Level IL copy_expr Test",
        "description": "Makes sure copy_expr works on Low Level IL functions.",
        "eligibility": {
            "auto": {
                "default": False
            }
        }
    }),
    action=llil_action
))
wf.register_activity(Activity(
    configuration=json.dumps({
        "name": "extension.test_copy_expr.mlil_action",
        "title": "Medium Level IL copy_expr Test",
        "description": "Makes sure copy_expr works on Medium Level IL functions.",
        "eligibility": {
            "auto": {
                "default": False
            }
        }
    }),
    action=mlil_action
))

# Hook each test activity into the pipeline relative to the named core activity
wf.insert("core.function.analyzeAndExpandFlags", ["extension.test_copy_expr.lil_action"])
wf.insert("core.function.generateMediumLevelIL", ["extension.test_copy_expr.llil_action"])
wf.insert("core.function.generateHighLevelIL", ["extension.test_copy_expr.mlil_action"])
# TODO: MLIL and higher

wf.register()
# Provide feedback
#
# Saved searches
#
# Use saved searches to filter your results more quickly
#
# FilesExpand file tree
#
# wf_test_copy_expr.py
#
# Latest commit
#
# History
#
# wf_test_copy_expr.py
#
# File metadata and controls