Skip to content

Commit faa3695

Browse files
committed
Add jump-to-trampoline peephole optimization.
Adds a modest speedup for some examples.
1 parent 943bce2 commit faa3695

5 files changed

Lines changed: 98 additions & 2 deletions

File tree

src/e9patch/e9api.cpp

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,65 @@ static void parsePatch(Binary *B, const Message &msg)
393393
queuePatch(B, I, T);
394394
}
395395

396+
/*
397+
* Optimize CFT instructions that target patched instructions trampolines.
398+
* We can instead jump directly to the trampoline if possible.
399+
*/
400+
static void optimizePeephole2(Binary *B)
401+
{
402+
for (const auto &entry: B->Is)
403+
{
404+
off_t offset = entry.first;
405+
Instr *I = entry.second;
406+
if (I->trampoline != INTPTR_MIN)
407+
continue;
408+
if (I->size != /*sizeof(jmpq/call rel32)=*/5 &&
409+
I->size != /*sizeof(jcc rel32)=*/6)
410+
continue;
411+
bool jcc = false;
412+
switch (I->original.bytes[0])
413+
{
414+
case 0xE8: case 0xE9:
415+
break;
416+
case 0x0F:
417+
switch (I->original.bytes[1])
418+
{
419+
case 0x80: case 0x81: case 0x82: case 0x83: case 0x84:
420+
case 0x85: case 0x86: case 0x87: case 0x88: case 0x89:
421+
case 0x8A: case 0x8B: case 0x8C: case 0x8D: case 0x8E:
422+
case 0x8F:
423+
jcc = true;
424+
break;
425+
default:
426+
continue;
427+
}
428+
break;
429+
default:
430+
continue;
431+
}
432+
bool ok = true;
433+
for (size_t i = 0; ok && i < I->size; i++)
434+
ok = (I->patched.state[i] == STATE_INSTRUCTION);
435+
if (!ok)
436+
continue;
437+
438+
int32_t rel32 = *(int32_t *)(I->original.bytes + (jcc? 2: 1));
439+
intptr_t target = I->addr + (intptr_t)I->size + (intptr_t)rel32;
440+
offset += (off_t)I->size + (off_t)rel32;
441+
auto i = B->Is.find(offset);
442+
if (i == B->Is.end())
443+
continue;
444+
Instr *J = i->second;
445+
if (J->addr != target || J->trampoline == INTPTR_MIN)
446+
continue;
447+
intptr_t diff = J->trampoline - (I->addr + (intptr_t)I->size);
448+
if (diff < INT32_MIN || diff > INT32_MAX)
449+
continue;
450+
int32_t diff32 = (int32_t)diff;
451+
*(int32_t *)(I->patched.bytes + (jcc? 2: 1)) = diff32;
452+
}
453+
}
454+
396455
/*
397456
* Parse an emit message.
398457
*/
@@ -425,11 +484,14 @@ static void parseEmit(Binary *B, const Message &msg)
425484
error("failed to parse \"emit\" message (id=%u); duplicate "
426485
"parameters detected");
427486

428-
429487
// Flush the queue:
430488
queueFlush(B, INTPTR_MIN);
431489
putchar('\n');
432490

491+
// Post-processing optimizations:
492+
if (option_Ojump_peephole_2)
493+
optimizePeephole2(B);
494+
433495
// Create and optimize the mappings:
434496
MappingSet mappings;
435497
buildMappings(B->allocator, option_mem_mapping_size, mappings);

src/e9patch/e9patch.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ bool option_tactic_backward_T3 = true;
4040
unsigned option_Ojump_elim = 0;
4141
unsigned option_Ojump_elim_size = 64;
4242
bool option_Ojump_peephole = true;
43+
bool option_Ojump_peephole_2 = true;
4344
bool option_Oorder_trampolines = false;
4445
bool option_Oscratch_stack = false;
4546
size_t option_mem_granularity = 64;
@@ -186,6 +187,10 @@ static void usage(FILE *stream, const char *progname)
186187
"\t\tEnables [disables] jump-from-trampoline peephole optimization.\n"
187188
"\t\tDefault: true (enabled)\n"
188189
"\n"
190+
"\t-Ojump-peephole-2[=false]\n"
191+
"\t\tEnables [disables] jump-to-trampoline peephole optimization.\n"
192+
"\t\tDefault: true (enabled)\n"
193+
"\n"
189194
"\t-Oorder-trampolines[=false]\n"
190195
"\t\tEnables [disables] the ordering of trampolines with respect\n"
191196
"\t\tto the original instruction ordering (as much as is possible).\n"
@@ -295,6 +300,7 @@ enum Option
295300
OPTION_OJUMP_ELIM,
296301
OPTION_OJUMP_ELIM_SIZE,
297302
OPTION_OJUMP_PEEPHOLE,
303+
OPTION_OJUMP_PEEPHOLE_2,
298304
OPTION_OORDER_TRAMPOLINES,
299305
OPTION_OSCRATCH_STACK,
300306
OPTION_OUTPUT,
@@ -322,6 +328,7 @@ void parseOptions(int argc, char * const argv[], bool api)
322328
{"Ojump-elim", req_arg, nullptr, OPTION_OJUMP_ELIM},
323329
{"Ojump-elim-size", req_arg, nullptr, OPTION_OJUMP_ELIM_SIZE},
324330
{"Ojump-peephole", opt_arg, nullptr, OPTION_OJUMP_PEEPHOLE},
331+
{"Ojump-peephole-2", opt_arg, nullptr, OPTION_OJUMP_PEEPHOLE_2},
325332
{"Oorder-trampolines", opt_arg, nullptr, OPTION_OORDER_TRAMPOLINES},
326333
{"Oscratch-stack", opt_arg, nullptr, OPTION_OSCRATCH_STACK},
327334
{"debug", no_arg, nullptr, OPTION_DEBUG},
@@ -391,6 +398,10 @@ void parseOptions(int argc, char * const argv[], bool api)
391398
option_Ojump_peephole =
392399
parseBoolOptArg("-Ojump-peephole", optarg);
393400
break;
401+
case OPTION_OJUMP_PEEPHOLE_2:
402+
option_Ojump_peephole_2 =
403+
parseBoolOptArg("-Ojump-peephole-2", optarg);
404+
break;
394405
case OPTION_OORDER_TRAMPOLINES:
395406
option_Oorder_trampolines =
396407
parseBoolOptArg("-Oorder-trampolines", optarg);

src/e9patch/e9patch.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,7 @@ extern bool option_debug;
396396
extern unsigned option_Ojump_elim;
397397
extern unsigned option_Ojump_elim_size;
398398
extern bool option_Ojump_peephole;
399+
extern bool option_Ojump_peephole_2;
399400
extern bool option_Oorder_trampolines;
400401
extern bool option_Oscratch_stack;
401402
extern bool option_tactic_B1;

src/e9tool/e9frontend.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2055,8 +2055,9 @@ struct Instr
20552055
size_t data:1; // (E9Tool internal)
20562056
size_t patch:1; // (E9Tool internal)
20572057
size_t emitted:1; // (E9Tool internal)
2058+
size_t jump:1; // (E9Tool internal)
20582059

2059-
Instr() : patch(0), emitted(0), action(0)
2060+
Instr() : patch(0), emitted(0), action(0), jump(0)
20602061
{
20612062
;
20622063
}

src/e9tool/e9tool.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2538,6 +2538,7 @@ int main(int argc, char **argv)
25382538
options.push_back("-Ojump-elim=0");
25392539
options.push_back("-Ojump-elim-size=0");
25402540
options.push_back("-Ojump-peephole=false");
2541+
options.push_back("-Ojump-peephole-2=false");
25412542
options.push_back("-Oorder-trampolines=false");
25422543
options.push_back("-Oscratch-stack=false");
25432544
options.push_back("--mem-granularity=64");
@@ -2547,6 +2548,7 @@ int main(int argc, char **argv)
25472548
options.push_back("-Ojump-elim-size=0");
25482549
options.push_back("-Oorder-trampolines=false");
25492550
options.push_back("-Ojump-peephole=true");
2551+
options.push_back("-Ojump-peephole-2=true");
25502552
options.push_back("-Oscratch-stack=true");
25512553
options.push_back("--mem-granularity=128");
25522554
break;
@@ -2555,6 +2557,7 @@ int main(int argc, char **argv)
25552557
options.push_back("-Ojump-elim-size=64");
25562558
options.push_back("-Oorder-trampolines=true");
25572559
options.push_back("-Ojump-peephole=true");
2560+
options.push_back("-Ojump-peephole-2=true");
25582561
options.push_back("-Oscratch-stack=true");
25592562
options.push_back("--mem-granularity=128");
25602563
break;
@@ -2563,13 +2566,15 @@ int main(int argc, char **argv)
25632566
options.push_back("-Ojump-elim-size=512");
25642567
options.push_back("-Oorder-trampolines=true");
25652568
options.push_back("-Ojump-peephole=true");
2569+
options.push_back("-Ojump-peephole-2=true");
25662570
options.push_back("-Oscratch-stack=true");
25672571
options.push_back("--mem-granularity=4096");
25682572
break;
25692573
case 's':
25702574
options.push_back("-Ojump-elim=0");
25712575
options.push_back("-Ojump-elim-size=0");
25722576
options.push_back("-Ojump-peephole=true");
2577+
options.push_back("-Ojump-peephole-2=true");
25732578
options.push_back("-Oorder-trampolines=true");
25742579
options.push_back("-Oscratch-stack=true");
25752580
options.push_back("--mem-granularity=4096");
@@ -2758,6 +2763,10 @@ int main(int argc, char **argv)
27582763
I.string.instr,
27592764
(matched && option_is_tty? "\33[0m": ""),
27602765
(matched && !option_is_tty? " (matched)": ""));
2766+
if (I.size >= /*sizeof(jmpq)=*/5 &&
2767+
((I.category & CATEGORY_JUMP) != 0 ||
2768+
(I.category & CATEGORY_CALL) != 0))
2769+
Is[i].jump = true;
27612770
}
27622771
notifyPlugins(backend.out, &elf, Is.data(), Is.size(),
27632772
EVENT_MATCHING_COMPLETE);
@@ -2768,6 +2777,18 @@ int main(int argc, char **argv)
27682777
intptr_t id = -1;
27692778
for (ssize_t i = (ssize_t)count - 1; i >= 0; i--)
27702779
{
2780+
switch (option_optimization_level)
2781+
{
2782+
case '2': case '3': case 's':
2783+
if (!Is[i].emitted && Is[i].jump)
2784+
{
2785+
// Always emits jump/calls for -Ojump-peephole-2
2786+
Is[i].emitted = true;
2787+
sendInstructionMessage(backend.out, Is[i].address,
2788+
Is[i].size, Is[i].offset);
2789+
}
2790+
break;
2791+
}
27712792
if (!Is[i].patch)
27722793
continue;
27732794

0 commit comments

Comments
 (0)