Hi, @derekbruening and @AssadHashmi.
I wrote the DynamoRIO client below for edge profiling. We have used it to collect branch information (the source and destination addresses of each branch) for the mcf benchmark from SPEC CPU.
However, compared with native execution, our client causes a huge slowdown (more than 400x), whereas in Derek's CGO'13 paper the overhead of edge profiling on mcf is less than 5x.
Could you tell me how to collect the edge information with lower overhead using DynamoRIO, or how I can improve the current client?
Wenlong.
bool
tag_is_main_module(void *tag)
{
module_data_t *main_module = dr_get_main_module();
return dr_module_contains_addr(main_module, dr_fragment_app_pc(tag));
}
/* Callback installed for conditional branches via
 * dr_insert_cbr_instrumentation() in event_app_instruction().
 *
 * src:   address of the branch instruction.
 * dst:   branch target address.
 * taken: non-zero when the branch was taken.
 *
 * For a taken branch, appends one "src,dst" line (hex) to the file handle
 * stored in the calling thread's TLS slot; a not-taken branch is ignored.
 */
static void
at_br_cond(app_pc src, app_pc dst, int taken)
{
    void *dc;
    file_t out;

    /* Only taken edges are recorded. */
    if (taken == 0)
        return;

    dc = dr_get_current_drcontext();
    out = (file_t)(ptr_uint_t)drmgr_get_tls_field(dc, tls_idx);
    dr_fprintf(out, "%lx,%lx\n", src, dst);
}
/* Callback installed for unconditional branches, direct calls and indirect
 * transfers in event_app_instruction().
 *
 * src: address of the transfer instruction.
 * dst: target address.
 *
 * Always appends one "src,dst" line (hex) to the file handle stored in the
 * calling thread's TLS slot.
 */
static void
at_br_no_cond(app_pc src, app_pc dst)
{
    void *dc = dr_get_current_drcontext();
    void *slot = drmgr_get_tls_field(dc, tls_idx);

    dr_fprintf((file_t)(ptr_uint_t)slot, "%lx,%lx\n", src, dst);
}
/* Per-instruction insertion callback registered with drmgr.
 *
 * For control-transfer instructions in fragments whose tag belongs to the
 * main module, inserts the matching DynamoRIO branch instrumentation:
 *   - unconditional branch -> at_br_no_cond
 *   - conditional branch   -> at_br_cond
 *   - direct call          -> at_br_no_cond
 *   - indirect call, return, indirect jump -> at_br_no_cond (SPILL_SLOT_1)
 * Every other instruction is left untouched.
 *
 * Always returns DR_EMIT_DEFAULT. for_trace, translating and user_data are
 * not used.
 */
static dr_emit_flags_t
event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *instr,
                      bool for_trace, bool translating, void *user_data)
{
    /* Skip fragments outside the main module and non-branch instructions. */
    if (!tag_is_main_module(tag) || !instr_is_cti(instr))
        return DR_EMIT_DEFAULT;

    if (instr_is_ubr(instr))
        dr_insert_ubr_instrumentation(drcontext, bb, instr, (void *)at_br_no_cond);
    else if (instr_is_cbr(instr))
        dr_insert_cbr_instrumentation(drcontext, bb, instr, (void *)at_br_cond);
    else if (instr_is_call_direct(instr))
        dr_insert_call_instrumentation(drcontext, bb, instr, (void *)at_br_no_cond);
    else if (instr_is_call_indirect(instr) || instr_is_return(instr) ||
             instr_is_jmp_indirect(instr))
        dr_insert_mbr_instrumentation(drcontext, bb, instr, (void *)at_br_no_cond,
                                      SPILL_SLOT_1);

    return DR_EMIT_DEFAULT;
}
static void
event_exit(void)
{
dr_log(NULL, DR_LOG_ALL, 1, "Client 'branch_recorder' exiting");
if (!drmgr_unregister_bb_insertion_event(event_app_instruction) ||
!drmgr_unregister_tls_field(tls_idx))
DR_ASSERT(false);
drmgr_exit();
}
/* Thread-init callback: opens a log file for the new thread through
 * log_file_open() and stores its handle in the thread's TLS slot, where the
 * branch callbacks read it back.
 *
 * drcontext: context of the thread being initialized.
 */
static void
event_thread_init(void *drcontext)
{
    file_t log = log_file_open(client_id, drcontext,
                               NULL /* using client lib path */,
                               "branch_recorder", DR_FILE_ALLOW_LARGE);

    DR_ASSERT(log != INVALID_FILE);
    /* The handle is stored directly in the pointer-sized TLS slot. */
    drmgr_set_tls_field(drcontext, tls_idx, (void *)(ptr_uint_t)log);
}
static void
event_thread_exit(void *drcontext)
{
log_file_close((file_t)(ptr_uint_t)drmgr_get_tls_field(drcontext, tls_idx));
}
/* Client entry point: names the client, initializes drmgr, reserves the TLS
 * slot used for the per-thread log handle, and registers the exit, thread
 * init/exit and per-instruction insertion callbacks.
 *
 * id:         client id, saved in client_id for log_file_open().
 * argc, argv: client options; not used.
 *
 * Any initialization or registration failure trips DR_ASSERT.
 */
DR_EXPORT
void
dr_client_main(client_id_t id, int argc, const char *argv[])
{
    /* NOTE(review): the second argument was missing (the call was left
     * unterminated); the report URL used by the DynamoRIO sample clients is
     * restored here -- confirm it is the intended value.
     */
    dr_set_client_name("DynamoRIO Sample Client 'branch_recorder'",
                       "http://dynamorio.org/issues");
    dr_log(NULL, DR_LOG_ALL, 1, "Client 'branch_recorder' initializing");

    if (!drmgr_init())
        DR_ASSERT(false);
    client_id = id;

    /* A failed registration returns -1; using that index for TLS accesses in
     * the callbacks would be invalid, so fail early.
     */
    tls_idx = drmgr_register_tls_field();
    DR_ASSERT(tls_idx != -1);

    dr_register_exit_event(event_exit);
    if (!drmgr_register_thread_init_event(event_thread_init) ||
        !drmgr_register_thread_exit_event(event_thread_exit) ||
        !drmgr_register_bb_instrumentation_event(NULL, event_app_instruction, NULL))
        DR_ASSERT(false);
}