Skip to content

Commit 33e1c26

Browse files
committed
i#1795 drreg: 1-step slowpath transition with separated TLS slots
Separates the TLS slots used to pass parameters through the shared_slowpath gencode to the slowpath C code from the TLS slots used for scratch registers. This will make it much easier to use drreg and to move toward local, asynchronous scratch register usage. Replaces the 2-step transition out of the slowpath, which relied on the low-performance OP_xchg instruction, with a 1-step transition, made possible by no longer clobbering the scratch TLS slots. Updates DR to abcd9b0 to pick up the fix for the i#2136 cpuid issue, which this change exposes because ecx is no longer (overly) restored by the 1-step transition as it was by the 2-step one. Review-URL: https://codereview.appspot.com/314250043
1 parent 31250df commit 33e1c26

3 files changed

Lines changed: 57 additions & 85 deletions

File tree

drmemory/slowpath.c

Lines changed: 49 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,12 +1173,12 @@ instrument_slowpath(void *drcontext, instrlist_t *bb, instr_t *inst,
11731173
} else {
11741174
instru_insert_mov_pc(drcontext, bb, inst,
11751175
(r1 == SPILL_REG_NONE) ?
1176-
spill_slot_opnd(drcontext, SPILL_SLOT_1) :
1176+
spill_slot_opnd(drcontext, SPILL_SLOT_SLOW_PARAM) :
11771177
opnd_create_reg(s1->reg),
11781178
decode_pc_opnd);
11791179
instru_insert_mov_pc(drcontext, bb, inst,
11801180
(r2 == SPILL_REG_NONE) ?
1181-
spill_slot_opnd(drcontext, SPILL_SLOT_2) :
1181+
spill_slot_opnd(drcontext, SPILL_SLOT_SLOW_RET) :
11821182
opnd_create_reg(s2->reg),
11831183
opnd_create_instr(appinst));
11841184
PRE(bb, inst, XINST_CREATE_jump(drcontext, opnd_create_pc(tgt)));
@@ -1228,16 +1228,46 @@ is_in_gencode(byte *pc)
12281228

12291229
#ifdef X86 /* XXX i#1726: update for ARM */
12301230
static void
1231-
shared_slowpath_spill(void *drcontext, instrlist_t *ilist, int type, int slot)
1231+
shared_slowpath_save_param(void *drcontext, instrlist_t *ilist, int type)
12321232
{
1233-
if (type >= SPILL_REG_EAX && type <= SPILL_REG_EBX) {
1234-
PRE(ilist, NULL, INSTR_CREATE_xchg
1235-
(drcontext, spill_slot_opnd(drcontext, slot),
1236-
opnd_create_reg(DR_REG_XAX + (type - SPILL_REG_EAX))));
1237-
} else if (type >= SPILL_REG_EAX_DEAD && type <= SPILL_REG_EBX_DEAD) {
1233+
if ((type >= SPILL_REG_EAX && type <= SPILL_REG_EBX) ||
1234+
(type >= SPILL_REG_EAX_DEAD && type <= SPILL_REG_EBX_DEAD)) {
1235+
reg_id_t reg = (type >= SPILL_REG_EAX && type <= SPILL_REG_EBX) ?
1236+
(DR_REG_XAX + (type - SPILL_REG_EAX)) :
1237+
(DR_REG_XAX + (type - SPILL_REG_EAX_DEAD));
1238+
/* Store from site-specific reg into TLS for clean call param */
12381239
PRE(ilist, NULL, INSTR_CREATE_mov_st
1239-
(drcontext, spill_slot_opnd(drcontext, slot),
1240-
opnd_create_reg(DR_REG_XAX + (type - SPILL_REG_EAX_DEAD))));
1240+
(drcontext, spill_slot_opnd(drcontext, SPILL_SLOT_SLOW_PARAM),
1241+
opnd_create_reg(reg)));
1242+
} /* else param was put straight in tls slot */
1243+
}
1244+
1245+
static void
1246+
shared_slowpath_save_retaddr(void *drcontext, instrlist_t *ilist, int type)
1247+
{
1248+
if ((type >= SPILL_REG_EAX && type <= SPILL_REG_EBX) ||
1249+
(type >= SPILL_REG_EAX_DEAD && type <= SPILL_REG_EBX_DEAD)) {
1250+
reg_id_t reg = (type >= SPILL_REG_EAX && type <= SPILL_REG_EBX) ?
1251+
(DR_REG_XAX + (type - SPILL_REG_EAX)) :
1252+
(DR_REG_XAX + (type - SPILL_REG_EAX_DEAD));
1253+
/* Store from site-specific reg into TLS for clean call ret */
1254+
PRE(ilist, NULL, INSTR_CREATE_mov_st
1255+
(drcontext, spill_slot_opnd(drcontext, SPILL_SLOT_SLOW_RET),
1256+
opnd_create_reg(reg)));
1257+
} /* else retaddr was put straight in tls slot */
1258+
}
1259+
1260+
static void
1261+
shared_slowpath_restore(void *drcontext, instrlist_t *ilist, int type, int slot)
1262+
{
1263+
if ((type >= SPILL_REG_EAX && type <= SPILL_REG_EBX) ||
1264+
(type >= SPILL_REG_EAX_DEAD && type <= SPILL_REG_EBX_DEAD)) {
1265+
reg_id_t reg = (type >= SPILL_REG_EAX && type <= SPILL_REG_EBX) ?
1266+
(DR_REG_XAX + (type - SPILL_REG_EAX)) :
1267+
(DR_REG_XAX + (type - SPILL_REG_EAX_DEAD));
1268+
/* Restore app value to reg for emulation in slowpath */
1269+
PRE(ilist, NULL, INSTR_CREATE_mov_ld
1270+
(drcontext, opnd_create_reg(reg), spill_slot_opnd(drcontext, slot)));
12411271
} /* else type names no register, so there is nothing to restore */
12421272
}
12431273
#endif
@@ -1264,13 +1294,11 @@ generate_shared_slowpath(void *drcontext, instrlist_t *ilist, byte *pc)
12641294
shared_slowpath_entry = pc;
12651295
dr_insert_clean_call(drcontext, ilist, NULL,
12661296
(void *) slow_path, false, 2,
1267-
spill_slot_opnd(drcontext, SPILL_SLOT_1),
1268-
spill_slot_opnd(drcontext, SPILL_SLOT_1));
1297+
spill_slot_opnd(drcontext, SPILL_SLOT_SLOW_PARAM),
1298+
spill_slot_opnd(drcontext, SPILL_SLOT_SLOW_PARAM));
12691299
PRE(ilist, NULL,
12701300
XINST_CREATE_jump_mem(drcontext, spill_slot_opnd
1271-
(drcontext, whole_bb_spills_enabled() ?
1272-
/* for whole-bb spills we need two-step return */
1273-
SPILL_SLOT_5 : SPILL_SLOT_2)));
1301+
(drcontext, SPILL_SLOT_SLOW_RET)));
12741302
pc = instrlist_encode(drcontext, ilist, pc, false);
12751303
instrlist_clear(drcontext, ilist);
12761304

@@ -1282,10 +1310,7 @@ generate_shared_slowpath(void *drcontext, instrlist_t *ilist, byte *pc)
12821310
/* for whole-bb, eflags is never restored here */
12831311
for (ef = 0; ef < (whole_bb_spills_enabled() ? 1 : SPILL_EFLAGS_NUM);
12841312
ef++) {
1285-
instr_t *return_point = NULL;
1286-
if (whole_bb_spills_enabled()) {
1287-
return_point = INSTR_CREATE_label(drcontext);
1288-
} else if (ef != SPILL_EFLAGS_NOSPILL) {
1313+
if (!whole_bb_spills_enabled() && ef != SPILL_EFLAGS_NOSPILL) {
12891314
if (ef == SPILL_EFLAGS_6_EAX ||
12901315
ef == SPILL_EFLAGS_6_NOEAX) {
12911316
PRE(ilist, NULL, INSTR_CREATE_add
@@ -1300,7 +1325,7 @@ generate_shared_slowpath(void *drcontext, instrlist_t *ilist, byte *pc)
13001325
}
13011326
}
13021327
if (whole_bb_spills_enabled()) {
1303-
shared_slowpath_spill(drcontext, ilist, r3, SPILL_SLOT_4);
1328+
shared_slowpath_restore(drcontext, ilist, r3, SPILL_SLOT_4);
13041329
} else if (r3 != SPILL_REG3_NOSPILL) {
13051330
restore_reg(drcontext, ilist, NULL, SPILL_REG3_REG,
13061331
spill_reg3_slot(ef == SPILL_EFLAGS_NOSPILL,
@@ -1311,7 +1336,8 @@ generate_shared_slowpath(void *drcontext, instrlist_t *ilist, byte *pc)
13111336
r2 >= SPILL_REG_EAX_DEAD &&
13121337
r2 <= SPILL_REG_EBX_DEAD));
13131338
}
1314-
shared_slowpath_spill(drcontext, ilist, r2, SPILL_SLOT_2);
1339+
shared_slowpath_save_retaddr(drcontext, ilist, r2);
1340+
shared_slowpath_restore(drcontext, ilist, r2, SPILL_SLOT_2);
13151341
if (options.single_arg_slowpath) {
13161342
/* for jmp-to-slowpath optimization we don't have 2nd
13171343
* param, so pass 0 (PR 494769)
@@ -1334,71 +1360,11 @@ generate_shared_slowpath(void *drcontext, instrlist_t *ilist, byte *pc)
13341360
OPND_CREATE_INT32(0)));
13351361
}
13361362
}
1337-
shared_slowpath_spill(drcontext, ilist, r1, SPILL_SLOT_1);
1338-
1339-
if (whole_bb_spills_enabled()) {
1340-
/* we need to put the app's reg values back into the
1341-
* whole-bb spill slots. slow_path() doesn't know which
1342-
* regs are being used, so we do it here via a two-step
1343-
* return process. if no regs are spilled we could
1344-
* skip this: but would need to xfer from slot2 to slot5,
1345-
* which would require a spill, so we don't bother.
1346-
*/
1347-
instru_insert_mov_pc(drcontext, ilist, NULL,
1348-
spill_slot_opnd(drcontext, SPILL_SLOT_5),
1349-
opnd_create_instr(return_point));
1350-
}
1363+
shared_slowpath_save_param(drcontext, ilist, r1);
1364+
shared_slowpath_restore(drcontext, ilist, r1, SPILL_SLOT_1);
13511365
PRE(ilist, NULL,
13521366
XINST_CREATE_jump(drcontext,
13531367
opnd_create_pc(shared_slowpath_entry)));
1354-
if (whole_bb_spills_enabled()) {
1355-
bool tgt_in_reg;
1356-
reg_id_t regtgt = REG_NULL;
1357-
PRE(ilist, NULL, return_point);
1358-
/* instrument_slowpath() re-arranges so the whole-bb
1359-
* spills are always r1 and r2 (have to be, since using
1360-
* slots 1 & 2)
1361-
*/
1362-
if (r2 >= SPILL_REG_EAX && r2 <= SPILL_REG_EBX) {
1363-
regtgt = DR_REG_XAX + (r2 - SPILL_REG_EAX);
1364-
PRE(ilist, NULL,
1365-
INSTR_CREATE_xchg
1366-
(drcontext, spill_slot_opnd(drcontext, SPILL_SLOT_2),
1367-
opnd_create_reg(regtgt)));
1368-
tgt_in_reg = true;
1369-
} else
1370-
tgt_in_reg = false;
1371-
if (r1 >= SPILL_REG_EAX && r1 <= SPILL_REG_EBX) {
1372-
/* we use xchg instead of mov_st to support
1373-
* PR 493257 where slowpath put shared shadow addr
1374-
* into slot1 and we restore it to reg1 here
1375-
*/
1376-
PRE(ilist, NULL,
1377-
INSTR_CREATE_xchg
1378-
(drcontext, spill_slot_opnd(drcontext, SPILL_SLOT_1),
1379-
opnd_create_reg(DR_REG_XAX +
1380-
(r1 - SPILL_REG_EAX))));
1381-
} else if (r1 >= SPILL_REG_EAX_DEAD && r1 <= SPILL_REG_EBX_DEAD) {
1382-
/* for PR 493257 we need to restore shared addr.
1383-
* should we split up if many bbs don't need this?
1384-
*/
1385-
PRE(ilist, NULL,
1386-
XINST_CREATE_load
1387-
(drcontext,
1388-
opnd_create_reg(DR_REG_XAX +
1389-
(r1 - SPILL_REG_EAX_DEAD)),
1390-
spill_slot_opnd(drcontext, SPILL_SLOT_1)));
1391-
}
1392-
if (tgt_in_reg) {
1393-
PRE(ilist, NULL,
1394-
XINST_CREATE_jump_reg(drcontext, opnd_create_reg(regtgt)));
1395-
} else {
1396-
PRE(ilist, NULL,
1397-
XINST_CREATE_jump_mem
1398-
(drcontext, spill_slot_opnd(drcontext, SPILL_SLOT_2)));
1399-
}
1400-
}
1401-
14021368
if (whole_bb_spills_enabled())
14031369
shared_slowpath_entry_global[r1][r2][r3] = pc;
14041370
else

drmemory/spill.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* **********************************************************
2-
* Copyright (c) 2010-2015 Google, Inc. All rights reserved.
2+
* Copyright (c) 2010-2017 Google, Inc. All rights reserved.
33
* Copyright (c) 2008-2010 VMware, Inc. All rights reserved.
44
* **********************************************************/
55

@@ -48,6 +48,12 @@ extern reg_id_t seg_tls;
4848
*/
4949
#define SPILL_SLOT_EFLAGS_EAX SPILL_SLOT_3
5050

51+
/* We separate the TLS slots we use to send params to the slowpath from those
52+
* used for reg preservation, to make using drreg simpler.
53+
*/
54+
#define SPILL_SLOT_SLOW_PARAM SPILL_SLOT_5
55+
#define SPILL_SLOT_SLOW_RET SPILL_SLOT_6
56+
5157
int
5258
spill_reg3_slot(bool eflags_dead, bool eax_dead, bool r1_dead, bool r2_dead);
5359

0 commit comments

Comments
 (0)