Skip to content

Commit 5f562c4

Browse files
authored
Merge pull request #1259 from undingen/rewriter_slots4
ICs: variable size IC slots
2 parents dc0a2f2 + 258a2b0 commit 5f562c4

File tree

11 files changed

+302
-229
lines changed

11 files changed

+302
-229
lines changed

src/asm_writing/assembler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ class Assembler {
206206
uint8_t* startAddr() const { return start_addr; }
207207
int bytesLeft() const { return end_addr - addr; }
208208
int bytesWritten() const { return addr - start_addr; }
209+
int size() const { return end_addr - start_addr; }
209210
uint8_t* curInstPointer() { return addr; }
210211
void setCurInstPointer(uint8_t* ptr) { addr = ptr; }
211212
bool isExactlyFull() const { return addr == end_addr; }

src/asm_writing/icinfo.cpp

Lines changed: 139 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -76,37 +76,46 @@ void ICInvalidator::invalidateAll() {
7676
void ICSlotInfo::clear() {
7777
ic->clear(this);
7878
decref_infos.clear();
79+
used = false;
7980
}
8081

81-
ICSlotRewrite::ICSlotRewrite(ICInfo* ic, const char* debug_name)
82-
: ic(ic), debug_name(debug_name), buf((uint8_t*)malloc(ic->getSlotSize())), assembler(buf, ic->getSlotSize()) {
83-
assembler.nop();
82+
std::unique_ptr<ICSlotRewrite> ICSlotRewrite::create(ICInfo* ic, const char* debug_name) {
83+
auto ic_entry = ic->pickEntryForRewrite(debug_name);
84+
if (!ic_entry)
85+
return NULL;
86+
return std::unique_ptr<ICSlotRewrite>(new ICSlotRewrite(ic_entry, debug_name));
87+
}
8488

89+
ICSlotRewrite::ICSlotRewrite(ICSlotInfo* ic_entry, const char* debug_name)
90+
: ic_entry(ic_entry),
91+
debug_name(debug_name),
92+
buf((uint8_t*)malloc(ic_entry->size)),
93+
assembler(buf, ic_entry->size) {
94+
// set num_inside = 1 to make sure that we will not have multiple rewriters at the same time rewriting the same slot
95+
assert(ic_entry->num_inside == 0);
96+
++ic_entry->num_inside;
97+
98+
assembler.nop();
8599
if (VERBOSITY() >= 4)
86100
printf("starting %s icentry\n", debug_name);
87101
}
88102

89103
ICSlotRewrite::~ICSlotRewrite() {
90104
free(buf);
105+
--ic_entry->num_inside;
91106
}
92107

93108
void ICSlotRewrite::abort() {
109+
auto ic = getICInfo();
94110
ic->retry_backoff = std::min(MAX_RETRY_BACKOFF, 2 * ic->retry_backoff);
95111
ic->retry_in = ic->retry_backoff;
96112
}
97113

98-
ICSlotInfo* ICSlotRewrite::prepareEntry() {
99-
this->ic_entry = ic->pickEntryForRewrite(debug_name);
100-
return this->ic_entry;
101-
}
102114

103-
uint8_t* ICSlotRewrite::getSlotStart() {
104-
assert(ic_entry != NULL);
105-
return (uint8_t*)ic->start_addr + ic_entry->idx * ic->getSlotSize();
106-
}
107115

108116
void ICSlotRewrite::commit(CommitHook* hook, std::vector<void*> gc_references,
109-
std::vector<std::pair<uint64_t, std::vector<Location>>> decref_infos) {
117+
std::vector<std::pair<uint64_t, std::vector<Location>>> decref_infos,
118+
llvm::ArrayRef<NextSlotJumpInfo> next_slot_jumps) {
110119
bool still_valid = true;
111120
for (int i = 0; i < dependencies.size(); i++) {
112121
int orig_version = dependencies[i].second;
@@ -124,10 +133,13 @@ void ICSlotRewrite::commit(CommitHook* hook, std::vector<void*> gc_references,
124133
return;
125134
}
126135

136+
auto ic = getICInfo();
127137
uint8_t* slot_start = getSlotStart();
128138
uint8_t* continue_point = (uint8_t*)ic->continue_addr;
129139

130-
bool do_commit = hook->finishAssembly(continue_point - slot_start);
140+
bool should_fill_with_nops = true;
141+
bool variable_size_slots = true;
142+
bool do_commit = hook->finishAssembly(continue_point - slot_start, should_fill_with_nops, variable_size_slots);
131143

132144
if (!do_commit) {
133145
for (auto p : gc_references)
@@ -136,6 +148,15 @@ void ICSlotRewrite::commit(CommitHook* hook, std::vector<void*> gc_references,
136148
}
137149

138150
assert(!assembler.hasFailed());
151+
int original_size = ic_entry->size;
152+
int actual_size = assembler.bytesWritten();
153+
int empty_space = original_size - actual_size;
154+
assert(actual_size <= original_size);
155+
assert(assembler.size() == original_size);
156+
if (should_fill_with_nops) {
157+
assembler.fillWithNops();
158+
assert(original_size == assembler.bytesWritten());
159+
}
139160

140161
for (int i = 0; i < dependencies.size(); i++) {
141162
ICInvalidator* invalidator = dependencies[i].first;
@@ -144,14 +165,43 @@ void ICSlotRewrite::commit(CommitHook* hook, std::vector<void*> gc_references,
144165

145166
ic->next_slot_to_try++;
146167

168+
// we can create a new IC slot if this is the last slot in the IC; in addition, we check that the new slot is
169+
// at least as big as the current one.
170+
bool should_create_new_slot = variable_size_slots && &ic->slots.back() == ic_entry && empty_space >= actual_size;
171+
if (should_create_new_slot) {
172+
// reduce size of the current slot to the real size
173+
ic_entry->size = actual_size;
174+
175+
// after resizing this slot we need to patch the jumps to the next slot
176+
Assembler new_asm(assembler.getStartAddr(), original_size);
177+
for (auto&& jump : next_slot_jumps) {
178+
auto jmp_inst_offset = std::get<0>(jump);
179+
auto jmp_inst_end = std::get<1>(jump);
180+
auto jmp_condition = std::get<2>(jump);
181+
new_asm.setCurInstPointer(assembler.getStartAddr() + jmp_inst_offset);
182+
new_asm.jmp_cond(assembler::JumpDestination::fromStart(actual_size), jmp_condition);
183+
184+
// we often end up using a smaller encoding so we have to make sure we fill the space with nops
185+
while (new_asm.bytesWritten() < jmp_inst_end)
186+
new_asm.nop();
187+
}
188+
189+
// put a jump to the slowpath at the beginning of the new slot
190+
Assembler asm_next_slot(assembler.getStartAddr() + actual_size, empty_space);
191+
asm_next_slot.jmp(JumpDestination::fromStart(empty_space));
192+
193+
// add the new slot
194+
ic->slots.emplace_back(ic, ic_entry->start_addr + actual_size, empty_space);
195+
}
196+
147197
// if (VERBOSITY()) printf("Commiting to %p-%p\n", start, start + ic->slot_size);
148-
memcpy(slot_start, buf, ic->getSlotSize());
198+
memcpy(slot_start, buf, original_size);
149199

150200
for (auto p : ic_entry->gc_references) {
151201
Py_DECREF(p);
152202
}
153203
ic_entry->gc_references = std::move(gc_references);
154-
204+
ic_entry->used = true;
155205
ic->times_rewritten++;
156206

157207
if (ic->times_rewritten == IC_MEGAMORPHIC_THRESHOLD) {
@@ -174,49 +224,66 @@ void ICSlotRewrite::commit(CommitHook* hook, std::vector<void*> gc_references,
174224
ic_entry->decref_infos.emplace_back(decref_info.first, std::move(merged_locations));
175225
}
176226

177-
llvm::sys::Memory::InvalidateInstructionCache(slot_start, ic->getSlotSize());
227+
llvm::sys::Memory::InvalidateInstructionCache(slot_start, original_size);
178228
}
179229

180230
void ICSlotRewrite::addDependenceOn(ICInvalidator& invalidator) {
181231
dependencies.push_back(std::make_pair(&invalidator, invalidator.version()));
182232
}
183233

184-
int ICSlotRewrite::getSlotSize() {
185-
return ic->getSlotSize();
186-
}
187-
188-
int ICSlotRewrite::getScratchRspOffset() {
189-
assert(ic->stack_info.scratch_size);
190-
return ic->stack_info.scratch_rsp_offset;
191-
}
234+
int ICInfo::calculateSuggestedSize() {
235+
// if we never rewrote this IC just return the whole IC size for now
236+
if (!times_rewritten)
237+
return slots[0].size;
192238

193-
int ICSlotRewrite::getScratchSize() {
194-
return ic->stack_info.scratch_size;
195-
}
196-
197-
TypeRecorder* ICSlotRewrite::getTypeRecorder() {
198-
return ic->type_recorder;
199-
}
239+
int additional_space_per_slot = 30;
240+
// if there are fewer rewrites than slots we can give a very accurate estimate
241+
if (times_rewritten < slots.size()) {
242+
// add up the sizes of all used slots
243+
int size = 0;
244+
for (int i = 0; i < times_rewritten; ++i) {
245+
size += slots[i].size + additional_space_per_slot;
246+
}
247+
return size;
248+
}
200249

201-
assembler::GenericRegister ICSlotRewrite::returnRegister() {
202-
return ic->return_register;
250+
// get total size of IC
251+
int size = 0;
252+
for (auto&& slot : slots) {
253+
size += slot.size;
254+
}
255+
// make it bigger
256+
if (isMegamorphic())
257+
size *= 4;
258+
else
259+
size *= 2;
260+
return std::min(size, 4096);
203261
}
204262

205-
206-
207263
std::unique_ptr<ICSlotRewrite> ICInfo::startRewrite(const char* debug_name) {
208-
return std::unique_ptr<ICSlotRewrite>(new ICSlotRewrite(this, debug_name));
264+
return ICSlotRewrite::create(this, debug_name);
209265
}
210266

211267
ICSlotInfo* ICInfo::pickEntryForRewrite(const char* debug_name) {
212-
int num_slots = getNumSlots();
268+
int num_slots = slots.size();
269+
int fallback_to_in_use_slot = -1;
270+
271+
// we prefer to use an unused slot and if none is available we will fall back to a slot which is in use (but no one is
272+
// inside)
213273
for (int _i = 0; _i < num_slots; _i++) {
214274
int i = (_i + next_slot_to_try) % num_slots;
215275

216276
ICSlotInfo& sinfo = slots[i];
217277
assert(sinfo.num_inside >= 0);
218-
if (sinfo.num_inside)
278+
279+
if (sinfo.num_inside || sinfo.size == 0)
280+
continue;
281+
282+
if (sinfo.used) {
283+
if (fallback_to_in_use_slot == -1)
284+
fallback_to_in_use_slot = i;
219285
continue;
286+
}
220287

221288
if (VERBOSITY() >= 4) {
222289
printf("picking %s icentry to in-use slot %d at %p\n", debug_name, i, start_addr);
@@ -225,21 +292,28 @@ ICSlotInfo* ICInfo::pickEntryForRewrite(const char* debug_name) {
225292
next_slot_to_try = i;
226293
return &sinfo;
227294
}
295+
296+
if (fallback_to_in_use_slot != -1) {
297+
if (VERBOSITY() >= 4) {
298+
printf("picking %s icentry to in-use slot %d at %p\n", debug_name, fallback_to_in_use_slot, start_addr);
299+
}
300+
301+
next_slot_to_try = fallback_to_in_use_slot;
302+
return &slots[fallback_to_in_use_slot];
303+
}
304+
228305
if (VERBOSITY() >= 4)
229306
printf("not committing %s icentry since there are no available slots\n", debug_name);
230307
return NULL;
231308
}
232309

233310
static llvm::DenseMap<void*, ICInfo*> ics_by_return_addr;
234311

235-
ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int num_slots,
236-
int slot_size, llvm::CallingConv::ID calling_conv, LiveOutSet _live_outs,
237-
assembler::GenericRegister return_register, TypeRecorder* type_recorder,
238-
std::vector<Location> ic_global_decref_locations)
312+
ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int size,
313+
llvm::CallingConv::ID calling_conv, LiveOutSet _live_outs, assembler::GenericRegister return_register,
314+
TypeRecorder* type_recorder, std::vector<Location> ic_global_decref_locations)
239315
: next_slot_to_try(0),
240316
stack_info(stack_info),
241-
num_slots(num_slots),
242-
slot_size(slot_size),
243317
calling_conv(calling_conv),
244318
live_outs(std::move(_live_outs)),
245319
return_register(return_register),
@@ -251,10 +325,7 @@ ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, S
251325
start_addr(start_addr),
252326
slowpath_rtn_addr(slowpath_rtn_addr),
253327
continue_addr(continue_addr) {
254-
slots.reserve(num_slots);
255-
for (int i = 0; i < num_slots; i++) {
256-
slots.emplace_back(this, i);
257-
}
328+
slots.emplace_back(this, (uint8_t*)start_addr, size);
258329
if (slowpath_rtn_addr && !this->ic_global_decref_locations.empty())
259330
slowpath_decref_info = DecrefInfo((uint64_t)slowpath_rtn_addr, this->ic_global_decref_locations);
260331
}
@@ -283,7 +354,7 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
283354
uint8_t* continue_addr, uint8_t* slowpath_rtn_addr,
284355
const ICSetupInfo* ic, StackInfo stack_info, LiveOutSet live_outs,
285356
std::vector<Location> decref_info) {
286-
assert(slowpath_start_addr - start_addr >= ic->num_slots * ic->slot_size);
357+
assert(slowpath_start_addr - start_addr >= ic->size);
287358
assert(slowpath_rtn_addr > slowpath_start_addr);
288359
assert(slowpath_rtn_addr <= start_addr + ic->totalSize());
289360

@@ -305,22 +376,13 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
305376
// we can let the user just slide down the nop section, but instead
306377
// emit jumps to the end.
307378
// Not sure if this is worth it or not?
308-
for (int i = 0; i < ic->num_slots; i++) {
309-
uint8_t* start = start_addr + i * ic->slot_size;
310-
// std::unique_ptr<MCWriter> writer(createMCWriter(start, ic->slot_size * (ic->num_slots - i), 0));
311-
// writer->emitNop();
312-
// writer->emitGuardFalse();
313-
314-
Assembler writer(start, ic->slot_size);
315-
writer.nop();
316-
// writer.trap();
317-
// writer.jmp(JumpDestination::fromStart(ic->slot_size * (ic->num_slots - i)));
318-
writer.jmp(JumpDestination::fromStart(slowpath_start_addr - start));
319-
}
379+
Assembler writer(start_addr, ic->size);
380+
writer.nop();
381+
writer.jmp(JumpDestination::fromStart(slowpath_start_addr - start_addr));
320382

321383
ICInfo* icinfo
322-
= new ICInfo(start_addr, slowpath_rtn_addr, continue_addr, stack_info, ic->num_slots, ic->slot_size,
323-
ic->getCallingConvention(), std::move(live_outs), return_register, ic->type_recorder, decref_info);
384+
= new ICInfo(start_addr, slowpath_rtn_addr, continue_addr, stack_info, ic->size, ic->getCallingConvention(),
385+
std::move(live_outs), return_register, ic->type_recorder, decref_info);
324386

325387
assert(!ics_by_return_addr.count(slowpath_rtn_addr));
326388
ics_by_return_addr[slowpath_rtn_addr] = icinfo;
@@ -350,14 +412,14 @@ ICInfo* getICInfo(void* rtn_addr) {
350412
void ICInfo::clear(ICSlotInfo* icentry) {
351413
assert(icentry);
352414

353-
uint8_t* start = (uint8_t*)start_addr + icentry->idx * getSlotSize();
415+
uint8_t* start = (uint8_t*)icentry->start_addr;
354416

355417
if (VERBOSITY() >= 4)
356418
printf("clearing patchpoint %p, slot at %p\n", start_addr, start);
357419

358-
Assembler writer(start, getSlotSize());
420+
Assembler writer(start, icentry->size);
359421
writer.nop();
360-
writer.jmp(JumpDestination::fromStart(getSlotSize()));
422+
writer.jmp(JumpDestination::fromStart(icentry->size));
361423
assert(writer.bytesWritten() <= IC_INVALDITION_HEADER_SIZE);
362424

363425
for (auto p : icentry->gc_references) {
@@ -370,7 +432,17 @@ void ICInfo::clear(ICSlotInfo* icentry) {
370432
// writer->emitGuardFalse();
371433

372434
// writer->endWithSlowpath();
373-
llvm::sys::Memory::InvalidateInstructionCache(start, getSlotSize());
435+
436+
llvm::sys::Memory::InvalidateInstructionCache(start, icentry->size);
437+
438+
for (int i = 0; i < slots.size(); ++i) {
439+
if (&slots[i] == icentry) {
440+
next_slot_to_try = i;
441+
break;
442+
}
443+
}
444+
445+
icentry->used = false;
374446
}
375447

376448
bool ICInfo::shouldAttempt() {

0 commit comments

Comments
 (0)