@@ -76,37 +76,46 @@ void ICInvalidator::invalidateAll() {
7676void ICSlotInfo::clear () {
7777 ic->clear (this );
7878 decref_infos.clear ();
79+ used = false ;
7980}
8081
81- ICSlotRewrite::ICSlotRewrite (ICInfo* ic, const char * debug_name)
82- : ic(ic), debug_name(debug_name), buf((uint8_t *)malloc(ic->getSlotSize ())), assembler(buf, ic->getSlotSize ()) {
83- assembler.nop ();
82+ std::unique_ptr<ICSlotRewrite> ICSlotRewrite::create (ICInfo* ic, const char * debug_name) {
83+ auto ic_entry = ic->pickEntryForRewrite (debug_name);
84+ if (!ic_entry)
85+ return NULL ;
86+ return std::unique_ptr<ICSlotRewrite>(new ICSlotRewrite (ic_entry, debug_name));
87+ }
8488
89+ ICSlotRewrite::ICSlotRewrite (ICSlotInfo* ic_entry, const char * debug_name)
90+ : ic_entry(ic_entry),
91+ debug_name (debug_name),
92+ buf((uint8_t *)malloc(ic_entry->size)),
93+ assembler(buf, ic_entry->size) {
94+ // set num_inside = 1 to make sure that we will not have multiple rewriters at the same time rewriting the same slot
95+ assert (ic_entry->num_inside == 0 );
96+ ++ic_entry->num_inside ;
97+
98+ assembler.nop ();
8599 if (VERBOSITY () >= 4 )
86100 printf (" starting %s icentry\n " , debug_name);
87101}
88102
89103ICSlotRewrite::~ICSlotRewrite () {
90104 free (buf);
105+ --ic_entry->num_inside ;
91106}
92107
93108void ICSlotRewrite::abort () {
109+ auto ic = getICInfo ();
94110 ic->retry_backoff = std::min (MAX_RETRY_BACKOFF, 2 * ic->retry_backoff );
95111 ic->retry_in = ic->retry_backoff ;
96112}
97113
98- ICSlotInfo* ICSlotRewrite::prepareEntry () {
99- this ->ic_entry = ic->pickEntryForRewrite (debug_name);
100- return this ->ic_entry ;
101- }
102114
103- uint8_t * ICSlotRewrite::getSlotStart () {
104- assert (ic_entry != NULL );
105- return (uint8_t *)ic->start_addr + ic_entry->idx * ic->getSlotSize ();
106- }
107115
108116void ICSlotRewrite::commit (CommitHook* hook, std::vector<void *> gc_references,
109- std::vector<std::pair<uint64_t , std::vector<Location>>> decref_infos) {
117+ std::vector<std::pair<uint64_t , std::vector<Location>>> decref_infos,
118+ llvm::ArrayRef<NextSlotJumpInfo> next_slot_jumps) {
110119 bool still_valid = true ;
111120 for (int i = 0 ; i < dependencies.size (); i++) {
112121 int orig_version = dependencies[i].second ;
@@ -124,10 +133,13 @@ void ICSlotRewrite::commit(CommitHook* hook, std::vector<void*> gc_references,
124133 return ;
125134 }
126135
136+ auto ic = getICInfo ();
127137 uint8_t * slot_start = getSlotStart ();
128138 uint8_t * continue_point = (uint8_t *)ic->continue_addr ;
129139
130- bool do_commit = hook->finishAssembly (continue_point - slot_start);
140+ bool should_fill_with_nops = true ;
141+ bool variable_size_slots = true ;
142+ bool do_commit = hook->finishAssembly (continue_point - slot_start, should_fill_with_nops, variable_size_slots);
131143
132144 if (!do_commit) {
133145 for (auto p : gc_references)
@@ -136,6 +148,15 @@ void ICSlotRewrite::commit(CommitHook* hook, std::vector<void*> gc_references,
136148 }
137149
138150 assert (!assembler.hasFailed ());
151+ int original_size = ic_entry->size ;
152+ int actual_size = assembler.bytesWritten ();
153+ int empty_space = original_size - actual_size;
154+ assert (actual_size <= original_size);
155+ assert (assembler.size () == original_size);
156+ if (should_fill_with_nops) {
157+ assembler.fillWithNops ();
158+ assert (original_size == assembler.bytesWritten ());
159+ }
139160
140161 for (int i = 0 ; i < dependencies.size (); i++) {
141162 ICInvalidator* invalidator = dependencies[i].first ;
@@ -144,14 +165,43 @@ void ICSlotRewrite::commit(CommitHook* hook, std::vector<void*> gc_references,
144165
145166 ic->next_slot_to_try ++;
146167
168+ // we can create a new IC slot if this is the last slot in the IC; in addition we check that the new slot would
169+ // be at least as big as the current one.
170+ bool should_create_new_slot = variable_size_slots && &ic->slots .back () == ic_entry && empty_space >= actual_size;
171+ if (should_create_new_slot) {
172+ // reduce size of the current slot to the real size
173+ ic_entry->size = actual_size;
174+
175+ // after resizing this slot we need to patch the jumps to the next slot
176+ Assembler new_asm (assembler.getStartAddr (), original_size);
177+ for (auto && jump : next_slot_jumps) {
178+ auto jmp_inst_offset = std::get<0 >(jump);
179+ auto jmp_inst_end = std::get<1 >(jump);
180+ auto jmp_condition = std::get<2 >(jump);
181+ new_asm.setCurInstPointer (assembler.getStartAddr () + jmp_inst_offset);
182+ new_asm.jmp_cond (assembler::JumpDestination::fromStart (actual_size), jmp_condition);
183+
184+ // we often end up using a smaller encoding so we have to make sure we fill the space with nops
185+ while (new_asm.bytesWritten () < jmp_inst_end)
186+ new_asm.nop ();
187+ }
188+
189+ // put a jump to the slowpath at the beginning of the new slot
190+ Assembler asm_next_slot (assembler.getStartAddr () + actual_size, empty_space);
191+ asm_next_slot.jmp (JumpDestination::fromStart (empty_space));
192+
193+ // add the new slot
194+ ic->slots .emplace_back (ic, ic_entry->start_addr + actual_size, empty_space);
195+ }
196+
147197 // if (VERBOSITY()) printf("Commiting to %p-%p\n", start, start + ic->slot_size);
148- memcpy (slot_start, buf, ic-> getSlotSize () );
198+ memcpy (slot_start, buf, original_size );
149199
150200 for (auto p : ic_entry->gc_references ) {
151201 Py_DECREF (p);
152202 }
153203 ic_entry->gc_references = std::move (gc_references);
154-
204+ ic_entry-> used = true ;
155205 ic->times_rewritten ++;
156206
157207 if (ic->times_rewritten == IC_MEGAMORPHIC_THRESHOLD) {
@@ -174,49 +224,66 @@ void ICSlotRewrite::commit(CommitHook* hook, std::vector<void*> gc_references,
174224 ic_entry->decref_infos .emplace_back (decref_info.first , std::move (merged_locations));
175225 }
176226
177- llvm::sys::Memory::InvalidateInstructionCache (slot_start, ic-> getSlotSize () );
227+ llvm::sys::Memory::InvalidateInstructionCache (slot_start, original_size );
178228}
179229
180230void ICSlotRewrite::addDependenceOn (ICInvalidator& invalidator) {
181231 dependencies.push_back (std::make_pair (&invalidator, invalidator.version ()));
182232}
183233
184- int ICSlotRewrite::getSlotSize () {
185- return ic->getSlotSize ();
186- }
187-
188- int ICSlotRewrite::getScratchRspOffset () {
189- assert (ic->stack_info .scratch_size );
190- return ic->stack_info .scratch_rsp_offset ;
191- }
234+ int ICInfo::calculateSuggestedSize () {
235+ // if we never rewrote this IC just return the whole IC size for now
236+ if (!times_rewritten)
237+ return slots[0 ].size ;
192238
193- int ICSlotRewrite::getScratchSize () {
194- return ic->stack_info .scratch_size ;
195- }
196-
197- TypeRecorder* ICSlotRewrite::getTypeRecorder () {
198- return ic->type_recorder ;
199- }
239+ int additional_space_per_slot = 30 ;
240+ // if there are less rewrites than slots we can give a very accurate estimate
241+ if (times_rewritten < slots.size ()) {
242+ // add up the sizes of all used slots
243+ int size = 0 ;
244+ for (int i = 0 ; i < times_rewritten; ++i) {
245+ size += slots[i].size + additional_space_per_slot;
246+ }
247+ return size;
248+ }
200249
201- assembler::GenericRegister ICSlotRewrite::returnRegister () {
202- return ic->return_register ;
250+ // get total size of IC
251+ int size = 0 ;
252+ for (auto && slot : slots) {
253+ size += slot.size ;
254+ }
255+ // make it bigger
256+ if (isMegamorphic ())
257+ size *= 4 ;
258+ else
259+ size *= 2 ;
260+ return std::min (size, 4096 );
203261}
204262
205-
206-
207263std::unique_ptr<ICSlotRewrite> ICInfo::startRewrite (const char * debug_name) {
208- return std::unique_ptr<ICSlotRewrite>( new ICSlotRewrite ( this , debug_name) );
264+ return ICSlotRewrite::create ( this , debug_name);
209265}
210266
211267ICSlotInfo* ICInfo::pickEntryForRewrite (const char * debug_name) {
212- int num_slots = getNumSlots ();
268+ int num_slots = slots.size ();
269+ int fallback_to_in_use_slot = -1 ;
270+
271+ // we prefer to use an unused slot and if none is available we will fall back to a slot which is in use (but no one
272+ // is inside)
213273 for (int _i = 0 ; _i < num_slots; _i++) {
214274 int i = (_i + next_slot_to_try) % num_slots;
215275
216276 ICSlotInfo& sinfo = slots[i];
217277 assert (sinfo.num_inside >= 0 );
218- if (sinfo.num_inside )
278+
279+ if (sinfo.num_inside || sinfo.size == 0 )
280+ continue ;
281+
282+ if (sinfo.used ) {
283+ if (fallback_to_in_use_slot == -1 )
284+ fallback_to_in_use_slot = i;
219285 continue ;
286+ }
220287
221288 if (VERBOSITY () >= 4 ) {
222289 printf (" picking %s icentry to in-use slot %d at %p\n " , debug_name, i, start_addr);
@@ -225,21 +292,28 @@ ICSlotInfo* ICInfo::pickEntryForRewrite(const char* debug_name) {
225292 next_slot_to_try = i;
226293 return &sinfo;
227294 }
295+
296+ if (fallback_to_in_use_slot != -1 ) {
297+ if (VERBOSITY () >= 4 ) {
298+ printf (" picking %s icentry to in-use slot %d at %p\n " , debug_name, fallback_to_in_use_slot, start_addr);
299+ }
300+
301+ next_slot_to_try = fallback_to_in_use_slot;
302+ return &slots[fallback_to_in_use_slot];
303+ }
304+
228305 if (VERBOSITY () >= 4 )
229306 printf (" not committing %s icentry since there are no available slots\n " , debug_name);
230307 return NULL ;
231308}
232309
233310static llvm::DenseMap<void *, ICInfo*> ics_by_return_addr;
234311
235- ICInfo::ICInfo (void * start_addr, void * slowpath_rtn_addr, void * continue_addr, StackInfo stack_info, int num_slots,
236- int slot_size, llvm::CallingConv::ID calling_conv, LiveOutSet _live_outs,
237- assembler::GenericRegister return_register, TypeRecorder* type_recorder,
238- std::vector<Location> ic_global_decref_locations)
312+ ICInfo::ICInfo (void * start_addr, void * slowpath_rtn_addr, void * continue_addr, StackInfo stack_info, int size,
313+ llvm::CallingConv::ID calling_conv, LiveOutSet _live_outs, assembler::GenericRegister return_register,
314+ TypeRecorder* type_recorder, std::vector<Location> ic_global_decref_locations)
239315 : next_slot_to_try(0 ),
240316 stack_info (stack_info),
241- num_slots(num_slots),
242- slot_size(slot_size),
243317 calling_conv(calling_conv),
244318 live_outs(std::move(_live_outs)),
245319 return_register(return_register),
@@ -251,10 +325,7 @@ ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, S
251325 start_addr(start_addr),
252326 slowpath_rtn_addr(slowpath_rtn_addr),
253327 continue_addr(continue_addr) {
254- slots.reserve (num_slots);
255- for (int i = 0 ; i < num_slots; i++) {
256- slots.emplace_back (this , i);
257- }
328+ slots.emplace_back (this , (uint8_t *)start_addr, size);
258329 if (slowpath_rtn_addr && !this ->ic_global_decref_locations .empty ())
259330 slowpath_decref_info = DecrefInfo ((uint64_t )slowpath_rtn_addr, this ->ic_global_decref_locations );
260331}
@@ -283,7 +354,7 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
283354 uint8_t * continue_addr, uint8_t * slowpath_rtn_addr,
284355 const ICSetupInfo* ic, StackInfo stack_info, LiveOutSet live_outs,
285356 std::vector<Location> decref_info) {
286- assert (slowpath_start_addr - start_addr >= ic->num_slots * ic-> slot_size );
357+ assert (slowpath_start_addr - start_addr >= ic->size );
287358 assert (slowpath_rtn_addr > slowpath_start_addr);
288359 assert (slowpath_rtn_addr <= start_addr + ic->totalSize ());
289360
@@ -305,22 +376,13 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
305376 // we can let the user just slide down the nop section, but instead
306377 // emit jumps to the end.
307378 // Not sure if this is worth it or not?
308- for (int i = 0 ; i < ic->num_slots ; i++) {
309- uint8_t * start = start_addr + i * ic->slot_size ;
310- // std::unique_ptr<MCWriter> writer(createMCWriter(start, ic->slot_size * (ic->num_slots - i), 0));
311- // writer->emitNop();
312- // writer->emitGuardFalse();
313-
314- Assembler writer (start, ic->slot_size );
315- writer.nop ();
316- // writer.trap();
317- // writer.jmp(JumpDestination::fromStart(ic->slot_size * (ic->num_slots - i)));
318- writer.jmp (JumpDestination::fromStart (slowpath_start_addr - start));
319- }
379+ Assembler writer (start_addr, ic->size );
380+ writer.nop ();
381+ writer.jmp (JumpDestination::fromStart (slowpath_start_addr - start_addr));
320382
321383 ICInfo* icinfo
322- = new ICInfo (start_addr, slowpath_rtn_addr, continue_addr, stack_info, ic->num_slots , ic->slot_size ,
323- ic-> getCallingConvention (), std::move (live_outs), return_register, ic->type_recorder , decref_info);
384+ = new ICInfo (start_addr, slowpath_rtn_addr, continue_addr, stack_info, ic->size , ic->getCallingConvention () ,
385+ std::move (live_outs), return_register, ic->type_recorder , decref_info);
324386
325387 assert (!ics_by_return_addr.count (slowpath_rtn_addr));
326388 ics_by_return_addr[slowpath_rtn_addr] = icinfo;
@@ -350,14 +412,14 @@ ICInfo* getICInfo(void* rtn_addr) {
350412void ICInfo::clear (ICSlotInfo* icentry) {
351413 assert (icentry);
352414
353- uint8_t * start = (uint8_t *)start_addr + icentry->idx * getSlotSize () ;
415+ uint8_t * start = (uint8_t *)icentry->start_addr ;
354416
355417 if (VERBOSITY () >= 4 )
356418 printf (" clearing patchpoint %p, slot at %p\n " , start_addr, start);
357419
358- Assembler writer (start, getSlotSize () );
420+ Assembler writer (start, icentry-> size );
359421 writer.nop ();
360- writer.jmp (JumpDestination::fromStart (getSlotSize () ));
422+ writer.jmp (JumpDestination::fromStart (icentry-> size ));
361423 assert (writer.bytesWritten () <= IC_INVALDITION_HEADER_SIZE);
362424
363425 for (auto p : icentry->gc_references ) {
@@ -370,7 +432,17 @@ void ICInfo::clear(ICSlotInfo* icentry) {
370432 // writer->emitGuardFalse();
371433
372434 // writer->endWithSlowpath();
373- llvm::sys::Memory::InvalidateInstructionCache (start, getSlotSize ());
435+
436+ llvm::sys::Memory::InvalidateInstructionCache (start, icentry->size );
437+
438+ for (int i = 0 ; i < slots.size (); ++i) {
439+ if (&slots[i] == icentry) {
440+ next_slot_to_try = i;
441+ break ;
442+ }
443+ }
444+
445+ icentry->used = false ;
374446}
375447
376448bool ICInfo::shouldAttempt () {
0 commit comments