@@ -35,6 +35,10 @@ class BadJob(Exception):
35
35
pass
36
36
37
37
38
+ # special signal sent by the main process in case the worker process hasn't received a signal (e.g. SIGTERM or SIGINT)
39
+ SIG_SUPERVISOR = signal .SIGRTMIN + 7
40
+
41
+
38
42
class BaseWorker (RedisMixin ):
39
43
"""
40
44
Base class for Workers to inherit from.
@@ -80,6 +84,7 @@ def __init__(self, *,
80
84
self .job_class = None # type: type # TODO
81
85
signal .signal (signal .SIGINT , self .handle_sig )
82
86
signal .signal (signal .SIGTERM , self .handle_sig )
87
+ signal .signal (SIG_SUPERVISOR , self .handle_supervisor_signal )
83
88
super ().__init__ (** kwargs ) # type: ignore # TODO
84
89
self ._closing_lock = asyncio .Lock (loop = self .loop )
85
90
@@ -176,15 +181,15 @@ async def work(self):
176
181
msg = await redis .blpop (* redis_queues , timeout = 1 )
177
182
if msg is None :
178
183
continue
179
- _queue , data = msg
180
- if self ._burst_mode and _queue == quit_queue :
184
+ raw_queue , data = msg
185
+ if self ._burst_mode and raw_queue == quit_queue :
181
186
work_logger .debug ('got job from the quit queue, stopping' )
182
187
break
183
- queue = queue_lookup [_queue ]
184
- work_logger .debug ('scheduling job from queue %s' , queue )
185
- await self .schedule_job (queue , data )
188
+ queue = queue_lookup [raw_queue ]
189
+ await self .schedule_job (data , queue , raw_queue )
186
190
187
- async def schedule_job (self , queue , data ):
191
+ async def schedule_job (self , data , queue , raw_queue ):
192
+ work_logger .debug ('scheduling job from queue %s' , queue )
188
193
job = self .job_class (queue , data )
189
194
190
195
pt_cnt = len (self ._pending_tasks )
@@ -193,6 +198,11 @@ async def schedule_job(self, queue, data):
193
198
_ , self ._pending_tasks = await asyncio .wait (self ._pending_tasks , loop = self .loop ,
194
199
return_when = asyncio .FIRST_COMPLETED )
195
200
201
+ if not self .running :
202
+ work_logger .warning ('job popped from queue, but exit is imminent, re-queueing the job' )
203
+ async with await self .get_redis_conn () as redis :
204
+ await redis .lpush (raw_queue , data )
205
+ return
196
206
task = self .loop .create_task (self .run_job (job ))
197
207
task .add_done_callback (self .job_callback )
198
208
self .loop .call_later (self .timeout_seconds , self .cancel_job , task , job )
@@ -280,9 +290,19 @@ async def close(self):
280
290
await super ().close ()
281
291
self ._closed = True
282
292
293
def handle_supervisor_signal(self, signum, frame):
    """
    Handler for SIG_SUPERVISOR, the custom signal the main process sends to this worker.

    Clears ``self.running``, logs the shutdown, points SIGINT, SIGTERM and SIGALRM at
    ``handle_sig_force`` so that any further signal (or the alarm armed below) forces an exit,
    then raises ``HandledExit``.

    :param signum: number of the signal received (unused)
    :param frame: interrupted stack frame (unused)
    :raises HandledExit: always
    """
    self.running = False
    work_logger.warning('pid=%d, got shutdown signal from main process, stopping...', os.getpid())
    # from now on a second signal, or the alarm going off, goes to the "force" handler
    for escalated in (signal.SIGINT, signal.SIGTERM, signal.SIGALRM):
        signal.signal(escalated, self.handle_sig_force)
    # SIGALRM is delivered after shutdown_delay seconds
    signal.alarm(self.shutdown_delay)
    raise HandledExit()
301
+
283
302
def handle_sig (self , signum , frame ):
284
303
self .running = False
285
304
work_logger .warning ('pid=%d, got signal: %s, stopping...' , os .getpid (), Signals (signum ).name )
305
+ signal .signal (SIG_SUPERVISOR , signal .SIG_IGN )
286
306
signal .signal (signal .SIGINT , self .handle_sig_force )
287
307
signal .signal (signal .SIGTERM , self .handle_sig_force )
288
308
signal .signal (signal .SIGALRM , self .handle_sig_force )
@@ -365,17 +385,18 @@ def run_worker(self, worker_path, worker_class, burst):
365
385
work_logger .critical ('worker process %s exited badly with exit code %s' ,
366
386
self .process .pid , self .process .exitcode )
367
387
sys .exit (3 )
368
- # could restart worker here, but better to leave it up to the real manager
388
+ # could restart worker here, but better to leave it up to the real manager, e.g. docker's "restart: always"
369
389
370
390
def handle_sig(self, signum, frame):
    """
    First-signal handler for the supervising (main) process.

    Re-points SIGINT and SIGTERM at ``handle_sig_force``, logs which signal arrived, and,
    if the worker process is still alive after a short pause, forwards SIG_SUPERVISOR to it.

    :param signum: number of the signal received, used for the log message
    :param frame: interrupted stack frame (unused)
    """
    for escalated in (signal.SIGINT, signal.SIGTERM):
        signal.signal(escalated, self.handle_sig_force)
    work_logger.warning('got signal: %s, waiting for worker pid=%s to finish...', Signals(signum).name,
                        self.process and self.process.pid)
    # brief pause so the worker's own handle_sig can run first (if it is going to) and
    # detach handle_supervisor_signal before SIG_SUPERVISOR is sent
    time.sleep(0.01)
    worker = self.process
    if not worker or not worker.is_alive():
        return
    work_logger.debug("sending custom shutdown signal to worker in case it didn't receive the signal")
    os.kill(worker.pid, SIG_SUPERVISOR)
379
400
380
401
def handle_sig_force (self , signum , frame ):
381
402
work_logger .error ('got signal: %s again, forcing exit' , Signals (signum ).name )
0 commit comments