Skip to content

Commit 8461385

Browse files
authored
Merge pull request #645 from oliver-sanders/643
cat-log: add timeout to processes
2 parents 08a20c3 + b137137 commit 8461385

File tree

4 files changed

+141
-11
lines changed

4 files changed

+141
-11
lines changed

changes.d/645.fix.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add a default timeout for the `cylc cat-log` command which is used to provide access to log files in the cylc-ui.
2+
This timeout can be adjusted with the `log_timeout` option.

cylc/uiserver/app.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,26 @@ class CylcUIServer(ExtensionApp):
340340
default_value=False,
341341
)
342342

343+
log_timeout = Float(
344+
# Note: This timeout it intended to clean up log streams that are no
345+
# longer being actively monitored and prevent the associated "cat-log"
346+
# processes from persisting in situations where they should not be
347+
# (e.g. if the websocket connection unexpectedly closes)
348+
config=True,
349+
help='''
350+
The maximum length of time Cylc will stream a log file for in
351+
seconds.
352+
353+
The "Log" view in the Cylc GUI streams log files allowing you to
354+
monitor the file while is grows.
355+
356+
After the configured timeout, the stream will close. The log
357+
view in the GUI will display a "reconnect" button allowing you
358+
to restart the stream if desired.
359+
''',
360+
default_value=(60 * 60 * 4), # 4 hours
361+
)
362+
343363
@validate('ui_build_dir')
344364
def _check_ui_build_dir_exists(self, proposed):
345365
if proposed['value'].exists():
@@ -408,6 +428,7 @@ def __init__(self, *args, **kwargs):
408428
# sub_status dictionary storing status of subscriptions
409429
self.sub_statuses = {}
410430
self.resolvers = Resolvers(
431+
self,
411432
self.data_store_mgr,
412433
log=self.log,
413434
executor=self.executor,

cylc/uiserver/resolvers.py

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
PIPE,
2828
Popen,
2929
)
30+
from time import time
3031
from typing import (
3132
TYPE_CHECKING,
3233
Any,
@@ -57,6 +58,7 @@
5758
from cylc.flow.option_parsers import Options
5859
from graphql import ResolveInfo
5960

61+
from cylc.uiserver.app import CylcUIServer
6062
from cylc.uiserver.workflows_mgr import WorkflowsManager
6163

6264

@@ -205,6 +207,9 @@ def _clean(workflow_ids, opts):
205207
class Services:
206208
"""Cylc services provided by the UI Server."""
207209

210+
# log file stream lag
211+
CAT_LOG_SLEEP = 1
212+
208213
@staticmethod
209214
def _error(message: Union[Exception, str]):
210215
"""Format error case response."""
@@ -351,7 +356,7 @@ async def enqueue(stream, queue):
351356
await queue.put(line.decode())
352357

353358
@classmethod
354-
async def cat_log(cls, id_: Tokens, log, info, file=None):
359+
async def cat_log(cls, id_: Tokens, app: 'CylcUIServer', info, file=None):
355360
"""Calls `cat log`.
356361
357362
Used for log subscriptions.
@@ -366,7 +371,7 @@ async def cat_log(cls, id_: Tokens, log, info, file=None):
366371
]
367372
if file:
368373
cmd += ['-f', file]
369-
log.info(f'$ {" ".join(cmd)}')
374+
app.log.info(f'$ {" ".join(cmd)}')
370375

371376
# For info, below subprocess is safe (uses shell=false by default)
372377
proc = await asyncio.subprocess.create_subprocess_exec(
@@ -380,26 +385,51 @@ async def cat_log(cls, id_: Tokens, log, info, file=None):
380385
# This is to get around problem where stream is not EOF until
381386
# subprocess ends
382387
enqueue_task = asyncio.create_task(cls.enqueue(proc.stdout, queue))
388+
389+
# GraphQL operation ID
383390
op_id = info.root_value
391+
392+
# track the number of lines received so far
384393
line_count = 0
394+
395+
# the time we started the cylc cat-log process
396+
start_time = time()
397+
398+
# configured cat-log process timeout
399+
timeout = float(app.log_timeout)
400+
385401
try:
386402
while info.context['sub_statuses'].get(op_id) != 'stop':
403+
if time() - start_time > timeout:
404+
# timeout exceeded -> kill the cat-log process
405+
break
406+
387407
if queue.empty():
408+
# there are *no* lines to read from the cat-log process
388409
if buffer:
410+
# yield everything in the buffer
389411
yield {'lines': list(buffer)}
390412
buffer.clear()
413+
391414
if proc.returncode is not None:
415+
# process exited
416+
# -> pass any stderr text to the client
392417
(_, stderr) = await proc.communicate()
393-
# pass any error onto ui
394418
msg = process_cat_log_stderr(stderr) or (
395419
f"cylc cat-log exited {proc.returncode}"
396420
)
397421
yield {'error': msg}
422+
423+
# stop reading log lines
398424
break
425+
399426
# sleep set at 1, which matches the `tail` default interval
400-
await asyncio.sleep(1)
427+
await asyncio.sleep(cls.CAT_LOG_SLEEP)
428+
401429
else:
430+
# there *are* lines to read from the cat-log process
402431
if line_count > MAX_LINES:
432+
# we have read beyond the line count
403433
yield {'lines': buffer}
404434
yield {
405435
'error': (
@@ -408,25 +438,39 @@ async def cat_log(cls, id_: Tokens, log, info, file=None):
408438
)
409439
}
410440
break
441+
411442
elif line_count == 0:
443+
# this is the first line
444+
# (this is a special line contains the file path)
412445
line_count += 1
413446
yield {
414447
'connected': True,
415448
'path': (await queue.get())[2:].strip(),
416449
}
417450
continue
451+
452+
# read in the log lines and add them to the buffer
418453
line = await queue.get()
419454
line_count += 1
420455
buffer.append(line)
421456
if len(buffer) >= 75:
422457
yield {'lines': list(buffer)}
423458
buffer.clear()
459+
# there is more text to read so don't sleep (but
460+
# still "sleep(0)" to yield control to other
461+
# coroutines)
424462
await asyncio.sleep(0)
463+
425464
finally:
465+
# kill the cat-log process
426466
kill_process_tree(proc.pid)
467+
468+
# terminate the queue
427469
enqueue_task.cancel()
428470
with suppress(asyncio.CancelledError):
429471
await enqueue_task
472+
473+
# tell the client we have disconnected
430474
yield {'connected': False}
431475

432476
@classmethod
@@ -467,13 +511,15 @@ class Resolvers(BaseResolvers):
467511

468512
def __init__(
469513
self,
514+
app: 'CylcUIServer',
470515
data: 'DataStoreMgr',
471516
log: 'Logger',
472517
workflows_mgr: 'WorkflowsManager',
473518
executor,
474519
**kwargs
475520
):
476521
super().__init__(data)
522+
self.app = app
477523
self.log = log
478524
self.workflows_mgr = workflows_mgr
479525
self.executor = executor
@@ -561,7 +607,7 @@ async def subscription_service(
561607
):
562608
async for ret in Services.cat_log(
563609
ids[0],
564-
self.log,
610+
self.app,
565611
info,
566612
file
567613
):

cylc/uiserver/tests/test_resolvers.py

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,27 @@
1+
# Copyright (C) NIWA & British Crown (Met Office) & Contributors.
2+
#
3+
# This program is free software: you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License as published by
5+
# the Free Software Foundation, either version 3 of the License, or
6+
# (at your option) any later version.
7+
#
8+
# This program is distributed in the hope that it will be useful,
9+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11+
# GNU General Public License for more details.
12+
#
13+
# You should have received a copy of the GNU General Public License
14+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
15+
116
import asyncio
2-
from typing import Any, Dict, List, Optional, Tuple
17+
from typing import Any, Dict, List, Tuple
318
from async_timeout import timeout
419
import logging
520
import os
621
import pytest
722
from unittest.mock import MagicMock, Mock
823
from subprocess import Popen, TimeoutExpired
24+
from types import SimpleNamespace
925

1026
from cylc.flow import CYLC_LOG
1127
from cylc.flow.id import Tokens
@@ -228,7 +244,23 @@ def wait(timeout):
228244
]
229245

230246

231-
async def test_cat_log(workflow_run_dir):
247+
@pytest.fixture
248+
def app():
249+
return SimpleNamespace(
250+
log=logging.getLogger(CYLC_LOG),
251+
log_timeout=10,
252+
)
253+
254+
255+
@pytest.fixture
256+
def fast_sleep(monkeypatch):
257+
monkeypatch.setattr(
258+
'cylc.uiserver.resolvers.Services.CAT_LOG_SLEEP',
259+
0.1,
260+
)
261+
262+
263+
async def test_cat_log(workflow_run_dir, app, fast_sleep):
232264
"""This is a functional test for cat_log subscription resolver.
233265
234266
It creates a log file and then runs the cat_log service. Checking it
@@ -265,18 +297,17 @@ async def test_cat_log(workflow_run_dir):
265297
# mock the context
266298
info.context = {'sub_statuses': {2: "start"}}
267299
workflow = Tokens(id_)
268-
log = logging.getLogger(CYLC_LOG)
269-
# note - timeout tests that the cat-log process is being stopped correctly
270300

301+
# note - timeout tests that the cat-log process is being stopped correctly
271302
first_response = None
272303
async with timeout(20):
273-
ret = services.cat_log(workflow, log, info)
304+
ret = services.cat_log(workflow, app, info)
274305
actual = ''
275306
is_first = True
276307
async for response in ret:
277308
if err := response.get('error'):
278309
# Surface any unexpected errors for better visibility
279-
log.exception(err)
310+
app.log.exception(err)
280311
if is_first:
281312
first_response = response
282313
is_first = False
@@ -298,6 +329,36 @@ async def test_cat_log(workflow_run_dir):
298329
assert actual.rstrip() == log_file_content.rstrip()
299330

300331

332+
async def test_cat_log_timeout(workflow_run_dir, app, fast_sleep):
333+
"""This is a functional test for cat_log subscription resolver.
334+
335+
It creates a log file and then runs the cat_log service. Checking it
336+
returns all the logs. Note the log content should be over 20 lines to check
337+
the buffer logic.
338+
"""
339+
(id_, log_dir) = workflow_run_dir
340+
log_file = log_dir / '01-start-01.log'
341+
log_file.write_text('forty two')
342+
info = MagicMock()
343+
info.root_value = 2
344+
# mock the context
345+
info.context = {'sub_statuses': {2: "start"}}
346+
workflow = Tokens(id_)
347+
348+
app.log_timeout = 0
349+
350+
ret = services.cat_log(workflow, app, info)
351+
responses = []
352+
async with timeout(5):
353+
async for response in ret:
354+
responses.append(response)
355+
await asyncio.sleep(0)
356+
357+
assert len(responses) == 1
358+
assert responses[0]['connected'] is False
359+
assert 'error' not in responses[0]
360+
361+
301362
@pytest.mark.parametrize(
302363
'text, expected',
303364
[

0 commit comments

Comments
 (0)