Skip to content

Commit 1b5a362

Browse files
committed
Merge branch 'profile-mem'
# Conflicts: # README.md
2 parents 0edf232 + 97a14b0 commit 1b5a362

File tree

5 files changed

+38
-3
lines changed

5 files changed

+38
-3
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,11 @@ Almost all behaviour of the OCR-D/core software is configured via CLI options an
9090
Some parts of the software are configured via environement variables:
9191

9292
* `OCRD_METS_CACHING`: If set to `true`, access to the METS file is cached, speeding in-memory search and modification.
93+
* `OCRD_PROFILE`: This variable configures the built-in CPU and memory profiling. If empty, no profiling is done. Otherwise expected to contain any of the following tokens:
94+
* `CPU`: Enable CPU profiling of processor runs
95+
* `RSS`: Enable RSS memory profiling
96+
* `PSS`: Enable proportionate memory profiling
97+
* `OCRD_PROFILE_FILE`: If set, then the CPU profile is written to this file for later peruse with a analysis tools like [snakeviz](https://jiffyclub.github.io/snakeviz/)
9398

9499
## Packages
95100

ocrd/ocrd/decorators/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from os.path import isfile
2+
from os import environ
23
import sys
34

45
import click
@@ -89,6 +90,12 @@ def ocrd_cli_wrap_processor(
8990
report = WorkspaceValidator.check_file_grp(workspace, kwargs['input_file_grp'], '' if overwrite else kwargs['output_file_grp'], page_id)
9091
if not report.is_valid:
9192
raise Exception("Invalid input/output file grps:\n\t%s" % '\n\t'.join(report.errors))
93+
# Set up profiling behavior from environment variables/flags
94+
if not profile and 'OCRD_PROFILE' in environ:
95+
if 'CPU' in environ['OCRD_PROFILE']:
96+
profile = True
97+
if not profile_file and 'OCRD_PROFILE_FILE' in environ:
98+
profile_file = environ['OCRD_PROFILE_FILE']
9299
if profile or profile_file:
93100
import cProfile
94101
import pstats

ocrd/ocrd/processor/helpers.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
"""
22
Helper methods for running and documenting processors
33
"""
4+
from os import environ
45
from time import perf_counter, process_time
56
import json
67
import inspect
78
from subprocess import run, PIPE
9+
from memory_profiler import memory_usage
10+
from sparklines import sparklines
811

912
from click import wrap_text
1013
from ocrd_utils import getLogger
@@ -85,7 +88,23 @@ def run_processor(
8588
log.debug("Processor instance %s (%s doing %s)", processor, name, otherrole)
8689
t0_wall = perf_counter()
8790
t0_cpu = process_time()
88-
processor.process()
91+
if any(x in environ.get('OCRD_PROFILE', '') for x in ['RSS', 'PSS']):
92+
backend = 'psutil_pss' if 'PSS' in environ['OCRD_PROFILE'] else 'psutil'
93+
mem_usage = memory_usage(proc=processor.process,
94+
# only run process once
95+
max_iterations=1,
96+
interval=.1, timeout=None, timestamps=True,
97+
# include sub-processes
98+
multiprocess=True, include_children=True,
99+
# get proportional set size instead of RSS
100+
backend=backend)
101+
mem_usage_values = [mem for mem, _ in mem_usage]
102+
mem_output = 'memory consumption: '
103+
mem_output += ''.join(sparklines(mem_usage_values))
104+
mem_output += ' max: %.2f MiB min: %.2f MiB' % (max(mem_usage_values), min(mem_usage_values))
105+
logProfile.info(mem_output)
106+
else:
107+
processor.process()
89108
t1_wall = perf_counter() - t0_wall
90109
t1_cpu = process_time() - t0_cpu
91110
logProfile.info("Executing processor '%s' took %fs (wall) %fs (CPU)( [--input-file-grp='%s' --output-file-grp='%s' --parameter='%s' --page-id='%s']" % (

ocrd/requirements.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,6 @@ opencv-python-headless
77
Flask
88
jsonschema
99
pyyaml
10-
Deprecated == 1.2.0
10+
Deprecated == 1.2.0
11+
memory-profiler >= 0.58.0
12+
sparklines >= 0.4.2

tests/test_logging.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,9 @@ def testProcessorProfiling(self):
168168
# with open('/tmp/debug.log', 'w') as f:
169169
# f.write(log_contents)
170170
# Check whether profile information has been logged. Dummy should finish in under 0.1s
171-
self.assertTrue(match(r'.*Executing processor \'ocrd-test\' took 0.\d+s.*', log_contents))
171+
# print(log_contents)
172+
assert "Executing processor 'ocrd-test' took 0" in log_contents
173+
# self.assertTrue(match(r'.*Executing processor \'ocrd-test\' took 0.\d+s.*', log_contents))
172174

173175
def test_tmpConfigfile(self):
174176
self.assertNotEqual(logging.getLogger('').getEffectiveLevel(), logging.NOTSET)

0 commit comments

Comments
 (0)