Skip to content

Commit 0939cff

Browse files
committed
benchmark: more features & fixes
- add support for a preexisting file server instance (--file-server HOST:PORT)
- add HTML endpoint benchmarks (--render-type html)
- make --sites-dir required
- dump output as proper JSON
1 parent c0dae05 commit 0939cff

File tree

3 files changed

+135
-51
lines changed

3 files changed

+135
-51
lines changed

splash/benchmark/benchmark.py

+82-24
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
99
"""
1010

11+
import json
1112
import logging
1213
import os
1314
import random
@@ -20,26 +21,24 @@
2021
import sys
2122

2223
import requests
23-
from splash.benchmark.file_server import serve_files
24-
from splash.tests.utils import SplashServer
2524

2625

2726
def make_render_png_req(splash, params):
    """Describe a PNG render request that goes through render.png.

    Returns a dict with the endpoint address under 'url' and the given
    ``params`` object (not a copy) under 'params'.
    """
    endpoint = splash.url('render.png')
    return {'url': endpoint, 'params': params}
3130

3231

3332
def make_render_json_req(splash, params):
    """Describe a PNG render request that goes through render.json.

    The caller's ``params`` are left untouched; the returned 'params' is a
    copy with ``png`` set to 1.
    """
    with_png = dict(params, png=1)
    endpoint = splash.url('render.json')
    return {'url': endpoint, 'params': with_png}
3938

4039

4140
def make_render_png_lua_req(splash, params):
42-
"""Prepare request for execute endpoint."""
41+
"""Make PNG render request via Lua execute endpoint."""
4342
lua_params = params.copy()
4443
lua_params['lua_source'] = """
4544
function main(splash)
@@ -57,11 +56,51 @@ def make_render_png_lua_req(splash, params):
5756
'params': lua_params}
5857

5958

60-
REQ_FACTORIES = [
61-
make_render_png_req,
62-
make_render_json_req,
63-
make_render_png_lua_req,
64-
]
59+
def make_render_html_req(splash, params):
    """Describe an HTML render request that goes through render.html.

    Returns a dict with the endpoint address under 'url' and the given
    ``params`` object (not a copy) under 'params'.
    """
    endpoint = splash.url('render.html')
    return {'url': endpoint, 'params': params}
63+
64+
65+
def make_render_html_json_req(splash, params):
    """Describe an HTML render request that goes through render.json.

    The caller's ``params`` are left untouched; the returned 'params' is a
    copy with ``html`` set to 1.
    """
    with_html = dict(params, html=1)
    endpoint = splash.url('render.json')
    return {'url': endpoint, 'params': with_html}
71+
72+
73+
def make_render_html_lua_req(splash, params):
    """Describe an HTML render request that goes through the execute endpoint.

    The returned 'params' is a copy of ``params`` with a ``lua_source``
    script added.  The script loads ``splash.args.url``, waits for
    ``splash.args.wait`` when that argument is present, sets the result
    content type to HTML and returns ``splash:html{}``.
    """
    script = """
function main(splash)
assert(splash:go(splash.args.url))
if splash.args.wait then
assert(splash:wait(splash.args.wait))
end
splash:set_result_content_type("text/html; charset=UTF-8")
return splash:html{}
end
"""
    with_script = dict(params, lua_source=script)
    endpoint = splash.url('execute')
    return {'url': endpoint, 'params': with_script}
88+
89+
90+
#: Request factories grouped by render type ('png' or 'html').  The same
#: resource can be rendered through several endpoints with slightly
#: different parameter combinations; each factory sets up one of them.
REQ_FACTORIES = dict(
    png=[
        make_render_png_req,
        make_render_json_req,
        make_render_png_lua_req,
    ],
    html=[
        make_render_html_req,
        make_render_html_json_req,
        make_render_html_lua_req,
    ],
)
65104

66105

67106
#: Port at which static pages will be served.
@@ -86,15 +125,20 @@ def make_render_png_lua_req(splash, params):
86125
help='Request thread count')
87126
parser.add_argument('--request-count', type=int, default=10,
88127
help='Benchmark request count')
89-
parser.add_argument('--sites-dir', type=str, default='sites',
128+
parser.add_argument('--sites-dir', type=str, default='sites', required=True,
90129
help='Directory with downloaded sites')
130+
parser.add_argument('--file-server', metavar='HOST:PORT',
131+
help='Use existing file server instance available at HOST:PORT')
91132
parser.add_argument('--splash-server', metavar='HOST:PORT',
92133
help='Use existing Splash instance available at HOST:PORT')
93134
parser.add_argument('--out-file', type=FileType(mode='w'), default=sys.stdout,
94135
help='Write detailed request information in this file')
136+
parser.add_argument('--render-type', choices=('html', 'png'), default='png',
137+
help=('Type of rendering to benchmark'
138+
' (either "html" or "png")'))
95139

96140

97-
def generate_requests(splash, args):
141+
def generate_requests(splash, file_server, args):
98142
log = logging.getLogger('generate_requests')
99143
log.info("Using pRNG seed: %s", args.seed)
100144

@@ -106,12 +150,14 @@ def generate_requests(splash, args):
106150
for p in pages:
107151
log.info("Using page for benchmark: %s", p)
108152

153+
request_factories = REQ_FACTORIES[args.render_type]
154+
109155
rng = random.Random(args.seed)
110156
for i in xrange(args.request_count):
111157
page = rng.choice(pages)
112158
width, height = rng.choice(WIDTH_HEIGHT)
113-
req_factory = rng.choice(REQ_FACTORIES)
114-
url = 'http://localhost:%d/%s' % (PORT, page)
159+
req_factory = rng.choice(request_factories)
160+
url = file_server.url(page)
115161
params = {'url': url, 'render_all': 1, 'wait': 0.1,
116162
'width': width, 'height': height}
117163
log.debug("Req factory: %s, params: %s", req_factory, params)
@@ -145,7 +191,7 @@ def invoke_request(invoke_args):
145191
'height': kwargs['params']['height']}
146192

147193

148-
class ExistingSplashWrapper(object):
194+
class ExistingServerWrapper(object):
149195
"""Wrapper for pre-existing Splash instance."""
150196
def __init__(self, server):
151197
self.server = server
@@ -165,37 +211,49 @@ def __exit__(self, *args):
165211
def main():
166212
log = logging.getLogger("benchmark")
167213
args = parser.parse_args()
168-
logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(logging.WARNING)
214+
(logging.getLogger('requests.packages.urllib3.connectionpool')
215+
.setLevel(logging.WARNING))
169216
logging.basicConfig(level=logging.DEBUG)
170217

171218
if args.splash_server:
172-
splash = ExistingSplashWrapper(args.splash_server)
219+
splash = ExistingServerWrapper(args.splash_server)
173220
else:
221+
from splash.tests.utils import SplashServer
174222
splash = SplashServer(
175223
logfile=SPLASH_LOG,
176224
extra_args=['--disable-lua-sandbox',
177225
'--disable-xvfb',
178226
'--max-timeout=600'])
179227

180-
with splash, serve_files(port=PORT, directory=args.sites_dir, logfile=FILESERVER_LOG):
228+
if args.file_server:
229+
file_server = ExistingServerWrapper(args.file_server)
230+
else:
231+
from splash.benchmark.file_server import FileServerSubprocess
232+
file_server = FileServerSubprocess(port=PORT,
233+
path=args.sites_dir,
234+
logfile=FILESERVER_LOG)
235+
236+
with splash, file_server:
181237
log.info("Servers are up, starting benchmark...")
182238
start_res = requests.get(
183239
splash.url('execute'),
184240
params={'lua_source': GET_PERF_STATS_SCRIPT}).json()
185241
start_time = time()
186-
results = parallel_map(invoke_request, generate_requests(splash, args),
242+
results = parallel_map(invoke_request,
243+
generate_requests(splash, file_server, args),
187244
args.thread_count)
188245
end_time = time()
189246
end_res = requests.get(
190247
splash.url('execute'),
191248
params={'lua_source': GET_PERF_STATS_SCRIPT}).json()
192249

193250
log.info("Writing stats to %s", args.out_file.name)
194-
args.out_file.write(pformat({
195-
'maxrss': end_res['maxrss'],
196-
'cputime': end_res['cputime'] - start_res['cputime'],
197-
'walltime': end_time - start_time,
198-
'requests': results}))
251+
args.out_file.write(json.dumps(
252+
{'maxrss': end_res['maxrss'],
253+
'cputime': end_res['cputime'] - start_res['cputime'],
254+
'walltime': end_time - start_time,
255+
'requests': results},
256+
indent=2))
199257
log.info("Splash max RSS: %s B", end_res['maxrss'])
200258
log.info("Splash CPU time elapsed: %.2f sec",
201259
end_res['cputime'] - start_res['cputime'])

splash/benchmark/download_sites.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,13 @@
1010
import os
1111
import re
1212
import subprocess
13+
import logging
1314
from urlparse import urlsplit
1415

1516
from lxml import html
1617

1718
import w3lib.html
18-
from splash.benchmark.file_server import serve_files
19+
from splash.benchmark.file_server import FileServerSubprocess
1920
from splash.tests.stress import lua_runonce
2021

2122
SCRIPT_HTML = """
@@ -91,14 +92,18 @@ def download_sites(sites_dir, sites):
9192

9293
def main():
9394
args = parser.parse_args()
95+
(logging.getLogger('requests.packages.urllib3.connectionpool')
96+
.setLevel(logging.WARNING))
97+
logging.basicConfig(level=logging.DEBUG)
98+
logging.info("Starting site download suite")
9499
try:
95100
os.makedirs(args.sites_dir)
96101
except OSError as e:
97102
if e.errno != errno.EEXIST:
98103
raise
99104
elif not os.path.isdir(args.sites_dir):
100105
raise RuntimeError("Not a directory: %s" % args.sites_dir)
101-
with serve_files(PORT, args.sites_dir):
106+
with FileServerSubprocess(port=PORT, path=args.sites_dir):
102107
download_sites(args.sites_dir, [
103108
'http://www.wikipedia.org',
104109
'http://www.google.com',

splash/benchmark/file_server.py

+46-25
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import subprocess
88
import time
99
import sys
10+
import logging
1011
from contextlib import contextmanager
1112

1213
from twisted.internet import reactor
@@ -18,44 +19,64 @@
1819

1920
parser = argparse.ArgumentParser("")
2021
parser.add_argument('--port', type=int, default=8806)
21-
parser.add_argument('--directory', help='Directory to be served', default='.')
22-
parser.add_argument('--logfile', default=sys.stderr, type=argparse.FileType(mode='w'),
22+
parser.add_argument('--path', help='Path to be served', default='.')
23+
parser.add_argument('--logfile', default=sys.stderr,
24+
type=argparse.FileType(mode='w'),
2325
help='File to write logs to')
2426

25-
@contextmanager
26-
def serve_files(port, directory, logfile=None):
27+
28+
class FileServerSubprocess(object):
    """Serve files from the specified path statically in a subprocess.

    Use as a context manager: entering starts this module as a
    ``python`` subprocess and waits until it answers HTTP requests;
    leaving kills the subprocess and reaps it.

    :param port: TCP port for the server to listen on.
    :param path: path to serve; made absolute before it is passed on.
    :param logfile: optional log file name forwarded as ``--logfile``.
    """

    logger = logging.getLogger('file_server')

    # Start-up is probed this many times with this delay in between,
    # i.e. the server gets up to about 10 seconds to come up.
    _STARTUP_ATTEMPTS = 100
    _STARTUP_DELAY = 0.1

    def __init__(self, port, path, logfile=None):
        self.port = port
        self.path = path
        self.logfile = logfile
        self.server = 'http://localhost:%d' % port
        self._site_server = None

    def url(self, endpoint):
        """Return the address of ``endpoint`` on this file server."""
        return self.server + '/' + endpoint

    def __enter__(self):
        """Start the subprocess and block until it serves requests.

        :returns: ``self``, so ``with ... as server`` works.
        :raises RuntimeError: if the server does not come up in time or
            the subprocess exits while it is being waited for.
        """
        command = ['python', __file__,
                   '--port', str(int(self.port)),
                   '--path', os.path.abspath(self.path)]
        if self.logfile is not None:
            command += ['--logfile', self.logfile]
        self.logger.info("Starting file server subprocess: %s", command)
        self._site_server = subprocess.Popen(command)

        # __exit__ is not invoked when __enter__ raises, so the child has
        # to be stopped here on any failure (including KeyboardInterrupt).
        started = False
        try:
            self._wait_until_up()
            started = True
        finally:
            if not started:
                self._stop()
        return self

    def __exit__(self, *args):
        self._stop()

    def _wait_until_up(self):
        """Poll the server root until it responds or the attempts run out."""
        for _ in range(self._STARTUP_ATTEMPTS):
            try:
                self.logger.info("Checking if file server is active")
                requests.get(self.url(''))
                return
            except requests.ConnectionError:
                exit_code = self._site_server.poll()
                if exit_code is not None:
                    # The child is gone; further probing cannot succeed.
                    raise RuntimeError(self._with_logs(
                        "File server subprocess exited with code %s"
                        " during startup" % exit_code))
                time.sleep(self._STARTUP_DELAY)
        raise RuntimeError(self._with_logs(
            "File server subprocess startup timed out"))

    def _with_logs(self, msg):
        """Append the contents of the log file, if one is set, to ``msg``."""
        if self.logfile:
            with open(self.logfile, 'r') as log_f:
                msg += ", logs:\n" + log_f.read()
        return msg

    def _stop(self):
        """Kill and reap the subprocess; do nothing if it was never started."""
        proc = self._site_server
        if proc is None:
            return
        if proc.poll() is None:
            proc.kill()
        proc.wait()
        self._site_server = None
5374

5475

5576
def main():
    """Run the static file server described by the command line.

    Starts logging to ``--logfile``, serves the absolute form of
    ``--path`` on ``--port`` and runs the reactor.
    """
    options = parser.parse_args()
    startLogging(options.logfile)
    served_root = os.path.abspath(options.path)
    reactor.listenTCP(options.port, Site(File(served_root)))
    reactor.run()

0 commit comments

Comments
 (0)