
Commit 4c807a6

Add source code used to evaluate Mercury ATC'17.
1 parent 65e5f1c commit 4c807a6

27 files changed: +4775, -0 lines

Diff for: changelog.py

+528
Large diffs are not rendered by default.

Diff for: compute-average-project-metadata-file-sizes.py

+192
@@ -0,0 +1,192 @@
#!/usr/bin/env python3


# 1st-party
import bz2
import json
import os
import sys


# 2nd-party
from nouns import METADATA_DIRECTORY, TUF_DIRECTORY


# 3rd-party
import jsonpatch


# Find the project with a recurring cost closest to the given average.
def find_project_with_avg_recurring_cost(FIRST_SNAPSHOT_FILEPATH,
                                         LAST_SNAPSHOT_FILEPATH,
                                         AVG_RECURRING_COST):
  with open(FIRST_SNAPSHOT_FILEPATH) as first_snapshot_file, \
       open(LAST_SNAPSHOT_FILEPATH) as last_snapshot_file:
    first_snapshot = json.load(first_snapshot_file)
    last_snapshot = json.load(last_snapshot_file)

  first_projects = first_snapshot['signed']['meta']
  last_projects = last_snapshot['signed']['meta']
  min_abs_diff = sys.maxsize
  project_with_min_abs_diff = None
  total_number_of_projects = 0

  for project_metadata_filepath, \
      first_project_metadata_identifier in first_projects.items():
    # Count this project only if it exists in both the first and last
    # snapshots.
    last_project_metadata_identifier = \
        last_projects.get(project_metadata_filepath)
    if last_project_metadata_identifier:
      first_project_metadata = \
          get_project_metadata_json(project_metadata_filepath,
                                    first_project_metadata_identifier)
      last_project_metadata = \
          get_project_metadata_json(project_metadata_filepath,
                                    last_project_metadata_identifier)
      project_metadata_file_size = get_delta_size(first_project_metadata,
                                                  last_project_metadata)
      abs_diff = abs(project_metadata_file_size - AVG_RECURRING_COST)
      if abs_diff < min_abs_diff:
        min_abs_diff = abs_diff
        project_with_min_abs_diff = project_metadata_filepath
      total_number_of_projects += 1

  assert project_with_min_abs_diff
  print('{} has min abs diff to avg recurring cost: {:,} bytes'
        .format(project_with_min_abs_diff, min_abs_diff))


# Why the last snapshot? Because we want to get the best estimate we can of
# the average project metadata file size, which should get better with a
# larger number of projects.
def get_avg_initial_cost(LAST_SNAPSHOT_FILEPATH,
                         output_filename='avg_initial_cost.txt'):
  with open(LAST_SNAPSHOT_FILEPATH) as last_snapshot_file:
    last_snapshot = json.load(last_snapshot_file)

  projects = last_snapshot['signed']['meta']
  total_project_metadata_file_size = 0
  max_project_metadata_file_size = -1

  with open(os.path.join(METADATA_DIRECTORY,
                         output_filename), 'wt') as output_file:
    for project_metadata_filepath, \
        project_metadata_identifier in projects.items():
      project_metadata = get_project_metadata_bytes(project_metadata_filepath,
                                                    project_metadata_identifier)
      project_metadata_file_size = len(bz2.compress(project_metadata))
      if project_metadata_file_size > max_project_metadata_file_size:
        max_project_metadata_file_size = project_metadata_file_size
      output_file.write('{}\n'.format(project_metadata_file_size))
      total_project_metadata_file_size += project_metadata_file_size

  avg_initial_cost = round(total_project_metadata_file_size / len(projects))
  print('Avg GPG/RSA initial cost = {:,} bytes'.format(avg_initial_cost))
  print('Max GPG/RSA initial cost = {:,} bytes'
        .format(max_project_metadata_file_size))
  return avg_initial_cost


# Compute the average recurring cost for a project metadata file that existed
# between the first and last snapshots.
def get_avg_recurring_cost(FIRST_SNAPSHOT_FILEPATH, LAST_SNAPSHOT_FILEPATH,
                           output_filename='avg_recurring_cost.txt'):
  with open(FIRST_SNAPSHOT_FILEPATH) as first_snapshot_file, \
       open(LAST_SNAPSHOT_FILEPATH) as last_snapshot_file:
    first_snapshot = json.load(first_snapshot_file)
    last_snapshot = json.load(last_snapshot_file)

  first_projects = first_snapshot['signed']['meta']
  last_projects = last_snapshot['signed']['meta']
  total_project_metadata_file_size = 0
  max_project_metadata_file_size = -1
  total_number_of_projects = 0

  with open(os.path.join(METADATA_DIRECTORY,
                         output_filename), 'wt') as output_file:
    for project_metadata_filepath, \
        first_project_metadata_identifier in first_projects.items():
      # Count this project only if it exists in both the first and last
      # snapshots.
      last_project_metadata_identifier = \
          last_projects.get(project_metadata_filepath)
      if last_project_metadata_identifier:
        first_project_metadata = \
            get_project_metadata_json(project_metadata_filepath,
                                      first_project_metadata_identifier)
        last_project_metadata = \
            get_project_metadata_json(project_metadata_filepath,
                                      last_project_metadata_identifier)
        project_metadata_file_size = get_delta_size(first_project_metadata,
                                                    last_project_metadata)
        if project_metadata_file_size > max_project_metadata_file_size:
          max_project_metadata_file_size = project_metadata_file_size
        output_file.write('{}\n'.format(project_metadata_file_size))
        total_project_metadata_file_size += project_metadata_file_size
        total_number_of_projects += 1

  avg_recurring_cost = round(total_project_metadata_file_size /
                             total_number_of_projects)
  print('# of recurring projects: {:,}'.format(total_number_of_projects))
  print('Avg GPG/RSA recurring cost = {:,} bytes'.format(avg_recurring_cost))
  print('Max GPG/RSA recurring cost = {:,} bytes'
        .format(max_project_metadata_file_size))
  return avg_recurring_cost


# Charge the cost of transmitting the delta between two versions of a JSON
# file: the smaller of the raw JSON Patch and its bz2-compressed form.
def get_delta_size(prev, curr):
  patch = jsonpatch.make_patch(prev, curr)

  patch_str = str(patch)
  patch_str_length = len(patch_str)
  compressed_patch_str = bz2.compress(patch_str.encode('utf-8'))
  compressed_patch_str_length = len(compressed_patch_str)

  # If the patch is small enough, compression may increase bandwidth cost.
  return min(patch_str_length, compressed_patch_str_length)


def get_project_metadata_bytes(project_metadata_filepath,
                               project_metadata_identifier):
  return get_project_metadata_str(project_metadata_filepath,
                                  project_metadata_identifier).encode('utf-8')


def get_project_metadata_json(project_metadata_filepath,
                              project_metadata_identifier):
  return json.loads(get_project_metadata_str(project_metadata_filepath,
                                             project_metadata_identifier))


# Read the versioned copy of a project metadata file, i.e.
# <name>.<identifier>.json under TUF_DIRECTORY.
def get_project_metadata_str(project_metadata_filepath,
                             project_metadata_identifier):
  assert project_metadata_filepath.endswith('.json')
  project_metadata_filepath = '{}.{}{}'.format(project_metadata_filepath[:-5],
                                               project_metadata_identifier,
                                               '.json')
  project_metadata_filepath = os.path.join(TUF_DIRECTORY,
                                           project_metadata_filepath)
  with open(project_metadata_filepath) as project_metadata_file:
    return project_metadata_file.read()


if __name__ == '__main__':
  # It shouldn't matter whether we're looking at the project metadata for TUF
  # or any other security system, because they should all have the same
  # project metadata.
  FIRST_SNAPSHOT_FILEPATH = os.path.join(TUF_DIRECTORY,
                                         'snapshot.1395359999.json')
  LAST_SNAPSHOT_FILEPATH = os.path.join(TUF_DIRECTORY,
                                        'snapshot.1397951828.json')

  get_avg_initial_cost(LAST_SNAPSHOT_FILEPATH)
  print('')
  avg_recurring_cost = get_avg_recurring_cost(FIRST_SNAPSHOT_FILEPATH,
                                              LAST_SNAPSHOT_FILEPATH)
  print('')
  # There are minor differences due to nondeterminism in bz2 output, and I
  # think they are inconsequential.
  find_project_with_avg_recurring_cost(FIRST_SNAPSHOT_FILEPATH,
                                       LAST_SNAPSHOT_FILEPATH,
                                       avg_recurring_cost)
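
A quick way to see why get_delta_size() takes the min(): bz2 has fixed container overhead, so a tiny JSON Patch usually grows when compressed. The standalone sketch below (not part of the commit; the sample dicts are invented) exercises the same jsonpatch and bz2 calls.

Sketch (not in commit): delta-size-demo.py

#!/usr/bin/env python3
# Standalone check of the raw-vs-compressed tradeoff in get_delta_size()
# (not part of the commit; requires the jsonpatch package, and the sample
# dicts below are invented for illustration).
import bz2

import jsonpatch

prev = {'signed': {'meta': {'foo.json': 1}}}
curr = {'signed': {'meta': {'foo.json': 2}}}

patch_str = str(jsonpatch.make_patch(prev, curr))
raw_size = len(patch_str)
bz2_size = len(bz2.compress(patch_str.encode('utf-8')))

# For a one-op patch, bz2's container overhead usually outweighs any gain,
# so min() charges the raw size.
print('raw = {} bytes, bz2 = {} bytes, charged = {}'.format(
    raw_size, bz2_size, min(raw_size, bz2_size)))

On a one-op patch like this, the raw form should win, which is exactly the case the comment in get_delta_size() guards against.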

Diff for: mercury-log-filter.sh

+21
@@ -0,0 +1,21 @@
#!/bin/sh

# This is a script to automate the filtration of logs so that what we do is
# reproducible and can be checked for errors.

EXPERIMENTS_DIR=/var/experiments

# Keep only successful HTTP requests for /packages/.* from ALL UAs.
time ./mercury-log-stripper.py

# Sort these requests by time and merge them all.
time ./mercury-log-sorter.sh

# Make a copy of these requests, but limited to the first day.
time ./trim-sorted-mercury-log.py

# Go to where the log was merged.
cd /var/experiments-output/simple/

# List number of lines.
time wc -l sorted.mercury.log*
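
mercury-log-stripper.py is invoked above, but its diff is not shown in this section. As a purely hypothetical illustration of the filtering rule the comment describes (successful requests for /packages/.* from all user agents), a minimal stand-in might look like the sketch below; the comma-separated field layout and the status field are assumptions, not the committed code.

Sketch (not in commit): stripper-sketch.py

#!/usr/bin/env python3
# Hypothetical stand-in for mercury-log-stripper.py, NOT the committed code:
# keep only successful (2xx) requests for /packages/.* from ALL user agents.
# The comma-separated layout (timestamp, ip, url, user_agent, status) is an
# assumption for illustration.
import re
import sys

PACKAGES_URL = re.compile(r'^/packages/.*')

for line in sys.stdin:
  timestamp, ip, url, user_agent, status = line.rstrip('\n').split(',', 4)
  if status.startswith('2') and PACKAGES_URL.match(url):
    sys.stdout.write(line)

Something like this would run as ./stripper-sketch.py < access.log > mercury.log, with the real script presumably iterating over the raw logs under EXPERIMENTS_DIR.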

Diff for: mercury-log-sorter.sh

+24
@@ -0,0 +1,24 @@
#!/bin/bash

umask 007

cd /var/experiments-output/anonymized/

for log in mercury.*.log
do
  # Sort by timestamp (k1), IP (k2), user-agent (k4), URL (k3).
  time sort --field-separator=, --unique -k1 -k2 -k4 -k3 $log > sorted.$log
  echo sorted.$log
done

rm mercury.*.log
echo 'rm mercury.*.log'

time sort --field-separator=, --unique -k1 -k2 -k4 -k3 -ms -o sorted.mercury.log sorted.mercury.*.log
echo sorted.mercury.log

rm sorted.mercury.*.log
echo 'rm sorted.mercury.*.log'

mkdir /var/experiments-output/simple/
mv sorted.mercury.log /var/experiments-output/simple/
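
The second sort invocation passes -m (via -ms) to merge the already-sorted per-file outputs rather than re-sorting everything from scratch. A rough Python analogue of that k-way merge is sketched below, assuming the same comma-separated layout; it is illustration only (the commit relies on coreutils sort, whose -k key semantics differ slightly from this tuple key, and whose --unique dedupes by key rather than by whole line).

Sketch (not in commit): merge-sketch.py

#!/usr/bin/env python3
# Rough analogue of `sort --field-separator=, -ms --unique` over inputs that
# are each already sorted (illustration only; the commit uses coreutils sort).
# Assumed field layout: timestamp, ip, url, user_agent, ...
import heapq
import sys


def sort_key(line):
  fields = line.split(',')
  # Mirror the script's key order: timestamp (1), IP (2), user-agent (4),
  # URL (3).
  return (fields[0], fields[1], fields[3], fields[2])


logs = [open(path) for path in sys.argv[1:]]
previous = None
for line in heapq.merge(*logs, key=sort_key):
  # Approximate --unique by dropping exact duplicate lines.
  if line != previous:
    sys.stdout.write(line)
    previous = line

Run as ./merge-sketch.py sorted.mercury.*.log > sorted.mercury.log. heapq.merge streams the inputs, so, like sort -m, it never loads the full logs into memory.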
