Skip to content

Commit 0940101

Browse files
authored
Try to load config_name_mapping from disk just once (#1013)
There's enough files (and enough YAML in these files) that the IO/YAML parsing takes a significant amount of time. While it's nice to always load from the source-of-truth (i.e., the files on-disk) - it's not worth paying the performance penalty (especially at the scale we're seeing internally) for a negligible benefit. Locally (with a large test config), this results in a ~5x improvement in timings. More concretely, my test configs were taking ~30ish seconds *without* this diff and ~6ish seconds *with* this diff.
1 parent 6d9f369 commit 0940101

File tree

2 files changed

+51
-9
lines changed

2 files changed

+51
-9
lines changed

tron/config/config_parse.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import itertools
1212
import logging
1313
import os
14+
from copy import deepcopy
1415
from typing import Dict
1516
from typing import List
1617
from typing import Optional
@@ -1068,11 +1069,14 @@ def validate_config_mapping(config_mapping):
10681069
msg = "A config mapping requires a %s namespace"
10691070
raise ConfigError(msg % MASTER_NAMESPACE)
10701071

1071-
master = valid_config(config_mapping.pop(MASTER_NAMESPACE))
1072+
# we mutate this mapping - so let's make sure that we're making a copy
1073+
# in case the passed-in mapping is used elsewhere
1074+
config_mapping_to_validate = deepcopy(config_mapping)
1075+
master = valid_config(config_mapping_to_validate.pop(MASTER_NAMESPACE))
10721076
nodes = get_nodes_from_master_namespace(master)
10731077
yield MASTER_NAMESPACE, master
10741078

1075-
for name, content in config_mapping.items():
1079+
for name, content in config_mapping_to_validate.items():
10761080
context = ConfigContext(
10771081
name,
10781082
nodes,

tron/config/manager.py

+45-7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import hashlib
22
import logging
33
import os
4+
from copy import deepcopy
5+
from typing import Union
46

57
from tron import yaml
68
from tron.config import config_parse
@@ -42,7 +44,7 @@ def read_raw(path) -> str:
4244
return fh.read()
4345

4446

45-
def hash_digest(content):
47+
def hash_digest(content: Union[str, bytes]) -> str:
4648
return hashlib.sha1(
4749
maybe_encode(content)
4850
).hexdigest() # TODO: TRON-2293 maybe_encode is a relic of Python2->Python3 migration. Remove it.
@@ -97,6 +99,7 @@ class ConfigManager:
9799
def __init__(self, config_path, manifest=None):
98100
self.config_path = config_path
99101
self.manifest = manifest or ManifestFile(config_path)
102+
self.name_mapping = None
100103

101104
def build_file_path(self, name):
102105
name = name.replace(".", "_").replace(os.path.sep, "_")
@@ -107,23 +110,32 @@ def read_raw_config(self, name=schema.MASTER_NAMESPACE) -> str:
107110
filename = self.manifest.get_file_name(name)
108111
return read_raw(filename)
109112

110-
def write_config(self, name, content):
113+
def write_config(self, name: str, content: str) -> None:
114+
loaded_content = from_string(content)
111115
self.validate_with_fragment(
112116
name,
113-
from_string(content),
117+
content=loaded_content,
114118
# TODO: remove this constraint after tron triggers across clusters are supported.
115119
should_validate_missing_dependency=False,
116120
)
121+
# validate_with_fragment throws if the updated content is invalid - so if we get here
122+
# we know it's safe to reflect the update in our config store
123+
self.get_config_name_mapping()[name] = loaded_content
124+
125+
# ...and then let's also persist the update to disk since memory is temporary, but disk is forever™
117126
filename = self.get_filename_from_manifest(name)
118127
write_raw(filename, content)
119128

120-
def delete_config(self, name):
129+
def delete_config(self, name: str) -> None:
121130
filename = self.manifest.get_file_name(name)
122131
if not filename:
123132
msg = "Namespace %s does not exist in manifest, cannot delete."
124133
log.info(msg % name)
125134
return
126135

136+
# to avoid needing to reload from disk on every config load - we need to ensure that
137+
# we also persist config deletions into our cache
138+
self.get_config_name_mapping().pop(name, None)
127139
self.manifest.delete(name)
128140
os.remove(filename)
129141

@@ -141,7 +153,11 @@ def validate_with_fragment(
141153
content,
142154
should_validate_missing_dependency=True,
143155
):
144-
name_mapping = self.get_config_name_mapping()
156+
# NOTE: we deepcopy rather than swap values to keep this a pure function
157+
# get_config_name_mapping() returns a shared dict, so this would otherwise
158+
# actually update the mapping - which would be unwanted/need to be rolled-back
159+
# should validation fail.
160+
name_mapping = deepcopy(self.get_config_name_mapping())
145161
name_mapping[name] = content
146162
try:
147163
JobGraph(
@@ -152,8 +168,11 @@ def validate_with_fragment(
152168
raise ConfigError(str(e))
153169

154170
def get_config_name_mapping(self):
155-
seq = self.manifest.get_file_mapping().items()
156-
return {name: read(filename) for name, filename in seq}
171+
if self.name_mapping is None:
172+
log.info("Creating config mapping cache...")
173+
seq = self.manifest.get_file_mapping().items()
174+
self.name_mapping = {name: read(filename) for name, filename in seq}
175+
return self.name_mapping
157176

158177
def load(self):
159178
"""Return the fully constructed configuration."""
@@ -165,6 +184,25 @@ def get_hash(self, name) -> str:
165184
"""Return a hash of the configuration contents for name."""
166185
if name not in self:
167186
return self.DEFAULT_HASH
187+
188+
if name in self.get_config_name_mapping():
189+
# unfortunately, we have the parsed dict in memory.
190+
# rather than hit the disk to get the raw string - let's convert
191+
# the in-memory dict to a yaml string and hash that to save a couple
192+
# ms (in testing, ~3ms over loading from disk and ~1ms over dumping to json :p)
193+
# TODO: consider storing the hash alongside the config so that we only calculate
194+
# hashes once?
195+
return hash_digest(
196+
yaml.dump(
197+
self.get_config_name_mapping()[name],
198+
# ensure that the keys are always in a stable order
199+
sort_keys=True,
200+
),
201+
)
202+
203+
# the config for any name should always be in our name mapping
204+
# ...but just in case, let's fallback to reading from disk.
205+
log.warning("%s not found in name mapping - falling back to hashing contents on disk!")
168206
return hash_digest(self.read_raw_config(name))
169207

170208
def __contains__(self, name):

0 commit comments

Comments
 (0)