1
1
import hashlib
2
2
import logging
3
3
import os
4
+ from copy import deepcopy
5
+ from typing import Union
4
6
5
7
from tron import yaml
6
8
from tron .config import config_parse
@@ -42,7 +44,7 @@ def read_raw(path) -> str:
42
44
return fh .read ()
43
45
44
46
45
def hash_digest(content: Union[str, bytes]) -> str:
    """Return the hexadecimal SHA-1 digest of ``content``.

    ``content`` is passed through ``maybe_encode`` before hashing.
    """
    # TODO: TRON-2293 maybe_encode is a relic of Python2->Python3 migration. Remove it.
    encoded = maybe_encode(content)
    digest = hashlib.sha1(encoded)
    return digest.hexdigest()
@@ -97,6 +99,7 @@ class ConfigManager:
97
99
def __init__(self, config_path, manifest=None):
    """Set up a manager for the configs stored under ``config_path``.

    When ``manifest`` is omitted (or falsy), a ``ManifestFile`` is created
    for ``config_path``.
    """
    if not manifest:
        manifest = ManifestFile(config_path)

    self.config_path = config_path
    self.manifest = manifest
    # Cache of parsed configs keyed by name; stays None until
    # get_config_name_mapping() populates it.
    self.name_mapping = None
100
103
101
104
def build_file_path (self , name ):
102
105
name = name .replace ("." , "_" ).replace (os .path .sep , "_" )
@@ -107,23 +110,32 @@ def read_raw_config(self, name=schema.MASTER_NAMESPACE) -> str:
107
110
filename = self .manifest .get_file_name (name )
108
111
return read_raw (filename )
109
112
110
def write_config(self, name: str, content: str) -> None:
    """Validate ``content`` for ``name``, write it to disk and cache it.

    ``content`` is the raw config text. It is parsed once with
    ``from_string``; the parsed form is validated and cached, while the raw
    text is what gets written to the file.

    Any exception raised by validation or by the disk write propagates to
    the caller, and in that case the in-memory cache is left untouched.
    """
    loaded_content = from_string(content)
    self.validate_with_fragment(
        name,
        content=loaded_content,
        # TODO: remove this constraint after tron triggers across clusters are supported.
        should_validate_missing_dependency=False,
    )

    # validate_with_fragment throws if the updated content is invalid - so if
    # we get here we know the update is safe to store.
    #
    # Persist to disk *before* touching the cache: if the write fails, the
    # cache must not advertise a config that never made it to disk.
    filename = self.get_filename_from_manifest(name)
    write_raw(filename, content)

    # ...and only then reflect the update in our in-memory config store.
    self.get_config_name_mapping()[name] = loaded_content
119
128
120
def delete_config(self, name: str) -> None:
    """Delete the config for ``name`` from the manifest, the cache and disk.

    If the manifest has no file for ``name``, this logs a message and
    returns without changing anything. Errors raised by the manifest or by
    ``os.remove`` propagate to the caller.
    """
    filename = self.manifest.get_file_name(name)
    if not filename:
        log.info("Namespace %s does not exist in manifest, cannot delete.", name)
        return

    self.manifest.delete(name)

    # To avoid needing to reload from disk on every config load, deletions
    # must also be reflected in the cache. Only evict when the cache has
    # already been built: building it here would read every config from disk
    # just to drop one key, and the lazily built cache is derived from the
    # manifest, which no longer lists this name.
    if self.name_mapping is not None:
        self.name_mapping.pop(name, None)

    os.remove(filename)
129
141
@@ -141,7 +153,11 @@ def validate_with_fragment(
141
153
content ,
142
154
should_validate_missing_dependency = True ,
143
155
):
144
- name_mapping = self .get_config_name_mapping ()
156
+ # NOTE: we deepcopy rather than swap values to keep this a pure function
157
+ # get_config_name_mapping() returns a shared dict, so this would otherwise
158
+ # actually update the mapping - which would be unwanted/need to be rolled-back
159
+ # should validation fail.
160
+ name_mapping = deepcopy (self .get_config_name_mapping ())
145
161
name_mapping [name ] = content
146
162
try :
147
163
JobGraph (
@@ -152,8 +168,11 @@ def validate_with_fragment(
152
168
raise ConfigError (str (e ))
153
169
154
170
def get_config_name_mapping(self):
    """Return the mapping of config name to parsed config contents.

    The mapping is built on first use from the manifest's file mapping and
    stored on ``self.name_mapping``; later calls return that same dict
    object, so mutations by callers are visible to everyone.
    """
    if self.name_mapping is not None:
        return self.name_mapping

    log.info("Creating config mapping cache...")
    file_mapping = self.manifest.get_file_mapping()
    self.name_mapping = {
        namespace: read(path) for namespace, path in file_mapping.items()
    }
    return self.name_mapping
157
176
158
177
def load (self ):
159
178
"""Return the fully constructed configuration."""
def get_hash(self, name) -> str:
    """Return a hash of the configuration contents for name.

    Returns ``self.DEFAULT_HASH`` when ``name`` is not in this manager.
    Otherwise the hash is computed from the cached, parsed config when one
    is available, falling back to the raw file contents on disk.
    """
    if name not in self:
        return self.DEFAULT_HASH

    name_mapping = self.get_config_name_mapping()
    if name in name_mapping:
        # We only hold the parsed dict in memory. Rather than hit the disk
        # for the raw string, dump the in-memory dict back to yaml and hash
        # that (the original author measured this as a few ms faster than
        # loading from disk).
        # NOTE(review): this hashes re-serialized yaml, while the fallback
        # below hashes the raw file text - the two paths may not produce the
        # same hash for the same config.
        # TODO: consider storing the hash alongside the config so that we only
        # calculate hashes once?
        return hash_digest(
            yaml.dump(
                name_mapping[name],
                # ensure that the keys are always in a stable order
                sort_keys=True,
            ),
        )

    # The config for any name should always be in our name mapping
    # ...but just in case, let's fall back to reading from disk.
    log.warning(
        "%s not found in name mapping - falling back to hashing contents on disk!",
        name,
    )
    return hash_digest(self.read_raw_config(name))
169
207
170
208
def __contains__ (self , name ):
0 commit comments