Skip to content

Commit 1eb6af1

Browse files
authored
Merge pull request #172 from legend-exp/codex/ensure-hdf5-file-is-closed-before-exception
Fix open file handles on serialization errors
2 parents bb6e291 + e512531 commit 1eb6af1

5 files changed

Lines changed: 463 additions & 304 deletions

File tree

src/lgdo/lh5/_serializers/write/composite.py

Lines changed: 162 additions & 149 deletions
Original file line number | Diff line number | Diff line change
@@ -52,140 +52,166 @@ def _h5_write_lgdo(
5252
# In hdf5, 'a' is really "modify" -- in addition to appending, you can
5353
# change any object in the file. So we use file:append for
5454
# write_object:overwrite.
55+
opened_here = False
5556
if not isinstance(lh5_file, h5py.File):
5657
mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
57-
lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
5858

59-
log.debug(
60-
f"writing {obj!r}[{start_row}:{n_rows}] as "
61-
f"{lh5_file.filename}:{group}/{name}[{write_start}:], "
62-
f"mode = {wo_mode}, h5py_kwargs = {h5py_kwargs}"
63-
)
64-
65-
group = utils.get_h5_group(group, lh5_file)
59+
try:
60+
fh = h5py.File(lh5_file, mode=mode, **file_kwargs)
61+
except OSError as oe:
62+
raise LH5EncodeError(str(oe), lh5_file, None) from oe
6663

67-
# name already in file
68-
if name in group or (
69-
("datatype" in group.attrs or group == "/")
70-
and (len(name) <= 2 or "/" not in name[1:-1])
71-
):
72-
pass
73-
# group is in file but not struct or need to create nesting
64+
opened_here = True
7465
else:
75-
# check if name is nested
76-
# if name is nested, iterate up from parent
77-
# otherwise we just need to iterate the group
78-
if len(name) > 2 and "/" in name[1:-1]:
79-
group = utils.get_h5_group(
80-
name[:-1].rsplit("/", 1)[0],
81-
group,
82-
)
83-
curr_name = (
84-
name.rsplit("/", 1)[1]
85-
if name[-1] != "/"
86-
else name[:-1].rsplit("/", 1)[1]
87-
)
88-
else:
89-
curr_name = name
90-
# initialize the object to be written
91-
obj = types.Struct({curr_name.replace("/", ""): obj})
66+
fh = lh5_file
9267

93-
# if base group already has a child we just append
94-
if len(group) >= 1:
95-
wo_mode = "ac"
96-
else:
97-
# iterate up the group hierarchy until we reach the root or a group with more than one child
98-
while group.name != "/":
99-
if len(group) > 1:
100-
break
101-
curr_name = group.name
102-
group = group.parent
103-
if group.name != "/":
104-
obj = types.Struct({curr_name[len(group.name) + 1 :]: obj})
105-
else:
106-
obj = types.Struct({curr_name[1:]: obj})
107-
# if the group has more than one child, we need to append else we can overwrite
108-
wo_mode = "ac" if len(group) > 1 else "o"
109-
110-
# set the new name
111-
if group.name == "/":
112-
name = "/"
113-
elif group.parent.name == "/":
114-
name = group.name[1:]
115-
else:
116-
name = group.name[len(group.parent.name) + 1 :]
117-
# get the new group
118-
group = utils.get_h5_group(group.parent if group.name != "/" else "/", lh5_file)
68+
try:
69+
log.debug(
70+
f"writing {obj!r}[{start_row}:{n_rows}] as "
71+
f"{fh.filename}:{group}/{name}[{write_start}:], "
72+
f"mode = {wo_mode}, h5py_kwargs = {h5py_kwargs}"
73+
)
11974

120-
if wo_mode == "w" and name in group:
121-
msg = f"can't overwrite '{name}' in wo_mode 'write_safe'"
122-
raise LH5EncodeError(msg, lh5_file, group, name)
75+
group = utils.get_h5_group(group, fh)
12376

124-
# struct, table, waveform table or histogram.
125-
if isinstance(obj, types.Struct):
126-
if (
127-
isinstance(obj, types.Histogram)
128-
and wo_mode not in ["w", "o", "of"]
129-
and name in group
77+
# name already in file
78+
if name in group or (
79+
("datatype" in group.attrs or group == "/")
80+
and (len(name) <= 2 or "/" not in name[1:-1])
13081
):
131-
msg = f"can't append-write to histogram in wo_mode '{wo_mode}'"
132-
raise LH5EncodeError(msg, lh5_file, group, name)
133-
if isinstance(obj, types.Histogram) and write_start != 0:
134-
msg = f"can't write histogram in wo_mode '{wo_mode}' with write_start != 0"
135-
raise LH5EncodeError(msg, lh5_file, group, name)
136-
137-
return _h5_write_struct(
138-
obj,
139-
name,
140-
lh5_file,
141-
group=group,
142-
start_row=start_row,
143-
n_rows=n_rows, # if isinstance(obj, types.Table | types.Histogram) else None,
144-
wo_mode=wo_mode,
145-
write_start=write_start,
146-
**h5py_kwargs,
147-
)
148-
149-
# scalars
150-
if isinstance(obj, types.Scalar):
151-
return _h5_write_scalar(obj, name, lh5_file, group, wo_mode)
82+
pass
83+
# group is in file but not struct or need to create nesting
84+
else:
85+
# check if name is nested
86+
# if name is nested, iterate up from parent
87+
# otherwise we just need to iterate the group
88+
if len(name) > 2 and "/" in name[1:-1]:
89+
group = utils.get_h5_group(
90+
name[:-1].rsplit("/", 1)[0],
91+
group,
92+
)
93+
curr_name = (
94+
name.rsplit("/", 1)[1]
95+
if name[-1] != "/"
96+
else name[:-1].rsplit("/", 1)[1]
97+
)
98+
else:
99+
curr_name = name
100+
# initialize the object to be written
101+
obj = types.Struct({curr_name.replace("/", ""): obj})
102+
103+
# if base group already has a child we just append
104+
if len(group) >= 1:
105+
wo_mode = "ac"
106+
else:
107+
# iterate up the group hierarchy until we reach the root or a group with more than one child
108+
while group.name != "/":
109+
if len(group) > 1:
110+
break
111+
curr_name = group.name
112+
group = group.parent
113+
if group.name != "/":
114+
obj = types.Struct({curr_name[len(group.name) + 1 :]: obj})
115+
else:
116+
obj = types.Struct({curr_name[1:]: obj})
117+
# if the group has more than one child, we need to append else we can overwrite
118+
wo_mode = "ac" if len(group) > 1 else "o"
119+
120+
# set the new name
121+
if group.name == "/":
122+
name = "/"
123+
elif group.parent.name == "/":
124+
name = group.name[1:]
125+
else:
126+
name = group.name[len(group.parent.name) + 1 :]
127+
# get the new group
128+
group = utils.get_h5_group(group.parent if group.name != "/" else "/", fh)
129+
130+
if wo_mode == "w" and name in group:
131+
msg = f"can't overwrite '{name}' in wo_mode 'write_safe'"
132+
raise LH5EncodeError(msg, fh, group, name)
133+
134+
# struct, table, waveform table or histogram.
135+
if isinstance(obj, types.Struct):
136+
if (
137+
isinstance(obj, types.Histogram)
138+
and wo_mode not in ["w", "o", "of"]
139+
and name in group
140+
):
141+
msg = f"can't append-write to histogram in wo_mode '{wo_mode}'"
142+
raise LH5EncodeError(msg, fh, group, name)
143+
if isinstance(obj, types.Histogram) and write_start != 0:
144+
msg = f"can't write histogram in wo_mode '{wo_mode}' with write_start != 0"
145+
raise LH5EncodeError(msg, fh, group, name)
146+
147+
return _h5_write_struct(
148+
obj,
149+
name,
150+
fh,
151+
group=group,
152+
start_row=start_row,
153+
n_rows=n_rows, # if isinstance(obj, types.Table | types.Histogram) else None,
154+
wo_mode=wo_mode,
155+
write_start=write_start,
156+
**h5py_kwargs,
157+
)
152158

153-
# vector of encoded vectors
154-
if isinstance(
155-
obj, (types.VectorOfEncodedVectors, types.ArrayOfEncodedEqualSizedArrays)
156-
):
157-
group = utils.get_h5_group(
158-
name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o")
159-
)
159+
# scalars
160+
if isinstance(obj, types.Scalar):
161+
return _h5_write_scalar(obj, name, fh, group, wo_mode)
160162

161-
# ask not to further compress flattened_data, it is already compressed!
162-
obj.encoded_data.flattened_data.attrs["compression"] = None
163+
# vector of encoded vectors
164+
if isinstance(
165+
obj, (types.VectorOfEncodedVectors, types.ArrayOfEncodedEqualSizedArrays)
166+
):
167+
group = utils.get_h5_group(
168+
name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o")
169+
)
163170

164-
_h5_write_vector_of_vectors(
165-
obj.encoded_data,
166-
"encoded_data",
167-
lh5_file,
168-
group=group,
169-
start_row=start_row,
170-
n_rows=n_rows,
171-
wo_mode=wo_mode,
172-
write_start=write_start,
173-
**h5py_kwargs,
174-
)
171+
# ask not to further compress flattened_data, it is already compressed!
172+
obj.encoded_data.flattened_data.attrs["compression"] = None
175173

176-
if isinstance(obj.decoded_size, types.Scalar):
177-
_h5_write_scalar(
178-
obj.decoded_size,
179-
"decoded_size",
180-
lh5_file,
174+
_h5_write_vector_of_vectors(
175+
obj.encoded_data,
176+
"encoded_data",
177+
fh,
181178
group=group,
179+
start_row=start_row,
180+
n_rows=n_rows,
182181
wo_mode=wo_mode,
182+
write_start=write_start,
183+
**h5py_kwargs,
183184
)
184-
else:
185-
_h5_write_array(
186-
obj.decoded_size,
187-
"decoded_size",
188-
lh5_file,
185+
186+
if isinstance(obj.decoded_size, types.Scalar):
187+
_h5_write_scalar(
188+
obj.decoded_size,
189+
"decoded_size",
190+
fh,
191+
group=group,
192+
wo_mode=wo_mode,
193+
)
194+
else:
195+
_h5_write_array(
196+
obj.decoded_size,
197+
"decoded_size",
198+
fh,
199+
group=group,
200+
start_row=start_row,
201+
n_rows=n_rows,
202+
wo_mode=wo_mode,
203+
write_start=write_start,
204+
**h5py_kwargs,
205+
)
206+
207+
return None
208+
209+
# vector of vectors
210+
if isinstance(obj, types.VectorOfVectors):
211+
return _h5_write_vector_of_vectors(
212+
obj,
213+
name,
214+
fh,
189215
group=group,
190216
start_row=start_row,
191217
n_rows=n_rows,
@@ -194,38 +220,25 @@ def _h5_write_lgdo(
194220
**h5py_kwargs,
195221
)
196222

197-
return None
198-
199-
# vector of vectors
200-
if isinstance(obj, types.VectorOfVectors):
201-
return _h5_write_vector_of_vectors(
202-
obj,
203-
name,
204-
lh5_file,
205-
group=group,
206-
start_row=start_row,
207-
n_rows=n_rows,
208-
wo_mode=wo_mode,
209-
write_start=write_start,
210-
**h5py_kwargs,
211-
)
212-
213-
# if we get this far, must be one of the Array types
214-
if isinstance(obj, types.Array):
215-
return _h5_write_array(
216-
obj,
217-
name,
218-
lh5_file,
219-
group=group,
220-
start_row=start_row,
221-
n_rows=n_rows,
222-
wo_mode=wo_mode,
223-
write_start=write_start,
224-
**h5py_kwargs,
225-
)
223+
# if we get this far, must be one of the Array types
224+
if isinstance(obj, types.Array):
225+
return _h5_write_array(
226+
obj,
227+
name,
228+
fh,
229+
group=group,
230+
start_row=start_row,
231+
n_rows=n_rows,
232+
wo_mode=wo_mode,
233+
write_start=write_start,
234+
**h5py_kwargs,
235+
)
226236

227-
msg = f"do not know how to write '{name}' of type '{type(obj).__name__}'"
228-
raise LH5EncodeError(msg, lh5_file, group, name)
237+
msg = f"do not know how to write '{name}' of type '{type(obj).__name__}'"
238+
raise LH5EncodeError(msg, fh, group, name)
239+
finally:
240+
if opened_here:
241+
fh.close()
229242

230243

231244
def _h5_write_struct(

0 commit comments

Comments
 (0)