Skip to content

Commit 844af56

Browse files
committed
Fix several bugs in handling directory move events
1) When a directory is moved from inside a directory we watch, to a different directory that we don't watch, then we get a MOVED_FROM event but no MOVED_TO event, in which case we need to clean up the watch for that directory because we are no longer supposed to be watching it. 2) When a directory is moved from inside a directory we watch to inside another directory we watch, then we need to update our internal data to reflect the new location of the directory. 3) For both of the above, we need to handle subdirectories of moved directories, not just the directories themselves. To facilitate these fixes, we: * Add a new `watches()` iterator to the `Inotify` class so the tree classes can iterate through all watches to find ones that need to be removed. * Add a new `remove_tree()` function to the tree classes which handles removing entire trees. Both of these new functions are available to end users of the library since they're useful and there's no reason for them not to be. See the big comment added to the code for additional details about how this fix is implemented.
1 parent e23da30 commit 844af56

File tree

1 file changed

+111
-26
lines changed

1 file changed

+111
-26
lines changed

inotify/adapters.py

Lines changed: 111 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,24 @@ def add_watch(self, path_unicode, mask=inotify.constants.IN_ALL_EVENTS):
9393
path_bytes = path_unicode.encode('utf8')
9494

9595
wd = inotify.calls.inotify_add_watch(self.__inotify_fd, path_bytes, mask)
96-
_LOGGER.debug("Added watch (%d): [%s]", wd, path_unicode)
96+
try:
97+
old_path = self.__watches_r.pop(wd)
98+
except KeyError:
99+
_LOGGER.debug("Added watch (%d): [%s]", wd, path_unicode)
100+
else:
101+
# Already watched under a different path
102+
self.__watches.pop(old_path)
103+
_LOGGER.debug("Watch (%d) moved from %s to %s",
104+
wd, old_path, path_unicode)
97105

98106
self.__watches[path_unicode] = wd
99107
self.__watches_r[wd] = path_unicode
100108

101109
return wd
102110

111+
def watches(self):
112+
return self.__watches.keys()
113+
103114
def remove_watch(self, path, superficial=False):
104115
"""Remove our tracking information and call inotify to stop watching
105116
the given path. When a directory is removed, we'll just have to remove
@@ -182,6 +193,8 @@ def _handle_inotify_event(self, wd):
182193
path = self.__watches_r.get(header.wd)
183194
if path is not None:
184195
filename_unicode = filename_bytes.decode('utf8')
196+
if filename_unicode:
197+
_LOGGER.debug(f"Event filename received for {path}: {filename_unicode}")
185198
yield (header, type_names, path, filename_unicode)
186199

187200
buffer_length = len(self.__buffer)
@@ -270,6 +283,17 @@ def __init__(self, mask=inotify.constants.IN_ALL_EVENTS,
270283
self._i = Inotify(block_duration_s=block_duration_s)
271284
self._follow_symlinks = follow_symlinks
272285

286+
def remove_tree(self, path):
287+
path = path.rstrip(os.path.sep)
288+
_LOGGER.debug("Removing all watches beneath %s", path)
289+
prefix = path + os.path.sep
290+
# Accumulate all paths to remove before removing any to avoid messing
291+
# with the data while we're iterating through it.
292+
to_remove = [p for p in self._i.watches()
293+
if p == path or p.startswith(prefix)]
294+
for watch_path in to_remove:
295+
self._i.remove_watch(watch_path)
296+
273297
def event_gen(self, ignore_missing_new_folders=False, **kwargs):
274298
"""This is a secondary generator that wraps the principal one, and
275299
adds/removes watches as directories are added/removed.
@@ -279,56 +303,117 @@ def event_gen(self, ignore_missing_new_folders=False, **kwargs):
279303
`ignore_missing_new_folders`.
280304
"""
281305

306+
user_yield_nones = kwargs.get('yield_nones', True)
307+
kwargs['yield_nones'] = True
308+
move_from_events = {}
309+
282310
for event in self._i.event_gen(**kwargs):
283-
if event is not None:
311+
if event is None:
312+
if move_from_events:
313+
_LOGGER.debug("Handling deferred MOVED_FROM events")
314+
for move_event in move_from_events.values():
315+
(header, type_names, path, filename) = move_event
316+
self.remove_tree(os.path.join(path, filename))
317+
move_from_events = []
318+
else:
284319
(header, type_names, path, filename) = event
285320

286321
if header.mask & inotify.constants.IN_ISDIR:
287322
full_path = os.path.join(path, filename)
288323

289-
if (
290-
(header.mask & inotify.constants.IN_MOVED_TO) or
291-
(header.mask & inotify.constants.IN_CREATE)
292-
) and \
324+
if header.mask & inotify.constants.IN_CREATE and \
293325
(
294326
os.path.exists(full_path) is True or
295327
ignore_missing_new_folders is False
296328
):
297329
_LOGGER.debug("A directory has been created. We're "
298330
"adding a watch on it (because we're "
299331
"being recursive): [%s]", full_path)
300-
301-
302332
self._load_tree(full_path)
303333

304-
if header.mask & inotify.constants.IN_DELETE:
334+
elif header.mask & inotify.constants.IN_DELETE:
305335
_LOGGER.debug("A directory has been removed. We're "
306336
"being recursive, but it would have "
307337
"automatically been deregistered: [%s]",
308338
full_path)
309339

310340
# The watch would've already been cleaned-up internally.
311341
self._i.remove_watch(full_path, superficial=True)
312-
elif header.mask & inotify.constants.IN_MOVED_FROM:
313-
_LOGGER.debug("A directory has been renamed. We're "
314-
"being recursive, but it would have "
315-
"automatically been deregistered: [%s]",
316-
full_path)
317342

318-
self._i.remove_watch(full_path, superficial=True)
343+
# If a subdirectory of a directory we're watching is moved,
344+
# then there are two scenarios we need to handle:
345+
#
346+
# 1) If it has been moved out of the directory tree we're
347+
# watching, then we will get only the IN_MOVED_FROM
348+
# event for it. In this case we need to remove our watch
349+
# on the moved directory and on all of the directories
350+
# underneath it. We won't get separate events for those!
351+
# 2) If it has been moved somewhere else within the
352+
# tree we're watching, then we'll get both IN_MOVED_FROM
353+
# and IN_MOVED_TO events for it. In this case our
354+
# existing watches on the directory and its
355+
# subdirectories will remain open, but they have new
356+
# paths now so we need to update our internal data to
357+
# match the new paths. This is handled in _load_tree.
358+
#
359+
# Challenge: when we get the IN_MOVED_FROM event, how we
360+
# handle it depends on whether there is a subsequent
361+
# IN_MOVED_TO event! We don't want to remove all the
362+
# watches if this is an in-tree move, both because it's
363+
# inefficient to delete and then soon after recreate those
364+
# watches, and because it creates a race condition: if
365+
# something happens in one of the directories between when
366+
# we remove the watches and when we recreate them, we won't
367+
# get notified about it.
368+
#
369+
# We solve this by waiting to handle the IN_MOVED_FROM
370+
# event until we get a None from the primary event_gen
371+
# generator. It is reasonable to assume that linked
372+
# IN_MOVED_FROM and IN_MOVED_TO events will arrive in a
373+
# single batch of events. If there are pending
374+
# IN_MOVED_FROM events at the end of the batch, then we
375+
# assume they were moved out of tree and remove all the
376+
# corresponding watches.
377+
#
378+
# There's also a third scenario we need to handle below. If
379+
# we get an IN_MOVED_TO without a corresponding
380+
# IN_MOVED_FROM, then the directory was moved into our tree
381+
# from outside our tree, so we need to add watches for that
382+
# whole subtree.
383+
elif header.mask & inotify.constants.IN_MOVED_FROM:
384+
_LOGGER.debug(
385+
"A directory has been renamed. Deferring "
386+
"handling until we find out whether the target is "
387+
"in our tree: [%s]", full_path)
388+
move_from_events[header.cookie] = event
319389
elif header.mask & inotify.constants.IN_MOVED_TO:
320-
_LOGGER.debug("A directory has been renamed. We're "
321-
"adding a watch on it (because we're "
322-
"being recursive): [%s]", full_path)
323390
try:
324-
self._i.add_watch(full_path, self._mask)
325-
except inotify.calls.InotifyError as e:
326-
if e.errno == ENOENT:
327-
_LOGGER.warning("Path %s disappeared before we could watch it", full_path)
328-
else:
329-
raise
330-
331-
yield event
391+
from_event = move_from_events.pop(header.cookie)
392+
except KeyError:
393+
_LOGGER.debug(
394+
"A directory has been moved into our watch "
395+
"area. Adding watches for it and its "
396+
"subdirectories: [%s]", full_path)
397+
self._load_tree(full_path)
398+
else:
399+
(_, _, from_path, from_filename) = from_event
400+
full_from_path = os.path.join(
401+
from_path, from_filename)
402+
_LOGGER.debug(
403+
"A directory has been moved from %s to %s "
404+
"within our watch area. Updating internal "
405+
"data to reflect move.", full_from_path,
406+
full_path)
407+
self._load_tree(full_path)
408+
# If part of the _load_tree above fails in part or
409+
# in full because the top-level directory or some
410+
# of its subdirectories have been removed, then
411+
# they won't get cleaned up by _load_tree, so let's
412+
# clean them up just in case.
413+
self.remove_tree(full_from_path)
414+
415+
if user_yield_nones or event is not None:
416+
yield event
332417

333418
@property
334419
def inotify(self):

0 commit comments

Comments
 (0)