From bad2ccddd84038bbf8f62d2298782bb08dd6b9c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Paul=20M=C3=BCller?= Date: Wed, 17 Apr 2024 09:34:12 +0200 Subject: [PATCH] fix: cleanup temporary directories on startup (close #29) --- CHANGELOG | 2 ++ mpl_data_cast/__init__.py | 2 +- mpl_data_cast/gui/main.py | 3 +++ mpl_data_cast/recipe.py | 45 ++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + tests/test_recipe.py | 40 +++++++++++++++++++++++++++++++++- 6 files changed, 90 insertions(+), 3 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index ce3fb53..60c7b61 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,5 @@ +0.6.3 + - fix: cleanup temporary directories on startup (#29) 0.6.2 - setup: bump dclab from 0.55.7 to 0.57.0 0.6.1 diff --git a/mpl_data_cast/__init__.py b/mpl_data_cast/__init__.py index 678a10f..ff922c4 100644 --- a/mpl_data_cast/__init__.py +++ b/mpl_data_cast/__init__.py @@ -1,4 +1,4 @@ # flake8: noqa: F401 -from .recipe import Recipe +from .recipe import Recipe, cleanup_tmp_dirs from . 
import mod_recipes from ._version import __version__ diff --git a/mpl_data_cast/gui/main.py b/mpl_data_cast/gui/main.py index e4ab957..439b832 100644 --- a/mpl_data_cast/gui/main.py +++ b/mpl_data_cast/gui/main.py @@ -71,6 +71,9 @@ def __init__(self, *args, **kwargs): self.show() self.raise_() + + # Clean up stale temporary data + mpldc_recipe.cleanup_tmp_dirs() splash.splash_close() @property diff --git a/mpl_data_cast/recipe.py b/mpl_data_cast/recipe.py index 7ca7280..a48ca91 100644 --- a/mpl_data_cast/recipe.py +++ b/mpl_data_cast/recipe.py @@ -1,6 +1,8 @@ import hashlib from abc import ABC, abstractmethod import atexit +import logging +import os import pathlib import shutil import tempfile @@ -8,9 +10,14 @@ import uuid from typing import Type, Callable, List +import psutil + from .util import HasherThread, hashfile, copyhashfile +logger = logging.getLogger(__name__) + + #: Files that are not copied (unless specified explicitly by a recipe) IGNORED_FILE_NAMES = [ ".DS_Store", @@ -45,7 +52,14 @@ def __init__(self, if not self.path_raw.exists(): raise ValueError(f"Raw data path '{self.path_raw}' doesn't exist!") #: Temporary directory (will be deleted upon application exit) - self.tempdir = pathlib.Path(tempfile.mkdtemp(prefix="MPL-Data-Cast_")) + self.tempdir = pathlib.Path( + # Use the current PID as an identifier for the temp dir + tempfile.mkdtemp( + prefix=f"PID-{os.getpid()}-{self.format}_", + dir=pathlib.Path(tempfile.gettempdir()) / "MPL-Data-Cast" + ) + ) + # Make sure everything is removed in the end. 
def cleanup_tmp_dirs():
    """Remove stale temporary recipe directories.

    Two locations inside the system temporary directory are handled:

    - ``MPL-Data-Cast_*``: directories created by MPL-Data-Cast
      versions <=0.6.2. These are removed unconditionally.
    - ``MPL-Data-Cast/PID-<pid>-*``: directories created by newer
      versions. A directory is removed only if no process with the PID
      encoded in its name exists anymore.

    Entries below ``MPL-Data-Cast`` whose name does not contain a valid
    integer PID are left untouched. Failures while inspecting a PID
    directory are logged as warnings instead of being raised.
    """
    tmp_root = pathlib.Path(tempfile.gettempdir())

    # In versions <=0.6.2 of MPL-Data-Cast, the temporary directories were
    # created directly in the system temporary directory. If a user
    # installed a newer version, these can be removed in any case.
    for pp in tmp_root.glob("MPL-Data-Cast_*"):
        if pp.is_dir():
            shutil.rmtree(pp, ignore_errors=True)

    # Newer versions of MPL-Data-Cast keep all recipe directories in one
    # common parent directory.
    for pp in (tmp_root / "MPL-Data-Cast").glob("PID-*"):
        # Every subdirectory is named "PID-<pid>-<...>" after the process
        # that created it.
        try:
            pid = int(pp.name.split("-")[1])
        except ValueError:
            # The name does not follow our naming scheme; we cannot tell
            # whether the owner is still alive, so do not touch it.
            continue
        try:
            # If the owning process does not exist anymore, the directory
            # is stale and may be deleted.
            if pp.is_dir() and not psutil.pid_exists(pid):
                shutil.rmtree(pp, ignore_errors=True)
        except Exception:
            logger.warning(
                "Could not remove stale data %s from PID %s:\n%s",
                pp, pid, traceback.format_exc()
            )
def test_cleanup_recipes_new():
    """Only directories that belong to dead processes are removed.

    Covers the new layout ``<tmp>/MPL-Data-Cast/PID-<pid>-*``.
    """
    # Local imports: only this test needs to spawn a helper process.
    import subprocess
    import sys

    td = pathlib.Path(tempfile.gettempdir()) / "MPL-Data-Cast"
    mypid = os.getpid()

    # Obtain the PID of a process that is guaranteed to have exited,
    # instead of guessing a number that might belong to a live process.
    proc = subprocess.Popen([sys.executable, "-c", "pass"])
    proc.wait()
    dead_pid = proc.pid

    dir1 = td / f"PID-{mypid}-Something_else"
    dir2 = td / f"PID-{dead_pid}-Something_else"

    # Make sure the test does not leave anything behind.
    atexit.register(shutil.rmtree, dir1, ignore_errors=True)
    atexit.register(shutil.rmtree, dir2, ignore_errors=True)

    dir1.mkdir(exist_ok=True, parents=True)
    path1 = dir1 / "a.txt"
    path1.write_text("should not be deleted")

    dir2.mkdir(exist_ok=True, parents=True)
    path2 = dir2 / "a.txt"
    path2.write_text("should be deleted")

    cleanup_tmp_dirs()

    # The directory of the running test process must survive ...
    assert path1.exists()
    # ... while the directory of the exited process must be gone.
    assert not path2.exists()


def test_cleanup_recipes_old():
    """Directories in the legacy layout ``<tmp>/MPL-Data-Cast_*`` are
    always removed."""
    td = pathlib.Path(tempfile.gettempdir()) / f"MPL-Data-Cast_{uuid.uuid4()}"
    td.mkdir(parents=True, exist_ok=True)
    path = td / "data.dat"
    path.write_text("hello world")
    assert path.exists()
    cleanup_tmp_dirs()
    assert not path.exists()