diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f025252749..0f489aaef5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -136,19 +136,19 @@ jobs: "include": [ {"os": "ubuntu-22.04", "python-version": "3.10", "toxenv": "mypy"}, {"os": "ubuntu-22.04", "python-version": "3.11", "toxenv": "docs"}, - {"os": "ubuntu-22.04", "python-version": "3.10", "toxenv": "py310-fuse2"}, - {"os": "ubuntu-24.04", "python-version": "3.14", "toxenv": "py314-fuse3"} + {"os": "ubuntu-22.04", "python-version": "3.10", "toxenv": "py310-llfuse"}, + {"os": "ubuntu-24.04", "python-version": "3.14", "toxenv": "py314-mfusepy"} ] }' || '{ "include": [ {"os": "ubuntu-22.04", "python-version": "3.10", "toxenv": "mypy"}, {"os": "ubuntu-22.04", "python-version": "3.11", "toxenv": "docs"}, - {"os": "ubuntu-22.04", "python-version": "3.10", "toxenv": "py310-fuse2"}, - {"os": "ubuntu-22.04", "python-version": "3.11", "toxenv": "py311-fuse2", "binary": "borg-linux-glibc235-x86_64-gh"}, - {"os": "ubuntu-22.04-arm", "python-version": "3.11", "toxenv": "py311-fuse2", "binary": "borg-linux-glibc235-arm64-gh"}, - {"os": "ubuntu-24.04", "python-version": "3.12", "toxenv": "py312-fuse3"}, - {"os": "ubuntu-24.04", "python-version": "3.13", "toxenv": "py313-fuse3"}, - {"os": "ubuntu-24.04", "python-version": "3.14", "toxenv": "py314-fuse3"}, + {"os": "ubuntu-22.04", "python-version": "3.10", "toxenv": "py310-llfuse"}, + {"os": "ubuntu-22.04", "python-version": "3.11", "toxenv": "py311-llfuse", "binary": "borg-linux-glibc235-x86_64-gh"}, + {"os": "ubuntu-22.04-arm", "python-version": "3.11", "toxenv": "py311-llfuse", "binary": "borg-linux-glibc235-arm64-gh"}, + {"os": "ubuntu-24.04", "python-version": "3.12", "toxenv": "py312-pyfuse3"}, + {"os": "ubuntu-24.04", "python-version": "3.13", "toxenv": "py313-pyfuse3"}, + {"os": "ubuntu-24.04", "python-version": "3.14", "toxenv": "py314-mfusepy"}, {"os": "macos-15-intel", "python-version": "3.11", "toxenv": "py311-none", "binary": "borg-macos-15-x86_64-gh"}, {"os": "macos-15", "python-version": "3.11", "toxenv": "py311-none", "binary": "borg-macos-15-arm64-gh"} ] @@ -190,9 +190,9 @@ jobs: sudo apt-get install -y libssl-dev libacl1-dev libxxhash-dev liblz4-dev libzstd-dev sudo apt-get install -y bash zsh fish # for shell completion tests sudo apt-get install -y rclone openssh-server curl - if [[ "$TOXENV" == *"fuse2"* ]]; then + if [[ "$TOXENV" == *"llfuse"* ]]; then sudo apt-get install -y libfuse-dev fuse # Required for Python llfuse module - elif [[ "$TOXENV" == *"fuse3"* ]]; then + elif [[ "$TOXENV" == *"pyfuse3"* || "$TOXENV" == *"mfusepy"* ]]; then sudo apt-get install -y libfuse3-dev fuse3 # Required for Python pyfuse3 module fi @@ -266,10 +266,12 @@ jobs: - name: Install borgbackup run: | - if [[ "$TOXENV" == *"fuse2"* ]]; then + if [[ "$TOXENV" == *"llfuse"* ]]; then pip install -ve ".[llfuse]" - elif [[ "$TOXENV" == *"fuse3"* ]]; then + elif [[ "$TOXENV" == *"pyfuse3"* ]]; then pip install -ve ".[pyfuse3]" + elif [[ "$TOXENV" == *"mfusepy"* ]]; then + pip install -ve ".[mfusepy]" else pip install -ve . 
fi @@ -423,8 +425,8 @@ jobs: pip -V python -m pip install --upgrade pip wheel pip install -r requirements.d/development.txt - pip install -e ".[llfuse]" - tox -e py311-fuse2 + pip install -e ".[mfusepy]" + tox -e py311-mfusepy if [[ "${{ matrix.do_binaries }}" == "true" && "${{ startsWith(github.ref, 'refs/tags/') }}" == "true" ]]; then python -m pip install 'pyinstaller==6.14.2' diff --git a/Vagrantfile b/Vagrantfile index ae85649369..2bec9f5190 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -373,7 +373,7 @@ Vagrant.configure(2) do |config| b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("llfuse") b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller() b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("freebsd13") - b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("freebsd13", ".*(fuse3|none).*") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("freebsd13", ".*(pyfuse3|none).*") end config.vm.define "freebsd14" do |b| @@ -390,7 +390,7 @@ Vagrant.configure(2) do |config| b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("llfuse") b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller() b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("freebsd14") - b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("freebsd14", ".*(fuse3|none).*") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("freebsd14", ".*(pyfuse3|none).*") end config.vm.define "openbsd7" do |b| @@ -413,7 +413,7 @@ Vagrant.configure(2) do |config| b.vm.provision "fs init", :type => :shell, :inline => fs_init("vagrant") b.vm.provision "packages netbsd", :type => :shell, :inline => packages_netbsd b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("netbsd9") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(false) + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("nofuse") b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("netbsd9", ".*fuse.*") end diff --git a/docs/global.rst.inc b/docs/global.rst.inc index d8016d3b81..c961eb5bbe 100644 --- a/docs/global.rst.inc +++ b/docs/global.rst.inc @@ -23,6 +23,7 @@ .. _msgpack: https://msgpack.org/ .. _`msgpack-python`: https://pypi.python.org/pypi/msgpack-python/ .. _llfuse: https://pypi.python.org/pypi/llfuse/ +.. _mfusepy: https://pypi.python.org/pypi/mfusepy/ .. _pyfuse3: https://pypi.python.org/pypi/pyfuse3/ .. _userspace filesystems: https://en.wikipedia.org/wiki/Filesystem_in_Userspace .. _Cython: http://cython.org/ diff --git a/docs/installation.rst b/docs/installation.rst index 674a0f9708..0e9fd92b3e 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -175,6 +175,7 @@ development header files (sometimes in a separate `-dev` or `-devel` package). 
* Optionally, if you wish to mount an archive as a FUSE filesystem, you need a FUSE implementation for Python: + - mfusepy_ >= 3.1.0 (for fuse 2 and fuse 3, use `pip install borgbackup[mfusepy]`), or - pyfuse3_ >= 3.1.1 (for fuse 3, use `pip install borgbackup[pyfuse3]`), or - llfuse_ >= 1.3.8 (for fuse 2, use `pip install borgbackup[llfuse]`). - Additionally, your OS will need to have FUSE support installed diff --git a/docs/usage/general/environment.rst.inc b/docs/usage/general/environment.rst.inc index 670a6403d2..ce80aabf70 100644 --- a/docs/usage/general/environment.rst.inc +++ b/docs/usage/general/environment.rst.inc @@ -88,8 +88,9 @@ General: This is a comma-separated list of implementation names, they are tried in the given order, e.g.: - - ``pyfuse3,llfuse``: default, first try to load pyfuse3, then try to load llfuse. + - ``mfusepy,pyfuse3,llfuse``: default, first try to load mfusepy, then pyfuse3, then llfuse. - ``llfuse,pyfuse3``: first try to load llfuse, then try to load pyfuse3. + - ``mfusepy``: only try to load mfusepy - ``pyfuse3``: only try to load pyfuse3 - ``llfuse``: only try to load llfuse - ``none``: do not try to load an implementation diff --git a/pyproject.toml b/pyproject.toml index c7428e2056..b12333239d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,8 +41,10 @@ dependencies = [ ] [project.optional-dependencies] -llfuse = ["llfuse >= 1.3.8"] -pyfuse3 = ["pyfuse3 >= 3.1.1"] +llfuse = ["llfuse >= 1.3.8"] # fuse 2, low-level +pyfuse3 = ["pyfuse3 >= 3.1.1"] # fuse 3, low-level, async +mfusepy = ["mfusepy >= 3.1.0, <4.0.0"] # fuse 2+3, high-level +mfusepym = ["mfusepy @ git+https://github.com/mxmlnkn/mfusepy.git@master"] nofuse = [] s3 = ["borgstore[s3] ~= 0.3.0"] sftp = ["borgstore[sftp] ~= 0.3.0"] @@ -166,7 +168,7 @@ ignore_missing_imports = true requires = ["tox>=4.19", "pkgconfig", "cython", "wheel", "setuptools_scm"] # Important: when adding/removing Python versions here, # also update the section "Test environments with different FUSE implementations" accordingly. 
-env_list = ["py{310,311,312,313,314}-{none,fuse2,fuse3}", "docs", "ruff", "mypy", "bandit"] +env_list = ["py{310,311,312,313,314}-{none,llfuse,pyfuse3,mfusepy}", "docs", "ruff", "mypy", "bandit"] [tool.tox.env_run_base] package = "editable-legacy" # without this it does not find setup_docs when running under fakeroot @@ -180,54 +182,74 @@ pass_env = ["*"] # needed by tox4, so env vars are visible for building borg # Test environments with different FUSE implementations [tool.tox.env.py310-none] -[tool.tox.env.py310-fuse2] +[tool.tox.env.py310-llfuse] set_env = {BORG_FUSE_IMPL = "llfuse"} extras = ["llfuse", "sftp", "s3"] -[tool.tox.env.py310-fuse3] +[tool.tox.env.py310-pyfuse3] set_env = {BORG_FUSE_IMPL = "pyfuse3"} extras = ["pyfuse3", "sftp", "s3"] +[tool.tox.env.py310-mfusepy] +set_env = {BORG_FUSE_IMPL = "mfusepy"} +extras = ["mfusepy", "sftp", "s3"] + [tool.tox.env.py311-none] -[tool.tox.env.py311-fuse2] +[tool.tox.env.py311-llfuse] set_env = {BORG_FUSE_IMPL = "llfuse"} extras = ["llfuse", "sftp", "s3"] -[tool.tox.env.py311-fuse3] +[tool.tox.env.py311-pyfuse3] set_env = {BORG_FUSE_IMPL = "pyfuse3"} extras = ["pyfuse3", "sftp", "s3"] +[tool.tox.env.py311-mfusepy] +set_env = {BORG_FUSE_IMPL = "mfusepy"} +extras = ["mfusepy", "sftp", "s3"] + [tool.tox.env.py312-none] -[tool.tox.env.py312-fuse2] +[tool.tox.env.py312-llfuse] set_env = {BORG_FUSE_IMPL = "llfuse"} extras = ["llfuse", "sftp", "s3"] -[tool.tox.env.py312-fuse3] +[tool.tox.env.py312-pyfuse3] set_env = {BORG_FUSE_IMPL = "pyfuse3"} extras = ["pyfuse3", "sftp", "s3"] +[tool.tox.env.py312-mfusepy] +set_env = {BORG_FUSE_IMPL = "mfusepy"} +extras = ["mfusepy", "sftp", "s3"] + [tool.tox.env.py313-none] -[tool.tox.env.py313-fuse2] +[tool.tox.env.py313-llfuse] set_env = {BORG_FUSE_IMPL = "llfuse"} extras = ["llfuse", "sftp", "s3"] -[tool.tox.env.py313-fuse3] +[tool.tox.env.py313-pyfuse3] set_env = {BORG_FUSE_IMPL = "pyfuse3"} extras = ["pyfuse3", "sftp", "s3"] +[tool.tox.env.py313-mfusepy] +set_env = {BORG_FUSE_IMPL = "mfusepy"} +extras = ["mfusepy", "sftp", "s3"] + [tool.tox.env.py314-none] -[tool.tox.env.py314-fuse2] +[tool.tox.env.py314-llfuse] set_env = {BORG_FUSE_IMPL = "llfuse"} extras = ["llfuse", "sftp", "s3"] -[tool.tox.env.py314-fuse3] +[tool.tox.env.py314-pyfuse3] set_env = {BORG_FUSE_IMPL = "pyfuse3"} extras = ["pyfuse3", "sftp", "s3"] +[tool.tox.env.py314-mfusepy] +set_env = {BORG_FUSE_IMPL = "mfusepy"} +extras = ["mfusepy", "sftp", "s3"] + [tool.tox.env.ruff] skip_install = true deps = ["ruff"] diff --git a/src/borg/archiver/mount_cmds.py b/src/borg/archiver/mount_cmds.py index 13180c7cd8..2acae843e8 100644 --- a/src/borg/archiver/mount_cmds.py +++ b/src/borg/archiver/mount_cmds.py @@ -19,9 +19,9 @@ def do_mount(self, args): """Mounts an archive or an entire repository as a FUSE filesystem.""" # Perform these checks before opening the repository and asking for a passphrase. - from ..fuse_impl import llfuse, BORG_FUSE_IMPL + from ..fuse_impl import llfuse, has_mfusepy, BORG_FUSE_IMPL - if llfuse is None: + if llfuse is None and not has_mfusepy: raise RTError("borg mount not available: no FUSE support, BORG_FUSE_IMPL=%s." 
% BORG_FUSE_IMPL) if not os.path.isdir(args.mountpoint): @@ -34,16 +34,31 @@ def do_mount(self, args): @with_repository(compatibility=(Manifest.Operation.READ,)) def _do_mount(self, args, repository, manifest): - from ..fuse import FuseOperations + from ..fuse_impl import has_mfusepy - with cache_if_remote(repository, decrypted_cache=manifest.repo_objs) as cached_repo: - operations = FuseOperations(manifest, args, cached_repo) + if has_mfusepy: + # Use mfusepy implementation + from ..hlfuse import borgfs + + operations = borgfs(manifest, args, repository) logger.info("Mounting filesystem") try: operations.mount(args.mountpoint, args.options, args.foreground, args.show_rc) except RuntimeError: # Relevant error message already printed to stderr by FUSE raise RTError("FUSE mount failed") + else: + # Use llfuse/pyfuse3 implementation + from ..fuse import FuseOperations + + with cache_if_remote(repository, decrypted_cache=manifest.repo_objs) as cached_repo: + operations = FuseOperations(manifest, args, cached_repo) + logger.info("Mounting filesystem") + try: + operations.mount(args.mountpoint, args.options, args.foreground, args.show_rc) + except RuntimeError: + # Relevant error message already printed to stderr by FUSE + raise RTError("FUSE mount failed") def do_umount(self, args): """Unmounts the FUSE filesystem.""" diff --git a/src/borg/conftest.py b/src/borg/conftest.py index 540a6b3f9e..ebe1be1807 100644 --- a/src/borg/conftest.py +++ b/src/borg/conftest.py @@ -12,7 +12,7 @@ setup_logging() from borg.archiver import Archiver # noqa: E402 -from borg.testsuite import has_lchflags, has_llfuse, has_pyfuse3 # noqa: E402 +from borg.testsuite import has_lchflags, has_llfuse, has_pyfuse3, has_mfusepy # noqa: E402 from borg.testsuite import are_symlinks_supported, are_hardlinks_supported, is_utime_fully_supported # noqa: E402 from borg.testsuite.archiver import BORG_EXES from borg.testsuite.platform.platform_test import fakeroot_detected # noqa: E402 @@ -37,8 +37,9 @@ def clean_env(tmpdir_factory, monkeypatch): def pytest_report_header(config, start_path): tests = { "BSD flags": has_lchflags, - "fuse2": has_llfuse, - "fuse3": has_pyfuse3, + "llfuse": has_llfuse, + "pyfuse3": has_pyfuse3, + "mfusepy": has_mfusepy, "root": not fakeroot_detected(), "symlinks": are_symlinks_supported(), "hardlinks": are_hardlinks_supported(), diff --git a/src/borg/fuse.py b/src/borg/fuse.py index 14e7142135..4138c430b0 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -9,10 +9,17 @@ import time from collections import defaultdict, Counter from signal import SIGINT +from typing import TYPE_CHECKING from .constants import ROBJ_FILE_STREAM, zeros -from .fuse_impl import llfuse, has_pyfuse3 -from .platform import ENOATTR + +if TYPE_CHECKING: + # For type checking, assume llfuse is available + # This allows mypy to understand llfuse.Operations + import llfuse + from .fuse_impl import has_pyfuse3, ENOATTR +else: + from .fuse_impl import llfuse, has_pyfuse3, ENOATTR if has_pyfuse3: import trio diff --git a/src/borg/fuse_impl.py b/src/borg/fuse_impl.py index 9f525f82ad..6e761d9265 100644 --- a/src/borg/fuse_impl.py +++ b/src/borg/fuse_impl.py @@ -1,36 +1,63 @@ """ -Loads the library for the low-level FUSE implementation. +Loads the library for the FUSE implementation. 
""" import os +import types -BORG_FUSE_IMPL = os.environ.get("BORG_FUSE_IMPL", "pyfuse3,llfuse") +from .platform import ENOATTR # noqa + +BORG_FUSE_IMPL = os.environ.get("BORG_FUSE_IMPL", "mfusepy,pyfuse3,llfuse") + +hlfuse: types.ModuleType | None = None +llfuse: types.ModuleType | None = None for FUSE_IMPL in BORG_FUSE_IMPL.split(","): FUSE_IMPL = FUSE_IMPL.strip() if FUSE_IMPL == "pyfuse3": try: - import pyfuse3 as llfuse + import pyfuse3 except ImportError: pass else: + llfuse = pyfuse3 has_llfuse = False has_pyfuse3 = True + has_mfusepy = False + has_any_fuse = True + hlfuse = None # noqa break elif FUSE_IMPL == "llfuse": try: - import llfuse + import llfuse as llfuse_module except ImportError: pass else: + llfuse = llfuse_module has_llfuse = True has_pyfuse3 = False + has_mfusepy = False + has_any_fuse = True + hlfuse = None # noqa + break + elif FUSE_IMPL == "mfusepy": + try: + import mfusepy + except ImportError: + pass + else: + hlfuse = mfusepy + has_llfuse = False + has_pyfuse3 = False + has_mfusepy = True + has_any_fuse = True break elif FUSE_IMPL == "none": pass else: raise RuntimeError("Unknown FUSE implementation in BORG_FUSE_IMPL: '%s'." % BORG_FUSE_IMPL) else: - llfuse = None # noqa has_llfuse = False has_pyfuse3 = False + has_mfusepy = False + has_any_fuse = False diff --git a/src/borg/hlfuse.py b/src/borg/hlfuse.py new file mode 100644 index 0000000000..c08475961e --- /dev/null +++ b/src/borg/hlfuse.py @@ -0,0 +1,697 @@ +import datetime +import errno +import hashlib +import os +import stat +import time +from collections import Counter +from typing import TYPE_CHECKING + +from .constants import ROBJ_FILE_STREAM, zeros, ROBJ_DONTCARE + +if TYPE_CHECKING: + # For type checking, assume mfusepy is available + # This allows mypy to understand hlfuse.Operations + import mfusepy as hlfuse + from .fuse_impl import ENOATTR +else: + from .fuse_impl import hlfuse, ENOATTR + +from .logger import create_logger + +logger = create_logger() + +from .archiver._common import build_matcher, build_filter +from .archive import Archive, get_item_uid_gid +from .hashindex import FuseVersionsIndex +from .helpers import daemonize, daemonizing, signal_handler, bin_to_hex +from .helpers import HardLinkManager +from .helpers import msgpack +from .helpers.lrucache import LRUCache +from .item import Item +from .platform import uid2user, gid2group +from .platformflags import is_darwin +from .repository import Repository +from .remote import RemoteRepository + +BLOCK_SIZE = 512 # Standard filesystem block size for st_blocks and statfs +DEBUG_LOG: str | None = None # os.path.join(os.getcwd(), "fuse_debug.log") + + +def debug_log(msg): + """Append debug message to fuse_debug.log""" + if DEBUG_LOG: + timestamp = datetime.datetime.now().strftime("%H:%M:%S.%f")[:-3] + with open(DEBUG_LOG, "a") as f: + f.write(f"{timestamp} {msg}\n") + + +class DirEntry: + __slots__ = ("ino", "parent", "children") + + def __init__(self, ino, parent=None): + self.ino = ino # inode number + self.parent = parent + self.children = None # name (bytes) -> DirEntry, lazily allocated + + def add_child(self, name, child): + """Add a child entry, lazily allocating the children dict if needed.""" + if self.children is None: + self.children = {} + self.children[name] = child + + def get_child(self, name): + """Get a child entry by name, returns None if not found.""" + if self.children is None: + return None + return self.children.get(name) + + def has_child(self, name): + """Check if a child with the given name exists.""" + if 
self.children is None: + return False + return name in self.children + + def iter_children(self): + """Iterate over (name, child) pairs.""" + if self.children is None: + return iter([]) + return self.children.items() + + +class FuseBackend: + """Virtual filesystem based on archive(s) to provide information to fuse""" + + def __init__(self, manifest, args, repository): + self._args = args + self.numeric_ids = args.numeric_ids + self._manifest = manifest + self.repo_objs = manifest.repo_objs + self.repository = repository + + self.default_uid = os.getuid() + self.default_gid = os.getgid() + self.default_dir = None + + self.current_ino = 0 + self.inodes = {} # node.ino -> packed item + self.root = self._create_node() + self.pending_archives = {} # DirEntry -> Archive + + self.allow_damaged_files = False + self.versions = False + self.uid_forced = None + self.gid_forced = None + self.umask = 0 + self.archive_root_dir = {} # archive ID --> directory name + + # Cache for file handles + self.handles = {} + self.handle_count = 0 + + # Cache for chunks (moved from ItemCache) + self.chunks_cache = LRUCache(capacity=10) + + def _create_node(self, item=None, parent=None): + self.current_ino += 1 + if item is not None: + self.set_inode(self.current_ino, item) + return DirEntry(self.current_ino, parent) + + def get_inode(self, ino): + packed = self.inodes.get(ino) + if packed is None: + return None + return Item(internal_dict=msgpack.unpackb(packed)) + + def set_inode(self, ino, item): + if item is None: + self.inodes.pop(ino, None) + else: + # Remove path from the item dict before packing to save memory. + # The path is already encoded in the DirEntry tree structure. + item_dict = item.as_dict() + item_dict.pop("path", None) + self.inodes[ino] = msgpack.packb(item_dict) + + def _create_filesystem(self): + self.set_inode(self.root.ino, self.default_dir) + self.versions_index = FuseVersionsIndex() + + if getattr(self._args, "name", None): + archives = [self._manifest.archives.get(self._args.name)] + else: + archives = self._manifest.archives.list_considering(self._args) + + name_counter = Counter(a.name for a in archives) + duplicate_names = {a.name for a in archives if name_counter[a.name] > 1} + + for archive in archives: + name = f"{archive.name}" + if name in duplicate_names: + name += f"-{bin_to_hex(archive.id):.8}" + self.archive_root_dir[archive.id] = name + + for archive in archives: + if self.versions: + self._process_archive(archive.id) + else: + # Create placeholder for archive + name = self.archive_root_dir[archive.id] + name_bytes = os.fsencode(name) + + archive_node = self._create_node(parent=self.root) + # Create a directory item for the archive + item = Item(internal_dict=self.default_dir.as_dict()) + item.mtime = int(archive.ts.timestamp() * 1e9) + self.set_inode(archive_node.ino, item) + + self.root.add_child(name_bytes, archive_node) + self.pending_archives[archive_node] = archive + + def check_pending_archive(self, node): + archive_info = self.pending_archives.pop(node, None) + if archive_info is not None: + self._process_archive(archive_info.id, node) + + def _iter_archive_items(self, archive_item_ids, filter=None): + unpacker = msgpack.Unpacker() + for id, cdata in zip(archive_item_ids, self.repository.get_many(archive_item_ids)): + _, data = self.repo_objs.parse(id, cdata, ro_type=ROBJ_DONTCARE) + unpacker.feed(data) + for item in unpacker: + item = Item(internal_dict=item) + if filter and not filter(item): + continue + yield item + + def _process_archive(self, archive_id, 
root_node=None): + if root_node is None: + root_node = self.root + + self.file_versions = {} # for versions mode: original path -> version + + archive = Archive(self._manifest, archive_id) + strip_components = self._args.strip_components + matcher = build_matcher(self._args.patterns, self._args.paths) + hlm = HardLinkManager(id_type=bytes, info_type=str) + + filter = build_filter(matcher, strip_components) + + for item in self._iter_archive_items(archive.metadata.items, filter=filter): + if strip_components: + item.path = os.sep.join(item.path.split(os.sep)[strip_components:]) + + path = os.fsencode(item.path) + segments = path.split(b"/") + is_dir = stat.S_ISDIR(item.mode) + + # For versions mode, handle files differently + if self.versions and not is_dir: + self._process_leaf_versioned(segments, item, root_node, hlm) + else: + # Original non-versions logic + node = root_node + # Traverse/Create directories + for segment in segments[:-1]: + if not node.has_child(segment): + new_node = self._create_node(parent=node) + # We might need a default directory item if it's an implicit directory + self.set_inode(new_node.ino, Item(internal_dict=self.default_dir.as_dict())) + node.add_child(segment, new_node) + node = node.get_child(segment) + + # Leaf (file or explicit directory) + leaf_name = segments[-1] + if node.has_child(leaf_name): + # Already exists (e.g. implicit dir became explicit) + child = node.get_child(leaf_name) + self.set_inode(child.ino, item) # Update item + node = child + else: + new_node = self._create_node(item, parent=node) + node.add_child(leaf_name, new_node) + node = new_node + + # Handle hardlinks (non-versions mode) + if "hlid" in item: + link_target = hlm.retrieve(id=item.hlid, default=None) + if link_target is not None: + target_path = os.fsencode(link_target) + target_node = self._find_node_from_root(root_node, target_path) + if target_node: + # Reuse ino and item from target + node.ino = target_node.ino + # node.item = target_node.item # implicitly shared via ID + item = self.get_inode(node.ino) + if "nlink" not in item: + item.nlink = 1 + item.nlink += 1 + self.set_inode(node.ino, item) + else: + logger.warning("Hardlink target not found: %s", link_target) + else: + hlm.remember(id=item.hlid, info=item.path) + + def _process_leaf_versioned(self, segments, item, root_node, hlm): + """Process a file leaf node in versions mode""" + path = b"/".join(segments) + original_path = item.path + + # Handle hardlinks in versions mode - check if we've seen this hardlink before + is_hardlink = "hlid" in item + link_target = None + if is_hardlink: + link_target = hlm.retrieve(id=item.hlid, default=None) + if link_target is None: + # First occurrence of this hardlink + hlm.remember(id=item.hlid, info=original_path) + + # Calculate version for this file + # If it's a hardlink to a previous file, use that version + if is_hardlink and link_target is not None: + link_target_enc = os.fsencode(link_target) + version = self.file_versions.get(link_target_enc) + else: + version = self._file_version(item, path) + + # Store version for this path + if version is not None: + self.file_versions[path] = version + + # Navigate to parent directory + node = root_node + for segment in segments[:-1]: + if not node.has_child(segment): + new_node = self._create_node(parent=node) + self.set_inode(new_node.ino, Item(internal_dict=self.default_dir.as_dict())) + node.add_child(segment, new_node) + node = node.get_child(segment) + + # Create intermediate directory with the filename + leaf_name = segments[-1] 
+ if not node.has_child(leaf_name): + intermediate_node = self._create_node(parent=node) + self.set_inode(intermediate_node.ino, Item(internal_dict=self.default_dir.as_dict())) + node.add_child(leaf_name, intermediate_node) + else: + intermediate_node = node.get_child(leaf_name) + + # Create versioned filename + if version is not None: + versioned_name = self._make_versioned_name(leaf_name, version) + + # If this is a hardlink to a previous file, reuse that node + if is_hardlink and link_target is not None: + link_target_enc = os.fsencode(link_target) + link_segments = link_target_enc.split(b"/") + link_version = self.file_versions.get(link_target_enc) + if link_version is not None: + # Navigate to the link target + target_node = root_node + for seg in link_segments[:-1]: + if target_node.has_child(seg): + target_node = target_node.get_child(seg) + else: + break + else: + # Get intermediate dir + link_leaf = link_segments[-1] + if target_node.has_child(link_leaf): + target_intermediate = target_node.get_child(link_leaf) + target_versioned = self._make_versioned_name(link_leaf, link_version) + if target_intermediate.has_child(target_versioned): + original_node = target_intermediate.get_child(target_versioned) + # Create new node but reuse the ino and item from original + item = self.get_inode(original_node.ino) + file_node = self._create_node(item, parent=intermediate_node) + file_node.ino = original_node.ino + # Update nlink count + item = self.get_inode(file_node.ino) + if "nlink" not in item: + item.nlink = 1 + item.nlink += 1 + self.set_inode(file_node.ino, item) + intermediate_node.add_child(versioned_name, file_node) + return + + # Not a hardlink or first occurrence - create new node + file_node = self._create_node(item, parent=intermediate_node) + intermediate_node.add_child(versioned_name, file_node) + + def _file_version(self, item, path): + """Calculate version number for a file based on its contents""" + if "chunks" not in item: + return None + + # note: using sha256 here because nowadays it is often hw accelerated. + # shortening the hashes to 16 bytes to save some memory. 
+ file_id = hashlib.sha256(path).digest()[:16] + current_version, previous_id = self.versions_index.get(file_id, (0, None)) + + contents_id = hashlib.sha256(b"".join(chunk_id for chunk_id, _ in item.chunks)).digest()[:16] + + if contents_id != previous_id: + current_version += 1 + self.versions_index[file_id] = current_version, contents_id + + return current_version + + def _make_versioned_name(self, name, version): + """Generate versioned filename like 'file.00001.txt'""" + # keep original extension at end to avoid confusing tools + name_str = name.decode("utf-8", "surrogateescape") if isinstance(name, bytes) else name + name_part, ext = os.path.splitext(name_str) + version_str = ".%05d" % version + versioned = name_part + version_str + ext + return versioned.encode("utf-8", "surrogateescape") if isinstance(name, bytes) else versioned + + def _find_node_from_root(self, root, path): + if path == b"" or path == b".": + return root + segments = path.split(b"/") + node = root + for segment in segments: + child = node.get_child(segment) + if child is not None: + node = child + else: + return None + return node + + def _find_node(self, path): + if isinstance(path, str): + path = os.fsencode(path) + if path == b"/" or path == b"": + return self.root + if path.startswith(b"/"): + path = path[1:] + + segments = path.split(b"/") + node = self.root + for segment in segments: + if node in self.pending_archives: + self.check_pending_archive(node) + child = node.get_child(segment) + if child is not None: + node = child + else: + return None + + if node in self.pending_archives: + self.check_pending_archive(node) + + return node + + def _get_handle(self, node): + self.handle_count += 1 + self.handles[self.handle_count] = node + return self.handle_count + + def _get_node_from_handle(self, fh): + return self.handles.get(fh) + + def _make_stat_dict(self, node): + """Create a stat dictionary from a node.""" + item = self.get_inode(node.ino) + st = {} + st["st_ino"] = node.ino + st["st_mode"] = item.mode & ~self.umask + st["st_nlink"] = item.get("nlink", 1) + if stat.S_ISDIR(st["st_mode"]): + st["st_nlink"] = max(st["st_nlink"], 2) + st["st_uid"], st["st_gid"] = get_item_uid_gid( + item, + numeric=self.numeric_ids, + uid_default=self.default_uid, + gid_default=self.default_gid, + uid_forced=self.uid_forced, + gid_forced=self.gid_forced, + ) + st["st_rdev"] = item.get("rdev", 0) + st["st_size"] = item.get_size() + st["st_blocks"] = (st["st_size"] + BLOCK_SIZE - 1) // BLOCK_SIZE + if getattr(self, "use_ns", False): + st["st_mtime"] = item.mtime + st["st_atime"] = item.get("atime", item.mtime) + st["st_ctime"] = item.get("ctime", item.mtime) + else: + st["st_mtime"] = item.mtime / 1e9 + st["st_atime"] = item.get("atime", item.mtime) / 1e9 + st["st_ctime"] = item.get("ctime", item.mtime) / 1e9 + return st + + +class borgfs(hlfuse.Operations, FuseBackend): + """Export archive as a FUSE filesystem""" + + use_ns = True + + def __init__(self, manifest, args, repository): + hlfuse.Operations.__init__(self) + FuseBackend.__init__(self, manifest, args, repository) + data_cache_capacity = int(os.environ.get("BORG_MOUNT_DATA_CACHE_ENTRIES", os.cpu_count() or 1)) + logger.debug("mount data cache capacity: %d chunks", data_cache_capacity) + self.data_cache = LRUCache(capacity=data_cache_capacity) + self._last_pos = LRUCache(capacity=4) + + def sig_info_handler(self, sig_no, stack): + # Simplified instrumentation + logger.debug("fuse: %d inodes", self.current_ino) + + def mount(self, mountpoint, mount_options, 
foreground=False, show_rc=False): + """Mount filesystem on *mountpoint* with *mount_options*.""" + + def pop_option(options, key, present, not_present, wanted_type, int_base=0): + assert isinstance(options, list) # we mutate this + for idx, option in enumerate(options): + if option == key: + options.pop(idx) + return present + if option.startswith(key + "="): + options.pop(idx) + value = option.split("=", 1)[1] + if wanted_type is bool: + v = value.lower() + if v in ("y", "yes", "true", "1"): + return True + if v in ("n", "no", "false", "0"): + return False + raise ValueError("unsupported value in option: %s" % option) + if wanted_type is int: + try: + return int(value, base=int_base) + except ValueError: + raise ValueError("unsupported value in option: %s" % option) from None + try: + return wanted_type(value) + except ValueError: + raise ValueError("unsupported value in option: %s" % option) from None + else: + return not_present + + options = ["fsname=borgfs", "ro", "default_permissions"] + if mount_options: + options.extend(mount_options.split(",")) + if is_darwin: + volname = pop_option(options, "volname", "", "", str) + volname = volname or f"{os.path.basename(mountpoint)} (borgfs)" + options.append(f"volname={volname}") + ignore_permissions = pop_option(options, "ignore_permissions", True, False, bool) + if ignore_permissions: + pop_option(options, "default_permissions", True, False, bool) + self.allow_damaged_files = pop_option(options, "allow_damaged_files", True, False, bool) + self.versions = pop_option(options, "versions", True, False, bool) + self.uid_forced = pop_option(options, "uid", None, None, int) + self.gid_forced = pop_option(options, "gid", None, None, int) + self.umask = pop_option(options, "umask", 0, 0, int, int_base=8) + dir_uid = self.uid_forced if self.uid_forced is not None else self.default_uid + dir_gid = self.gid_forced if self.gid_forced is not None else self.default_gid + dir_user = uid2user(dir_uid) + dir_group = gid2group(dir_gid) + assert isinstance(dir_user, str) + assert isinstance(dir_group, str) + dir_mode = 0o40755 & ~self.umask + self.default_dir = Item( + mode=dir_mode, mtime=int(time.time() * 1e9), user=dir_user, group=dir_group, uid=dir_uid, gid=dir_gid + ) + self._create_filesystem() + + # hlfuse.FUSE will block if foreground=True, otherwise it returns immediately + if not foreground: + # Background mode: daemonize first, then start FUSE (blocking) + if isinstance(self.repository, RemoteRepository): + daemonize() + else: + with daemonizing(show_rc=show_rc) as (old_id, new_id): + logger.debug("fuse: mount local repo, going to background: migrating lock.") + self.repository.migrate_lock(old_id, new_id) + + # Run the FUSE main loop in foreground (we might be daemonized already or not) + with signal_handler("SIGUSR1", self.sig_info_handler), signal_handler("SIGINFO", self.sig_info_handler): + hlfuse.FUSE(self, mountpoint, options, foreground=True, use_ino=True) + + def statfs(self, path): + debug_log(f"statfs(path={path!r})") + stat_ = {} + stat_["f_bsize"] = BLOCK_SIZE + stat_["f_frsize"] = BLOCK_SIZE + stat_["f_blocks"] = 0 + stat_["f_bfree"] = 0 + stat_["f_bavail"] = 0 + stat_["f_files"] = 0 + stat_["f_ffree"] = 0 + stat_["f_favail"] = 0 + stat_["f_namemax"] = 255 + debug_log(f"statfs -> {stat_}") + return stat_ + + def getattr(self, path, fh=None): + debug_log(f"getattr(path={path!r}, fh={fh})") + if fh is not None: + # use file handle if available to avoid path lookup + node = self._get_node_from_handle(fh) + if node is None: + raise 
hlfuse.FuseOSError(errno.EBADF) + else: + node = self._find_node(path) + if node is None: + raise hlfuse.FuseOSError(errno.ENOENT) + st = self._make_stat_dict(node) + debug_log(f"getattr -> {st}") + return st + + def listxattr(self, path): + debug_log(f"listxattr(path={path!r})") + node = self._find_node(path) + if node is None: + raise hlfuse.FuseOSError(errno.ENOENT) + item = self.get_inode(node.ino) + result = [k.decode("utf-8", "surrogateescape") for k in item.get("xattrs", {}).keys()] + debug_log(f"listxattr -> {result}") + return result + + def getxattr(self, path, name, position=0): + debug_log(f"getxattr(path={path!r}, name={name!r}, position={position})") + node = self._find_node(path) + if node is None: + raise hlfuse.FuseOSError(errno.ENOENT) + item = self.get_inode(node.ino) + try: + if isinstance(name, str): + name = name.encode("utf-8", "surrogateescape") + result = item.get("xattrs", {})[name] or b"" + debug_log(f"getxattr -> {len(result)} bytes") + return result + except KeyError: + debug_log("getxattr -> ENOATTR") + raise hlfuse.FuseOSError(ENOATTR) from None + + def open(self, path, fi): + debug_log(f"open(path={path!r}, fi={fi})") + node = self._find_node(path) + if node is None: + raise hlfuse.FuseOSError(errno.ENOENT) + fh = self._get_handle(node) + fi.fh = fh + debug_log(f"open -> fh={fh}") + return 0 + + def release(self, path, fi): + debug_log(f"release(path={path!r}, fh={fi.fh})") + self.handles.pop(fi.fh, None) + self._last_pos.pop(fi.fh, None) + return 0 + + def create(self, path, mode, fi=None): + debug_log(f"create(path={path!r}, mode={mode}, fi={fi}) -> EROFS") + raise hlfuse.FuseOSError(errno.EROFS) + + def read(self, path, size, offset, fi): + fh = fi.fh + debug_log(f"read(path={path!r}, size={size}, offset={offset}, fh={fh})") + node = self._get_node_from_handle(fh) + if node is None: + raise hlfuse.FuseOSError(errno.EBADF) + + item = self.get_inode(node.ino) + parts = [] + + # optimize for linear reads: + chunk_no, chunk_offset = self._last_pos.get(fh, (0, 0)) + if chunk_offset > offset: + chunk_no, chunk_offset = (0, 0) + + offset -= chunk_offset + chunks = item.chunks + + for idx in range(chunk_no, len(chunks)): + id, s = chunks[idx] + if s < offset: + offset -= s + chunk_offset += s + chunk_no += 1 + continue + n = min(size, s - offset) + if id in self.data_cache: + data = self.data_cache[id] + if offset + n == len(data): + del self.data_cache[id] + else: + try: + # Direct repository access + cdata = self.repository.get(id) + except Repository.ObjectNotFound: + if self.allow_damaged_files: + data = zeros[:s] + assert len(data) == s + else: + raise hlfuse.FuseOSError(errno.EIO) from None + else: + _, data = self.repo_objs.parse(id, cdata, ro_type=ROBJ_FILE_STREAM) + if offset + n < len(data): + self.data_cache[id] = data + parts.append(data[offset : offset + n]) + offset = 0 + size -= n + if not size: + if fh in self._last_pos: + self._last_pos.replace(fh, (chunk_no, chunk_offset)) + else: + self._last_pos[fh] = (chunk_no, chunk_offset) + break + result = b"".join(parts) + debug_log(f"read -> {len(result)} bytes") + return result + + def readdir(self, path, fh=None): + debug_log(f"readdir(path={path!r}, fh={fh})") + node = self._find_node(path) + if node is None: + raise hlfuse.FuseOSError(errno.ENOENT) + + offset = 0 + offset += 0 # += 1 + debug_log(f"readdir yielding . {offset}") + yield (".", self._make_stat_dict(node), offset) + offset += 0 # += 1 + debug_log(f"readdir yielding .. 
{offset}") + parent = node.parent if node.parent else node + yield ("..", self._make_stat_dict(parent), offset) + + for name, child_node in node.iter_children(): + name_str = name.decode("utf-8", "surrogateescape") + st = self._make_stat_dict(child_node) + offset += 0 # += 1 + debug_log(f"readdir yielding {name_str} {offset} {st}") + yield (name_str, st, offset) + + def readlink(self, path): + debug_log(f"readlink(path={path!r})") + node = self._find_node(path) + if node is None: + raise hlfuse.FuseOSError(errno.ENOENT) + item = self.get_inode(node.ino) + result = item.target + debug_log(f"readlink -> {result!r}") + return result diff --git a/src/borg/testsuite/__init__.py b/src/borg/testsuite/__init__.py index 9eac7462c3..0a75d743d8 100644 --- a/src/borg/testsuite/__init__.py +++ b/src/borg/testsuite/__init__.py @@ -20,7 +20,7 @@ except: # noqa raises = None -from ..fuse_impl import llfuse, has_llfuse, has_pyfuse3 # NOQA +from ..fuse_impl import llfuse, has_any_fuse, has_llfuse, has_pyfuse3, has_mfusepy, ENOATTR # NOQA from .. import platform from ..platformflags import is_win32, is_darwin diff --git a/src/borg/testsuite/archiver/__init__.py b/src/borg/testsuite/archiver/__init__.py index d0e17fc20f..667bf413d7 100644 --- a/src/borg/testsuite/archiver/__init__.py +++ b/src/borg/testsuite/archiver/__init__.py @@ -27,7 +27,7 @@ from ...remote import RemoteRepository from ...repository import Repository from .. import has_lchflags, has_mknod, is_utime_fully_supported, have_fuse_mtime_ns, st_mtime_ns_round, filter_xattrs -from .. import changedir +from .. import changedir, ENOATTR # NOQA from .. import are_symlinks_supported, are_hardlinks_supported, are_fifos_supported, granularity_sleep from ..platform.platform_test import is_win32 from ...xattr import get_all diff --git a/src/borg/testsuite/archiver/mount_cmds_test.py b/src/borg/testsuite/archiver/mount_cmds_test.py index 6209ad7089..c979ba4e3d 100644 --- a/src/borg/testsuite/archiver/mount_cmds_test.py +++ b/src/borg/testsuite/archiver/mount_cmds_test.py @@ -1,3 +1,9 @@ +# This file tests the mount/umount commands. +# The FUSE implementation used depends on the BORG_FUSE_IMPL environment variable: +# - BORG_FUSE_IMPL=pyfuse3,llfuse: Tests run with llfuse/pyfuse3 (skipped if not available) +# - BORG_FUSE_IMPL=mfusepy: Tests run with mfusepy (skipped if not available) +# The tox configuration (pyproject.toml) runs these tests with different BORG_FUSE_IMPL settings. + import errno import os import stat @@ -7,10 +13,9 @@ from ... import xattr, platform from ...constants import * # NOQA -from ...platform import ENOATTR from ...storelocking import Lock from ...helpers import flags_noatime, flags_normal -from .. import has_lchflags, llfuse +from .. import has_lchflags, has_any_fuse, ENOATTR from .. import changedir, filter_xattrs, same_ts_ns from .. 
import are_symlinks_supported, are_hardlinks_supported, are_fifos_supported from ..platform.platform_test import fakeroot_detected @@ -21,7 +26,7 @@ @requires_hardlinks -@pytest.mark.skipif(not llfuse, reason="llfuse not installed") +@pytest.mark.skipif(not has_any_fuse, reason="FUSE not available") def test_fuse_mount_hardlinks(archivers, request): archiver = request.getfixturevalue(archivers) _extract_hardlinks_setup(archiver) @@ -59,7 +64,7 @@ def test_fuse_mount_hardlinks(archivers, request): assert open("input/dir1/subdir/hardlink", "rb").read() == b"123456" -@pytest.mark.skipif(not llfuse, reason="llfuse not installed") +@pytest.mark.skipif(not has_any_fuse, reason="FUSE not available") def test_fuse(archivers, request): archiver = request.getfixturevalue(archivers) if archiver.EXE and fakeroot_detected(): @@ -167,7 +172,7 @@ def has_noatime(some_file): raise -@pytest.mark.skipif(not llfuse, reason="llfuse not installed") +@pytest.mark.skipif(not has_any_fuse, reason="FUSE not available") def test_fuse_versions_view(archivers, request): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) @@ -201,7 +206,7 @@ def test_fuse_versions_view(archivers, request): assert open(hl3, "rb").read() == b"123456" -@pytest.mark.skipif(not llfuse, reason="llfuse not installed") +@pytest.mark.skipif(not has_any_fuse, reason="FUSE not available") def test_fuse_duplicate_name(archivers, request): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) @@ -219,7 +224,7 @@ def test_fuse_duplicate_name(archivers, request): assert "unique2" in dirs -@pytest.mark.skipif(not llfuse, reason="llfuse not installed") +@pytest.mark.skipif(not has_any_fuse, reason="FUSE not available") def test_fuse_allow_damaged_files(archivers, request): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) @@ -249,7 +254,7 @@ def test_fuse_allow_damaged_files(archivers, request): assert data.endswith(b"\0\0") -@pytest.mark.skipif(not llfuse, reason="llfuse not installed") +@pytest.mark.skipif(not has_any_fuse, reason="FUSE not available") def test_fuse_mount_options(archivers, request): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) @@ -272,7 +277,7 @@ def test_fuse_mount_options(archivers, request): assert sorted(os.listdir(os.path.join(mountpoint))) == [] -@pytest.mark.skipif(not llfuse, reason="llfuse not installed") +@pytest.mark.skipif(not has_any_fuse, reason="FUSE not available") def test_migrate_lock_alive(archivers, request): """Both old_id and new_id must not be stale during lock migration / daemonization.""" archiver = request.getfixturevalue(archivers)
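
Note on the backend selection added in src/borg/fuse_impl.py: BORG_FUSE_IMPL is a comma-separated priority list (new default "mfusepy,pyfuse3,llfuse") and the first backend that imports wins, with "none" meaning "do not load anything". A standalone sketch of that fallback behaviour, assuming only what the diff shows (select_fuse_impl is a hypothetical helper written for illustration, not part of the module):

    import importlib
    import os

    BORG_FUSE_IMPL = os.environ.get("BORG_FUSE_IMPL", "mfusepy,pyfuse3,llfuse")

    def select_fuse_impl(spec: str = BORG_FUSE_IMPL):
        """Return (name, module) of the first importable backend, else (None, None)."""
        for name in (part.strip() for part in spec.split(",")):
            if name == "none":
                continue  # explicit "do not try to load an implementation" entry
            if name not in ("mfusepy", "pyfuse3", "llfuse"):
                raise RuntimeError(f"Unknown FUSE implementation in BORG_FUSE_IMPL: {spec!r}.")
            try:
                return name, importlib.import_module(name)
            except ImportError:
                continue  # backend not installed, try the next candidate
        return None, None

For example, with BORG_FUSE_IMPL=pyfuse3,llfuse on a host that only has llfuse installed, the sketch returns ("llfuse", <module>), which corresponds to has_llfuse ending up True in the real module.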
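
The "versions" view in the new high-level backend (src/borg/hlfuse.py) exposes each file as a directory holding one entry per content version, named via _make_versioned_name in the 'file.00001.txt' style: a zero-padded counter inserted before the extension so tools still recognise the file type. A minimal sketch of just that naming rule, using a hypothetical free function in place of the backend method:

    import os

    def make_versioned_name(name: str, version: int) -> str:
        # Keep the original extension at the end; pad the version to five digits.
        stem, ext = os.path.splitext(name)
        return f"{stem}.{version:05d}{ext}"

    assert make_versioned_name("report.txt", 3) == "report.00003.txt"
    assert make_versioned_name("archive.tar.gz", 12) == "archive.tar.00012.gz"

os.path.splitext only splits the last suffix, so multi-suffix names such as 'archive.tar.gz' get the counter between '.tar' and '.gz', which matches what the bytes-aware helper in the diff produces for str input.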