Skip to content

Commit 6ab08b8

Browse files
Merge pull request borgbackup#9264 from ThomasWaldmann/path-sep-windows
path sep and windows related changes
2 parents 880e41c + 50f4e54 commit 6ab08b8

File tree

15 files changed

+154
-79
lines changed

15 files changed

+154
-79
lines changed

requirements.d/pyinstaller.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
pyinstaller==6.14.2
1+
pyinstaller==6.18.0

src/borg/archive.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import errno
33
import json
44
import os
5+
import posixpath
56
import stat
67
import sys
78
import time
@@ -1243,8 +1244,8 @@ def __init__(
12431244
@contextmanager
12441245
def create_helper(self, path, st, status=None, hardlinkable=True, strip_prefix=None):
12451246
if strip_prefix is not None:
1246-
assert not path.endswith(os.sep)
1247-
if strip_prefix.startswith(path + os.sep):
1247+
assert not path.endswith("/")
1248+
if strip_prefix.startswith(path + "/"):
12481249
# still on a directory level that shall be stripped - do not create an item for this!
12491250
yield None, "x", False, None
12501251
return
@@ -1547,7 +1548,7 @@ def s_to_ns(s):
15471548

15481549
# if the tar has names starting with "./", normalize them like borg create also does.
15491550
# ./dir/file must become dir/file in the borg archive.
1550-
normalized_path = os.path.normpath(tarinfo.name)
1551+
normalized_path = posixpath.normpath(tarinfo.name)
15511552
item = Item(
15521553
path=make_path_safe(normalized_path),
15531554
mode=tarinfo.mode | type,
@@ -1608,7 +1609,7 @@ def process_symlink(self, *, tarinfo, status, type):
16081609
def process_hardlink(self, *, tarinfo, status, type):
16091610
with self.create_helper(tarinfo, status, type) as (item, status):
16101611
# create a not hardlinked borg item, reusing the chunks, see HardLinkManager.__doc__
1611-
normalized_path = os.path.normpath(tarinfo.linkname)
1612+
normalized_path = posixpath.normpath(tarinfo.linkname)
16121613
safe_path = make_path_safe(normalized_path)
16131614
chunks = self.hlm.retrieve(safe_path)
16141615
if chunks is not None:

src/borg/archiver/create_cmd.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import argparse
44
import logging
55
import os
6+
import posixpath
67
import stat
78
import subprocess
89
import time
@@ -16,11 +17,11 @@
1617
from ..cache import Cache
1718
from ..constants import * # NOQA
1819
from ..compress import CompressionSpec
19-
from ..helpers import comment_validator, ChunkerParams, PathSpec
20+
from ..helpers import comment_validator, ChunkerParams, FilesystemPathSpec
2021
from ..helpers import archivename_validator, FilesCacheMode
2122
from ..helpers import eval_escapes
2223
from ..helpers import timestamp, archive_ts_now
23-
from ..helpers import get_cache_dir, os_stat, get_strip_prefix
24+
from ..helpers import get_cache_dir, os_stat, get_strip_prefix, slashify
2425
from ..helpers import dir_is_tagged
2526
from ..helpers import log_multi
2627
from ..helpers import basic_json_data, json_print
@@ -106,8 +107,9 @@ def create_inner(archive, cache, fso):
106107
pipe_bin = sys.stdin.buffer
107108
pipe = TextIOWrapper(pipe_bin, errors="surrogateescape")
108109
for path in iter_separated(pipe, paths_sep):
110+
path = slashify(path)
109111
strip_prefix = get_strip_prefix(path)
110-
path = os.path.normpath(path)
112+
path = posixpath.normpath(path)
111113
try:
112114
with backup_io("stat"):
113115
st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False)
@@ -160,7 +162,7 @@ def create_inner(archive, cache, fso):
160162
continue
161163

162164
strip_prefix = get_strip_prefix(path)
163-
path = os.path.normpath(path)
165+
path = posixpath.normpath(path)
164166
try:
165167
with backup_io("stat"):
166168
st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False)
@@ -489,7 +491,7 @@ def _rec_walk(
489491
path=path, fd=child_fd, st=st, strip_prefix=strip_prefix
490492
)
491493
for tag_name in tag_names:
492-
tag_path = os.path.join(path, tag_name)
494+
tag_path = posixpath.join(path, tag_name)
493495
self._rec_walk(
494496
path=tag_path,
495497
parent_fd=child_fd,
@@ -523,7 +525,7 @@ def _rec_walk(
523525
with backup_io("scandir"):
524526
entries = helpers.scandir_inorder(path=path, fd=child_fd)
525527
for dirent in entries:
526-
normpath = os.path.normpath(os.path.join(path, dirent.name))
528+
normpath = posixpath.normpath(posixpath.join(path, dirent.name))
527529
self._rec_walk(
528530
path=normpath,
529531
parent_fd=child_fd,
@@ -962,5 +964,5 @@ def build_parser_create(self, subparsers, common_parser, mid_common_parser):
962964

963965
subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
964966
subparser.add_argument(
965-
"paths", metavar="PATH", nargs="*", type=PathSpec, action="extend", help="paths to archive"
967+
"paths", metavar="PATH", nargs="*", type=FilesystemPathSpec, action="extend", help="paths to archive"
966968
)

src/borg/archiver/extract_cmd.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import sys
22
import argparse
33
import logging
4-
import os
54
import stat
65

76
from ._common import with_repository, with_archive
@@ -60,7 +59,7 @@ def do_extract(self, args, repository, manifest, archive):
6059
for item in archive.iter_items():
6160
orig_path = item.path
6261
if strip_components:
63-
stripped_path = os.sep.join(orig_path.split(os.sep)[strip_components:])
62+
stripped_path = "/".join(orig_path.split("/")[strip_components:])
6463
if not stripped_path:
6564
continue
6665
item.path = stripped_path

src/borg/archiver/help_cmd.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,14 @@ class HelpMixIn:
3535
start with ``src``.
3636
- When you back up relative paths like ``../../src``, the archived paths
3737
start with ``src``.
38+
- On native Windows, archived absolute paths look like ``C/Windows/System32``.
3839
3940
Borg supports different pattern styles. To define a non-default
4041
style for a specific pattern, prefix it with two characters followed
4142
by a colon ':' (i.e. ``fm:path/*``, ``sh:path/**``).
4243
44+
Note: Windows users must only use forward slashes in patterns, not backslashes.
45+
4346
The default pattern style for ``--exclude`` differs from ``--pattern``, see below.
4447
4548
`Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector ``fm:``
@@ -48,8 +51,8 @@ class HelpMixIn:
4851
any number of characters, '?' matching any single character, '[...]'
4952
matching any single character specified, including ranges, and '[!...]'
5053
matching any character not specified. For the purpose of these patterns,
51-
the path separator (backslash for Windows and '/' on other systems) is not
52-
treated specially. Wrap meta-characters in brackets for a literal
54+
the path separator (forward slash '/') is not treated specially.
55+
Wrap meta-characters in brackets for a literal
5356
match (i.e. ``[?]`` to match the literal character '?'). For a path
5457
to match a pattern, the full path must match, or it must match
5558
from the start of the full path to just before a path separator. Except
@@ -69,9 +72,7 @@ class HelpMixIn:
6972
`Regular expressions <https://docs.python.org/3/library/re.html>`_, selector ``re:``
7073
Unlike shell patterns, regular expressions are not required to match the full
7174
path and any substring match is sufficient. It is strongly recommended to
72-
anchor patterns to the start ('^'), to the end ('$') or both. Path
73-
separators (backslash for Windows and '/' on other systems) in paths are
74-
always normalized to a forward slash '/' before applying a pattern.
75+
anchor patterns to the start ('^'), to the end ('$') or both.
7576
7677
Path prefix, selector ``pp:``
7778
This pattern style is useful to match whole subdirectories. The pattern
@@ -103,6 +104,20 @@ class HelpMixIn:
103104
cannot supply ``re:`` patterns. Further, ensure that ``sh:`` and
104105
``fm:`` patterns only contain a handful of wildcards at most.
105106
107+
.. note::
108+
109+
**Windows path handling**: All paths in Borg archives use forward slashes (``/``)
110+
as path separators, regardless of the platform. When creating archives on Windows,
111+
backslashes from filesystem paths are automatically converted to forward slashes.
112+
113+
.. note::
114+
115+
**Windows reserved characters**: On Windows, when extracting archives created on
116+
POSIX systems, paths may contain characters that are reserved from being used in
117+
file or directory names (like: ``< > : " \\ | ? *``).
118+
These are replaced by characters in the unicode private use area (``U+F0xx``) like
119+
the CIFS mapchars feature does. It won't be pretty, but at least it works.
120+
106121
Exclusions can be passed via the command line option ``--exclude``. When used
107122
from within a shell, the patterns should be quoted to protect them from
108123
expansion.

src/borg/helpers/__init__.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,23 @@
2020
from .fs import ensure_dir, join_base_dir, get_socket_filename
2121
from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir
2222
from .fs import dir_is_tagged, dir_is_cachedir, remove_dotdot_prefixes, make_path_safe, scandir_inorder
23-
from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, get_strip_prefix, umount
23+
from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, get_strip_prefix, umount, slashify
2424
from .fs import O_, flags_dir, flags_special_follow, flags_special, flags_base, flags_normal, flags_noatime
2525
from .fs import HardLinkManager
2626
from .misc import sysinfo, log_multi, consume
2727
from .misc import ChunkIteratorFileWrapper, open_item, chunkit, iter_separated, ErrorIgnoringTextIOWrapper
2828
from .parseformat import bin_to_hex, hex_to_bin, safe_encode, safe_decode
2929
from .parseformat import text_to_json, binary_to_json, remove_surrogates, join_cmd
3030
from .parseformat import eval_escapes, decode_dict, positive_int_validator, interval
31-
from .parseformat import PathSpec, SortBySpec, ChunkerParams, FilesCacheMode, partial_format, DatetimeWrapper
31+
from .parseformat import (
32+
PathSpec,
33+
FilesystemPathSpec,
34+
SortBySpec,
35+
ChunkerParams,
36+
FilesCacheMode,
37+
partial_format,
38+
DatetimeWrapper,
39+
)
3240
from .parseformat import format_file_size, parse_file_size, FileSize
3341
from .parseformat import sizeof_fmt, sizeof_fmt_iec, sizeof_fmt_decimal, Location, text_validator
3442
from .parseformat import format_line, replace_placeholders, PlaceholderError, relative_time_marker_validator

src/borg/helpers/fs.py

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -249,13 +249,55 @@ def make_path_safe(path):
249249
For reasons of security, a ValueError is raised should
250250
`path` contain any '..' elements.
251251
"""
252+
if "\\.." in path or "..\\" in path:
253+
raise ValueError(f"unexpected '..' element in path {path!r}")
254+
255+
path = map_chars(path)
256+
252257
path = path.lstrip("/")
253258
if path.startswith("../") or "/../" in path or path.endswith("/..") or path == "..":
254259
raise ValueError(f"unexpected '..' element in path {path!r}")
255260
path = posixpath.normpath(path)
256261
return path
257262

258263

264+
def slashify(path):
    """
    Return `path` with every backslash turned into a forward slash on Windows.

    On all other platforms `path` is returned unchanged, because a backslash
    is an ordinary filename character there.

    Use case: we always want to use forward slashes, even on Windows.
    """
    if not is_win32:
        return path
    return path.replace("\\", "/")
271+
272+
273+
# Bijective mapping to the Unicode Private Use Area (like cifs mapchars):
# every character that is reserved on Windows is shifted to U+F000 plus its
# own code point (e.g. "<" == U+003C becomes U+F03C).
WINDOWS_MAP_CHARS = str.maketrans({ch: chr(0xF000 + ord(ch)) for ch in '<>:"\\|?*'})
286+
287+
288+
def map_chars(path):
    """
    Translate characters that are reserved on Windows, if running on Windows.

    Use case: an archived path may contain characters that are fine on POSIX
    but reserved on Windows; they are swapped for the replacements defined in
    WINDOWS_MAP_CHARS so the path becomes usable on Windows.

    On non-Windows platforms `path` is returned unchanged.
    """
    return path.translate(WINDOWS_MAP_CHARS) if is_win32 else path
299+
300+
259301
def get_strip_prefix(path):
260302
# similar to how rsync does it, we allow users to give paths like:
261303
# /this/gets/stripped/./this/is/kept
@@ -265,7 +307,7 @@ def get_strip_prefix(path):
265307
pos = path.find("/./") # detect slashdot hack
266308
if pos > 0:
267309
# found a prefix to strip! make sure it ends with one "/"!
268-
return os.path.normpath(path[:pos]) + os.sep
310+
return posixpath.normpath(path[:pos]) + "/"
269311
else:
270312
# no or empty prefix, nothing to strip!
271313
return None
@@ -276,15 +318,14 @@ def get_strip_prefix(path):
276318

277319
def remove_dotdot_prefixes(path):
278320
"""
279-
Remove '../'s at the beginning of `path`. Additionally,
280-
the path is made relative.
321+
Remove '../'s at the beginning of `path`. Additionally, the path is made relative.
281322
282-
`path` is expected to be normalized already (e.g. via `os.path.normpath()`).
323+
`path` is expected to be normalized already (e.g. via `posixpath.normpath()`).
283324
"""
325+
assert "\\" not in path
284326
if is_win32:
285327
if len(path) > 1 and path[1] == ":":
286328
path = path.replace(":", "", 1)
287-
path = path.replace("\\", "/")
288329

289330
path = path.lstrip("/")
290331
path = _dotdot_re.sub("", path)

src/borg/helpers/parseformat.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,17 @@
1818
from string import Formatter
1919

2020
from ..logger import create_logger
21-
from ..platformflags import is_win32
2221

2322
logger = create_logger()
2423

2524
from .errors import Error
26-
from .fs import get_keys_dir, make_path_safe
25+
from .fs import get_keys_dir, make_path_safe, slashify
2726
from .msgpack import Timestamp
2827
from .time import OutputTimestamp, format_time, safe_timestamp
2928
from .. import __version__ as borg_version
3029
from .. import __version_tuple__ as borg_version_tuple
3130
from ..constants import * # NOQA
31+
from ..platformflags import is_win32
3232

3333
if TYPE_CHECKING:
3434
from ..item import ItemDiff
@@ -335,6 +335,12 @@ def PathSpec(text):
335335
return text
336336

337337

338+
def FilesystemPathSpec(text):
    """
    argparse `type=` callable for filesystem paths given on the command line.

    Returns `text` passed through slashify(); raises argparse.ArgumentTypeError
    when `text` is empty.
    """
    if text:
        return slashify(text)
    raise argparse.ArgumentTypeError("Empty strings are not accepted as paths.")
342+
343+
338344
def SortBySpec(text):
339345
from ..manifest import AI_HUMAN_SORT_KEYS
340346

@@ -558,7 +564,8 @@ def _parse(self, text):
558564
m = self.local_re.match(text)
559565
if m:
560566
self.proto = "file"
561-
self.path = os.path.abspath(os.path.normpath(m.group("path")))
567+
path = m.group("path")
568+
self.path = slashify(os.path.abspath(path)) if is_win32 else os.path.abspath(path)
562569
return True
563570
return False
564571

src/borg/item.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ from cpython.bytes cimport PyBytes_AsStringAndSize
77
from .constants import ITEM_KEYS, ARCHIVE_KEYS
88
from .helpers import StableDict
99
from .helpers import format_file_size
10-
from .helpers.fs import assert_sanitized_path, to_sanitized_path
10+
from .helpers.fs import assert_sanitized_path, to_sanitized_path, map_chars, slashify
1111
from .helpers.msgpack import timestamp_to_int, int_to_timestamp, Timestamp
1212
from .helpers.time import OutputTimestamp, safe_timestamp
1313

@@ -265,7 +265,7 @@ cdef class Item(PropDict):
265265

266266
path = PropDictProperty(str, 'surrogate-escaped str', encode=assert_sanitized_path, decode=to_sanitized_path)
267267
source = PropDictProperty(str, 'surrogate-escaped str') # legacy borg 1.x. borg 2: see .target
268-
target = PropDictProperty(str, 'surrogate-escaped str')
268+
target = PropDictProperty(str, 'surrogate-escaped str', encode=slashify, decode=map_chars)
269269
user = PropDictProperty(str, 'surrogate-escaped str')
270270
group = PropDictProperty(str, 'surrogate-escaped str')
271271

src/borg/legacyrepository.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import stat
66
import struct
77
import time
8+
from pathlib import Path
89
from collections import defaultdict
910
from configparser import ConfigParser
1011
from functools import partial
@@ -27,7 +28,6 @@
2728
from .repoobj import RepoObj
2829
from .checksums import crc32, StreamingXXH64
2930
from .crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
30-
from .repository import _local_abspath_to_file_url
3131

3232
logger = create_logger(__name__)
3333

@@ -191,8 +191,9 @@ class PathPermissionDenied(Error):
191191
exit_mcode = 21
192192

193193
def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, send_log_cb=None):
194-
self.path = os.path.abspath(path)
195-
self._location = Location(_local_abspath_to_file_url(self.path))
194+
p = Path(path).absolute()
195+
self.path = str(p)
196+
self._location = Location(p.as_uri())
196197
self.version = None
197198
# long-running repository methods which emit log or progress output are responsible for calling
198199
# the ._send_log method periodically to get log and progress output transferred to the borg client

0 commit comments

Comments
 (0)