Skip to content

Commit 96ebf46

Browse files
Replace merge_dist_dir.cmake with fileset_tool.py. (#32)
Rewrites the directory merging tool in Python and extends it with include/exclude filtering and other options. This patch should be NFC (no functional change); the plan is to use fileset_tool.py for managing distribution packages in a next step, which requires the tool to be more flexible. --------- Co-authored-by: Scott Todd <[email protected]>
1 parent 7030216 commit 96ebf46

File tree

3 files changed

+213
-67
lines changed

3 files changed

+213
-67
lines changed

build_tools/fileset_tool.py

+210
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
#!/usr/bin/env python
2+
"""fileset_tool.py
3+
4+
Helper tool for manipulating filesets by listing matching files, copying,
5+
archiving, etc. This is ultimately inspired by the fileset manipulation behavior
6+
of Ant, which uses recursive glob include/exclude patterns rooted on some
7+
base directory to manage artifact moving and packaging.
8+
9+
This is based on a limited form of the pathlib.Path pattern language introduced
10+
in Python 3.13 (https://docs.python.org/3/library/pathlib.html#pattern-language)
11+
with the following changes:
12+
13+
* It does not support character classes.
14+
"""
15+
16+
from typing import Callable, Generator
17+
import argparse
18+
import os
19+
from pathlib import Path, PurePosixPath
20+
import re
21+
import shutil
22+
import sys
23+
24+
25+
class RecursiveGlobPattern:
    """A single recursive glob pattern compiled to a regular expression.

    The glob is matched against relative, posix-style ("/"-separated) paths.
    Supported tokens:

    * ``**`` as a whole path segment: any number of directory levels
      (including none), at the start, middle or end of the pattern.
    * ``*``: any run of characters within one path segment.
    * ``?``: exactly one character within one path segment.

    Everything else is matched literally. Character classes are not supported.
    """

    def __init__(self, glob: str):
        """Compile `glob` into an anchored regular expression."""
        self.glob = glob
        # Escape the whole glob first so that only the tokens rewritten below
        # carry any regex meaning. After escaping, "*" appears as "\*" and "?"
        # as "\?", which is what the replacements search for.
        pattern = f"^{re.escape(glob)}$"
        # Intermediate recursive directory match.
        pattern = pattern.replace("/\\*\\*/", "/(.*/)?")
        # First segment recursive directory match.
        pattern = pattern.replace("^\\*\\*/", "^(.*/)?")
        # Last segment recursive directory match.
        pattern = pattern.replace("/\\*\\*$", "(/.*)?$")
        # Intra-segment * match.
        pattern = pattern.replace("\\*", "[^/]*")
        # Intra-segment ? match: exactly one non-separator character. The
        # groups inserted above end in an unescaped "?" and are not affected.
        pattern = pattern.replace("\\?", "[^/]")
        self.pattern = re.compile(pattern)

    def matches(self, relpath: str, direntry: os.DirEntry[str]) -> bool:
        """Return True if `relpath` matches this pattern.

        `direntry` is accepted for interface uniformity; matching is decided
        on the relative path alone.
        """
        return self.pattern.match(relpath) is not None
45+
46+
class PatternMatcher:
    """Scans base directories and filters the entries found by glob patterns.

    Entries from every scanned base directory are merged into one mapping
    keyed by their path relative to that base directory, so an entry scanned
    later replaces an earlier one with the same relative path.
    """

    def __init__(self, includes: list[str], excludes: list[str]):
        """Compile the include and exclude globs; no directory is scanned yet."""
        self.includes = [RecursiveGlobPattern(glob) for glob in includes]
        self.excludes = [RecursiveGlobPattern(glob) for glob in excludes]
        # Relative posix-style path -> DirEntry. Last relative path wins.
        self.all: dict[str, os.DirEntry[str]] = {}

    def add_basedir(self, basedir: Path):
        """Recursively record every entry below `basedir` into `self.all`.

        Directories are recorded as entries themselves and then descended
        into; symlinks to directories are recorded but not followed.
        """
        entries = self.all

        # Using scandir and being judicious about path concatenation/conversion
        # (versus using walk) is on the order of 10-50x faster. This is still
        # about 10x slower than an `ls -R` but gets us down to tens of
        # milliseconds for an LLVM install sized tree, which is acceptable.
        def scan_children(rootpath: str, prefix: str):
            with os.scandir(rootpath) as scanner:
                for child in scanner:
                    child_relpath = f"{prefix}{child.name}"
                    descend = child.is_dir(follow_symlinks=False)
                    entries[child_relpath] = child
                    if descend:
                        scan_children(
                            os.path.join(rootpath, child.name), f"{child_relpath}/"
                        )

        scan_children(basedir.absolute(), "")

    def matches(self) -> Generator[tuple[str, os.DirEntry[str]], None, None]:
        """Yield `(relpath, direntry)` for each recorded entry that passes.

        An entry passes when it matches at least one include pattern (or no
        include patterns were given) and matches no exclude pattern.
        """
        include_globs = self.includes
        exclude_globs = self.excludes
        for relpath, entry in self.all.items():
            if include_globs and not any(
                glob.matches(relpath, entry) for glob in include_globs
            ):
                continue
            if any(glob.matches(relpath, entry) for glob in exclude_globs):
                continue
            yield relpath, entry
93+
94+
95+
def do_list(args: argparse.Namespace, pm: PatternMatcher):
    """Implement the 'list' command.

    Writes the relative path of every entry yielded by `pm.matches()` to
    stdout, one per line. `args` is not consulted.
    """
    for matched_relpath, _ in pm.matches():
        print(matched_relpath)
98+
99+
100+
def do_copy(args: argparse.Namespace, pm: PatternMatcher):
    """Implement the 'copy' command: materialize matching entries in a directory.

    Reads from `args`:
      * `dest_dir`: destination directory (created if missing).
      * `remove_dest`: when true, an existing destination is removed first;
        when false, existing files/symlinks at a destination path are
        unlinked before being replaced.
      * `always_copy`: when true, regular files are always copied; otherwise
        a hard link is attempted first with a fallback to copying.
      * `verbose`: when true, one status line per entry is written to stderr.

    Each entry yielded by `pm.matches()` is handled by kind: directories are
    created, symlinks are re-created with the same link target, and regular
    files are hard-linked or copied.
    """
    verbose = args.verbose
    destdir: Path = args.dest_dir
    if args.remove_dest and destdir.exists():
        if verbose:
            print(f"rmtree {destdir}", file=sys.stderr)
        shutil.rmtree(destdir)
    destdir.mkdir(parents=True, exist_ok=True)
    for relpath, direntry in pm.matches():
        try:
            destpath = destdir / PurePosixPath(relpath)
            if direntry.is_dir(follow_symlinks=False):
                # Directory.
                if verbose:
                    print(f"mkdir {destpath}", file=sys.stderr, end="")
                destpath.mkdir(parents=True, exist_ok=True)
            elif direntry.is_symlink():
                # Symlink.
                # lexists() is true for dangling symlinks too and, unlike
                # Path.exists(follow_symlinks=False), does not need Python 3.12.
                if not args.remove_dest and os.path.lexists(destpath):
                    os.unlink(destpath)
                # The parent directory may not have been matched by the
                # include/exclude filters, so it cannot be assumed to exist.
                destpath.parent.mkdir(parents=True, exist_ok=True)
                targetpath = os.readlink(direntry.path)
                if verbose:
                    print(
                        f"symlink {targetpath} -> {destpath}", file=sys.stderr, end=""
                    )
                os.symlink(targetpath, destpath)
            else:
                # Regular file.
                if not args.remove_dest and os.path.lexists(destpath):
                    os.unlink(destpath)
                destpath.parent.mkdir(parents=True, exist_ok=True)
                linked_file = False
                if not args.always_copy:
                    # Attempt to link
                    try:
                        if verbose:
                            print(
                                f"hardlink {direntry.path} -> {destpath}",
                                file=sys.stderr,
                                end="",
                            )
                        os.link(direntry.path, destpath, follow_symlinks=False)
                        linked_file = True
                    except (OSError, NotImplementedError):
                        # OSError: linking failed (e.g. across filesystems).
                        # NotImplementedError: the os module raises it when
                        # follow_symlinks is unsupported on this platform.
                        if verbose:
                            print(" (falling back to copy) ", file=sys.stderr, end="")
                if not linked_file:
                    # Make a copy instead.
                    if verbose:
                        print(
                            f"copy {direntry.path} -> {destpath}",
                            file=sys.stderr,
                            end="",
                        )
                    shutil.copy2(direntry.path, destpath, follow_symlinks=False)
        finally:
            # Status messages are printed without a newline so that fallback
            # notes stay on the same line; terminate the line here, even when
            # the operation raised.
            if verbose:
                print("", file=sys.stderr)
158+
159+
160+
def main(cl_args: list[str]):
    """Parse `cl_args` and run the selected sub-command ('copy' or 'list').

    Both sub-commands share the fileset arguments (base directories,
    --include, --exclude, --verbose); 'copy' additionally takes a destination
    directory plus --always-copy and --[no-]remove-dest.
    """

    def add_pattern_matcher_args(parser: argparse.ArgumentParser):
        # Fileset selection arguments common to every sub-command.
        parser.add_argument(
            "basedir", type=Path, nargs="*", help="Base directories to scan"
        )
        parser.add_argument(
            "--include", nargs="+", help="Recursive glob pattern to include"
        )
        parser.add_argument(
            "--exclude", nargs="+", help="Recursive glob pattern to exclude"
        )
        parser.add_argument(
            "--verbose", action="store_true", help="Print verbose status"
        )

    def pattern_matcher_action(
        action: Callable[[argparse.Namespace, PatternMatcher], None]
    ):
        # Wrap `action` so that it receives a PatternMatcher already populated
        # from the parsed base directories and patterns.
        def run_action(args: argparse.Namespace):
            if not args.basedir:
                # base dir is CWD
                args.basedir = [Path.cwd()]
            matcher = PatternMatcher(args.include or [], args.exclude or [])
            for scan_root in args.basedir:
                matcher.add_basedir(scan_root)
            action(args, matcher)

        return run_action

    root_parser = argparse.ArgumentParser(
        "fileset_tool.py", usage="fileset_tool.py {command} ..."
    )
    commands = root_parser.add_subparsers(required=True)

    # 'copy' command
    copy_parser = commands.add_parser(
        "copy", help="Copy matching files to a destination dir"
    )
    copy_parser.add_argument("dest_dir", type=Path, help="Destination directory")
    copy_parser.add_argument(
        "--always-copy", action="store_true", help="Always copy vs attempting to link"
    )
    copy_parser.add_argument(
        "--remove-dest",
        default=True,
        action=argparse.BooleanOptionalAction,
        help="Remove the destination directory before copying",
    )
    add_pattern_matcher_args(copy_parser)
    copy_parser.set_defaults(func=pattern_matcher_action(do_copy))

    # 'list' command
    list_parser = commands.add_parser("list", help="List matching files to stdout")
    add_pattern_matcher_args(list_parser)
    list_parser.set_defaults(func=pattern_matcher_action(do_list))

    parsed = root_parser.parse_args(cl_args)
    parsed.func(parsed)


if __name__ == "__main__":
    main(sys.argv[1:])

build_tools/merge_dist_dir.cmake

-64
This file was deleted.

cmake/therock_subproject.cmake

+3-3
Original file line numberDiff line numberDiff line change
@@ -482,18 +482,18 @@ function(therock_cmake_subproject_activate target_name)
482482

483483
# dist install target.
484484
set(_dist_stamp_file "${_stamp_dir}/dist.stamp")
485-
set(_merge_dist_script "${THEROCK_SOURCE_DIR}/build_tools/merge_dist_dir.cmake")
485+
set(_fileset_tool "${THEROCK_SOURCE_DIR}/build_tools/fileset_tool.py")
486486
_therock_cmake_subproject_get_stage_dirs(
487487
_dist_source_dirs "${target_name}" ${_runtime_deps})
488488
add_custom_command(
489489
OUTPUT "${_dist_stamp_file}"
490-
COMMAND "${CMAKE_COMMAND}" -P "${_merge_dist_script}" "${_dist_dir}" ${_dist_source_dirs}
490+
COMMAND "${Python3_EXECUTABLE}" "${_fileset_tool}" copy "${_dist_dir}" ${_dist_source_dirs}
491491
COMMAND "${CMAKE_COMMAND}" -E touch "${_dist_stamp_file}"
492492
COMMENT "Merging sub-project dist directory for ${target_name}"
493493
${_terminal_option}
494494
DEPENDS
495495
"${_stage_stamp_file}"
496-
"${_merge_dist_script}"
496+
"${_fileset_tool}"
497497
)
498498
add_custom_target(
499499
"${target_name}+dist"

0 commit comments

Comments
 (0)