Skip to content

Commit 5471468

Browse files
committed
Merge branch 'ds/path-walk-filters' into seen
The "git pack-objects --path-walk" traversal has been integrated with several object filters, including blobless and sparse filters.

Comments?

* ds/path-walk-filters:
  - pack-objects: support sparse:oid filter with path-walk
  - path-walk: add pl_sparse_trees to control tree pruning
  - path-walk: support blob size limit filter
  - backfill: die on incompatible filter options
  - path-walk: support blobless filter
  - t/perf: add pack-objects filter and path-walk benchmark
  - pack-objects: pass --objects with --path-walk
2 parents 04a8af5 + 465ceb3 commit 5471468

9 files changed

Lines changed: 758 additions & 16 deletions

builtin/backfill.c

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,10 @@ static void reject_unsupported_rev_list_options(struct rev_info *revs)
9999
if (revs->filter.choice)
100100
die(_("'%s' cannot be used with 'git backfill'"),
101101
"--filter");
102+
if (!path_walk_filter_compatible(&revs->filter))
103+
die(_("cannot backfill with these filter options"));
104+
if (revs->filter.blob_limit_value)
105+
die(_("cannot backfill with blob size limits"));
102106
}
103107

104108
static int do_backfill(struct backfill_context *ctx)
@@ -108,6 +112,7 @@ static int do_backfill(struct backfill_context *ctx)
108112

109113
if (ctx->sparse) {
110114
CALLOC_ARRAY(info.pl, 1);
115+
info.pl_sparse_trees = 1;
111116
if (get_sparse_checkout_patterns(info.pl)) {
112117
path_walk_info_clear(&info);
113118
return error(_("problem loading sparse-checkout"));

builtin/pack-objects.c

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4788,6 +4788,8 @@ static void get_object_list_path_walk(struct rev_info *revs)
47884788
result = walk_objects_by_path(&info);
47894789
trace2_region_leave("pack-objects", "path-walk", revs->repo);
47904790

4791+
path_walk_info_clear(&info);
4792+
47914793
if (result)
47924794
die(_("failed to pack objects via path-walk"));
47934795
}
@@ -5190,7 +5192,7 @@ int cmd_pack_objects(int argc,
51905192

51915193
if (path_walk) {
51925194
const char *option = NULL;
5193-
if (filter_options.choice)
5195+
if (!path_walk_filter_compatible(&filter_options))
51945196
option = "--filter";
51955197
else if (use_delta_islands)
51965198
option = "--delta-islands";
@@ -5203,10 +5205,7 @@ int cmd_pack_objects(int argc,
52035205
}
52045206
if (path_walk) {
52055207
strvec_push(&rp, "--boundary");
5206-
/*
5207-
* We must disable the bitmaps because we are removing
5208-
* the --objects / --objects-edge[-aggressive] options.
5209-
*/
5208+
strvec_push(&rp, "--objects");
52105209
use_bitmap_index = 0;
52115210
} else if (thin) {
52125211
use_internal_rev_list = 1;

path-walk.c

Lines changed: 145 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,9 @@
99
#include "hashmap.h"
1010
#include "hex.h"
1111
#include "list-objects.h"
12+
#include "list-objects-filter-options.h"
13+
#include "object-name.h"
14+
#include "odb.h"
1215
#include "object.h"
1316
#include "oid-array.h"
1417
#include "path.h"
@@ -178,11 +181,6 @@ static int add_tree_entries(struct path_walk_context *ctx,
178181
return -1;
179182
}
180183

181-
/* Skip this object if already seen. */
182-
if (o->flags & SEEN)
183-
continue;
184-
o->flags |= SEEN;
185-
186184
strbuf_setlen(&path, base_len);
187185
strbuf_add(&path, entry.path, entry.pathlen);
188186

@@ -193,6 +191,40 @@ static int add_tree_entries(struct path_walk_context *ctx,
193191
if (type == OBJ_TREE)
194192
strbuf_addch(&path, '/');
195193

194+
if (o->flags & SEEN) {
195+
/*
196+
* A tree with a shared OID may appear at multiple
197+
* paths. Even though we already added this tree to
198+
* the output at some other path, we still need to
199+
* walk into it at this in-cone path to discover
200+
* blobs that were not found at the earlier
201+
* out-of-cone path.
202+
*
203+
* Only do this for paths not yet in our map, to
204+
* avoid duplicate entries when the same tree OID
205+
* appears at the same path across multiple commits.
206+
*/
207+
if (type == OBJ_TREE && ctx->info->pl &&
208+
ctx->info->pl->use_cone_patterns &&
209+
!ctx->info->pl_sparse_trees &&
210+
!strmap_contains(&ctx->paths_to_lists, path.buf)) {
211+
int dtype;
212+
enum pattern_match_result m;
213+
m = path_matches_pattern_list(path.buf, path.len,
214+
path.buf + base_len,
215+
&dtype,
216+
ctx->info->pl,
217+
ctx->repo->index);
218+
if (m != NOT_MATCHED) {
219+
add_path_to_list(ctx, path.buf, type,
220+
&entry.oid,
221+
!(o->flags & UNINTERESTING));
222+
push_to_stack(ctx, path.buf);
223+
}
224+
}
225+
continue;
226+
}
227+
196228
if (ctx->info->pl) {
197229
int dtype;
198230
enum pattern_match_result match;
@@ -202,7 +234,8 @@ static int add_tree_entries(struct path_walk_context *ctx,
202234
ctx->repo->index);
203235

204236
if (ctx->info->pl->use_cone_patterns &&
205-
match == NOT_MATCHED)
237+
match == NOT_MATCHED &&
238+
(type == OBJ_BLOB || ctx->info->pl_sparse_trees))
206239
continue;
207240
else if (!ctx->info->pl->use_cone_patterns &&
208241
type == OBJ_BLOB &&
@@ -237,6 +270,7 @@ static int add_tree_entries(struct path_walk_context *ctx,
237270
continue;
238271
}
239272

273+
o->flags |= SEEN;
240274
add_path_to_list(ctx, path.buf, type, &entry.oid,
241275
!(o->flags & UNINTERESTING));
242276

@@ -314,9 +348,29 @@ static int walk_path(struct path_walk_context *ctx,
314348
/* Evaluate function pointer on this data, if requested. */
315349
if ((list->type == OBJ_TREE && ctx->info->trees) ||
316350
(list->type == OBJ_BLOB && ctx->info->blobs) ||
317-
(list->type == OBJ_TAG && ctx->info->tags))
318-
ret = ctx->info->path_fn(path, &list->oids, list->type,
319-
ctx->info->path_fn_data);
351+
(list->type == OBJ_TAG && ctx->info->tags)) {
352+
struct oid_array *oids = &list->oids;
353+
struct oid_array filtered = OID_ARRAY_INIT;
354+
355+
if (list->type == OBJ_BLOB && ctx->info->blob_limit) {
356+
for (size_t i = 0; i < list->oids.nr; i++) {
357+
unsigned long size;
358+
359+
if (odb_read_object_info(ctx->repo->objects,
360+
&list->oids.oid[i],
361+
&size) != OBJ_BLOB ||
362+
size < ctx->info->blob_limit)
363+
oid_array_append(&filtered,
364+
&list->oids.oid[i]);
365+
}
366+
oids = &filtered;
367+
}
368+
369+
if (oids->nr)
370+
ret = ctx->info->path_fn(path, oids, list->type,
371+
ctx->info->path_fn_data);
372+
oid_array_clear(&filtered);
373+
}
320374

321375
/* Expand data for children. */
322376
if (list->type == OBJ_TREE) {
@@ -485,6 +539,85 @@ static int setup_pending_objects(struct path_walk_info *info,
485539
return 0;
486540
}
487541

542+
static int prepare_filters(struct path_walk_info *info,
543+
struct list_objects_filter_options *options)
544+
{
545+
switch (options->choice) {
546+
case LOFC_DISABLED:
547+
return 1;
548+
549+
case LOFC_BLOB_NONE:
550+
if (info) {
551+
info->blobs = 0;
552+
list_objects_filter_release(options);
553+
}
554+
return 1;
555+
556+
case LOFC_BLOB_LIMIT:
557+
if (info) {
558+
if (!options->blob_limit_value) {
559+
info->blobs = 0;
560+
} else {
561+
info->blob_limit = options->blob_limit_value;
562+
}
563+
list_objects_filter_release(options);
564+
}
565+
return 1;
566+
567+
case LOFC_SPARSE_OID:
568+
if (info) {
569+
struct object_id sparse_oid;
570+
struct repository *repo = info->revs->repo;
571+
572+
if (info->pl) {
573+
warning(_("sparse filter cannot be combined with existing sparse patterns"));
574+
return 0;
575+
}
576+
577+
if (repo_get_oid_with_flags(repo,
578+
options->sparse_oid_name,
579+
&sparse_oid,
580+
GET_OID_BLOB)) {
581+
error(_("unable to access sparse blob in '%s'"),
582+
options->sparse_oid_name);
583+
return 0;
584+
}
585+
586+
CALLOC_ARRAY(info->pl, 1);
587+
info->pl->use_cone_patterns = 1;
588+
589+
if (add_patterns_from_blob_to_list(&sparse_oid, "", 0,
590+
info->pl) < 0) {
591+
clear_pattern_list(info->pl);
592+
FREE_AND_NULL(info->pl);
593+
error(_("unable to parse sparse filter data in '%s'"),
594+
oid_to_hex(&sparse_oid));
595+
return 0;
596+
}
597+
598+
if (!info->pl->use_cone_patterns) {
599+
clear_pattern_list(info->pl);
600+
FREE_AND_NULL(info->pl);
601+
warning(_("sparse filter is not cone-mode compatible"));
602+
return 0;
603+
}
604+
605+
list_objects_filter_release(options);
606+
}
607+
return 1;
608+
609+
default:
610+
error(_("object filter '%s' not supported by the path-walk API"),
611+
list_objects_filter_spec(options));
612+
return 0;
613+
}
614+
}
615+
616+
int path_walk_filter_compatible(struct list_objects_filter_options *options)
617+
{
618+
return prepare_filters(NULL, options);
619+
}
620+
488621
/**
489622
* Given the configuration of 'info', walk the commits based on 'info->revs' and
490623
* call 'info->path_fn' on each discovered path.
@@ -512,6 +645,9 @@ int walk_objects_by_path(struct path_walk_info *info)
512645

513646
trace2_region_enter("path-walk", "commit-walk", info->revs->repo);
514647

648+
if (!prepare_filters(info, &info->revs->filter))
649+
return -1;
650+
515651
CALLOC_ARRAY(commit_list, 1);
516652
commit_list->type = OBJ_COMMIT;
517653

path-walk.h

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,14 @@ struct path_walk_info {
4242
int blobs;
4343
int tags;
4444

45+
/**
46+
* If non-zero, specifies a maximum blob size. Blobs with a
47+
* size equal to or greater than this limit will be omitted
48+
* from the walk. Blobs smaller than the limit (or blobs
49+
* whose size cannot be determined) are still visited.
50+
*/
51+
unsigned long blob_limit;
52+
4553
/**
4654
* When 'prune_all_uninteresting' is set and a path has all objects
4755
* marked as UNINTERESTING, then the path-walk will not visit those
@@ -64,8 +72,14 @@ struct path_walk_info {
6472
* of the cone. If not in cone mode, then all tree paths will be
6573
* explored but the path_fn will only be called when the path matches
6674
* the sparse-checkout patterns.
75+
*
76+
* When 'pl_sparse_trees' is zero, the sparse patterns only restrict
77+
* blobs and all trees are included in the walk output. This matches
78+
* the behavior of the sparse:oid object filter. When nonzero, trees
79+
* are also pruned by the sparse patterns (as used by backfill).
6780
*/
6881
struct pattern_list *pl;
82+
int pl_sparse_trees;
6983
};
7084

7185
#define PATH_WALK_INFO_INIT { \
@@ -85,3 +99,10 @@ void path_walk_info_clear(struct path_walk_info *info);
8599
* Returns nonzero on an error.
86100
*/
87101
int walk_objects_by_path(struct path_walk_info *info);
102+
103+
struct list_objects_filter_options;
104+
/**
105+
* Given a set of options for filtering objects, return 1 if the options
106+
* are compatible with the path-walk API and 0 otherwise.
107+
*/
108+
int path_walk_filter_compatible(struct list_objects_filter_options *options);

t/helper/test-path-walk.c

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
#include "dir.h"
55
#include "environment.h"
66
#include "hex.h"
7+
#include "list-objects-filter-options.h"
78
#include "object-name.h"
89
#include "object.h"
910
#include "pretty.h"
@@ -67,10 +68,12 @@ static int emit_block(const char *path, struct oid_array *oids,
6768

6869
int cmd__path_walk(int argc, const char **argv)
6970
{
70-
int res, stdin_pl = 0;
71+
int res, stdin_pl = 0, pl_sparse_trees = -1;
7172
struct rev_info revs = REV_INFO_INIT;
7273
struct path_walk_info info = PATH_WALK_INFO_INIT;
7374
struct path_walk_test_data data = { 0 };
75+
struct list_objects_filter_options filter_options =
76+
LIST_OBJECTS_FILTER_INIT;
7477
struct option options[] = {
7578
OPT_BOOL(0, "blobs", &info.blobs,
7679
N_("toggle inclusion of blob objects")),
@@ -86,11 +89,14 @@ int cmd__path_walk(int argc, const char **argv)
8689
N_("toggle aggressive edge walk")),
8790
OPT_BOOL(0, "stdin-pl", &stdin_pl,
8891
N_("read a pattern list over stdin")),
92+
OPT_BOOL(0, "pl-sparse-trees", &pl_sparse_trees,
93+
N_("toggle pruning of trees by sparse patterns")),
94+
OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
8995
OPT_END(),
9096
};
9197

9298
setup_git_directory(the_repository);
93-
revs.repo = the_repository;
99+
repo_init_revisions(the_repository, &revs, NULL);
94100

95101
argc = parse_options(argc, argv, NULL,
96102
options, path_walk_usage,
@@ -101,13 +107,19 @@ int cmd__path_walk(int argc, const char **argv)
101107
else
102108
usage(path_walk_usage[0]);
103109

110+
/* Apply the filter after setup_revisions to avoid the --objects check. */
111+
if (filter_options.choice)
112+
list_objects_filter_copy(&revs.filter, &filter_options);
113+
104114
info.revs = &revs;
105115
info.path_fn = emit_block;
106116
info.path_fn_data = &data;
107117

108118
if (stdin_pl) {
109119
struct strbuf in = STRBUF_INIT;
110120
CALLOC_ARRAY(info.pl, 1);
121+
info.pl_sparse_trees = (pl_sparse_trees >= 0) ?
122+
pl_sparse_trees : 1;
111123

112124
info.pl->use_cone_patterns = 1;
113125

@@ -129,6 +141,7 @@ int cmd__path_walk(int argc, const char **argv)
129141
free(info.pl);
130142
}
131143

144+
list_objects_filter_release(&filter_options);
132145
release_revisions(&revs);
133146
return res;
134147
}

0 commit comments

Comments
 (0)