Skip to content

Commit 0fe5043

Browse files
mhagger authored and gitster committed
dir_iterator: new API for iterating over a directory tree
The iterator interface is modeled on that for references, though no
vtable is necessary because there is (so far?) only one type of
dir_iterator.

There are obviously a lot of features that could easily be added to
this class:

* Skip/include directory paths in the iteration
* Shallow/deep iteration
* Letting the caller decide which subdirectories to recurse into
  (e.g., via a dir_iterator_advance_into() function)
* Option to iterate in sorted order
* Option to iterate over directory paths before vs. after their
  contents

But these are not needed for the current patch series, so I refrain.

Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent d24b21e commit 0fe5043

3 files changed

Lines changed: 290 additions & 0 deletions

File tree

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,7 @@ LIB_OBJS += diff-lib.o
722722
LIB_OBJS += diff-no-index.o
723723
LIB_OBJS += diff.o
724724
LIB_OBJS += dir.o
725+
LIB_OBJS += dir-iterator.o
725726
LIB_OBJS += editor.o
726727
LIB_OBJS += entry.o
727728
LIB_OBJS += environment.o

dir-iterator.c

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
#include "cache.h"
2+
#include "dir.h"
3+
#include "iterator.h"
4+
#include "dir-iterator.h"
5+
6+
/*
 * Bookkeeping for a single directory on the iteration stack.
 */
struct dir_iterator_level {
	/*
	 * Zero while the level has only been pushed; set to 1 by
	 * dir_iterator_advance() once prefix_len and dir are filled in.
	 */
	int initialized;

	/* Handle returned by opendir(), or NULL if not (or no longer) open. */
	DIR *dir;

	/*
	 * Length of the directory portion of the path for this level,
	 * counting the trailing '/'.
	 */
	size_t prefix_len;

	/*
	 * What was last done with the current entry. A directory entry
	 * is first handed to the caller (DIR_STATE_ITER) and then
	 * descended into (DIR_STATE_RECURSE), so both steps must be
	 * tracked.
	 */
	enum { DIR_STATE_ITER, DIR_STATE_RECURSE } dir_state;
};
27+
28+
/*
29+
* The full data structure used to manage the internal directory
30+
* iteration state. It includes members that are not part of the
31+
* public interface.
32+
*/
33+
struct dir_iterator_int {
34+
struct dir_iterator base;
35+
36+
/*
37+
* The number of levels currently on the stack. This is always
38+
* at least 1, because when it becomes zero the iteration is
39+
* ended and this struct is freed.
40+
*/
41+
size_t levels_nr;
42+
43+
/* The number of levels that have been allocated on the stack */
44+
size_t levels_alloc;
45+
46+
/*
47+
* A stack of levels. levels[0] is the uppermost directory
48+
* that will be included in this iteration.
49+
*/
50+
struct dir_iterator_level *levels;
51+
};
52+
53+
int dir_iterator_advance(struct dir_iterator *dir_iterator)
54+
{
55+
struct dir_iterator_int *iter =
56+
(struct dir_iterator_int *)dir_iterator;
57+
58+
while (1) {
59+
struct dir_iterator_level *level =
60+
&iter->levels[iter->levels_nr - 1];
61+
struct dirent *de;
62+
63+
if (!level->initialized) {
64+
/*
65+
* Note: dir_iterator_begin() ensures that
66+
* path is not the empty string.
67+
*/
68+
if (!is_dir_sep(iter->base.path.buf[iter->base.path.len - 1]))
69+
strbuf_addch(&iter->base.path, '/');
70+
level->prefix_len = iter->base.path.len;
71+
72+
level->dir = opendir(iter->base.path.buf);
73+
if (!level->dir && errno != ENOENT) {
74+
warning("error opening directory %s: %s",
75+
iter->base.path.buf, strerror(errno));
76+
/* Popping the level is handled below */
77+
}
78+
79+
level->initialized = 1;
80+
} else if (S_ISDIR(iter->base.st.st_mode)) {
81+
if (level->dir_state == DIR_STATE_ITER) {
82+
/*
83+
* The directory was just iterated
84+
* over; now prepare to iterate into
85+
* it.
86+
*/
87+
level->dir_state = DIR_STATE_RECURSE;
88+
ALLOC_GROW(iter->levels, iter->levels_nr + 1,
89+
iter->levels_alloc);
90+
level = &iter->levels[iter->levels_nr++];
91+
level->initialized = 0;
92+
continue;
93+
} else {
94+
/*
95+
* The directory has already been
96+
* iterated over and iterated into;
97+
* we're done with it.
98+
*/
99+
}
100+
}
101+
102+
if (!level->dir) {
103+
/*
104+
* This level is exhausted (or wasn't opened
105+
* successfully); pop up a level.
106+
*/
107+
if (--iter->levels_nr == 0)
108+
return dir_iterator_abort(dir_iterator);
109+
110+
continue;
111+
}
112+
113+
/*
114+
* Loop until we find an entry that we can give back
115+
* to the caller:
116+
*/
117+
while (1) {
118+
strbuf_setlen(&iter->base.path, level->prefix_len);
119+
errno = 0;
120+
de = readdir(level->dir);
121+
122+
if (!de) {
123+
/* This level is exhausted; pop up a level. */
124+
if (errno) {
125+
warning("error reading directory %s: %s",
126+
iter->base.path.buf, strerror(errno));
127+
} else if (closedir(level->dir))
128+
warning("error closing directory %s: %s",
129+
iter->base.path.buf, strerror(errno));
130+
131+
level->dir = NULL;
132+
if (--iter->levels_nr == 0)
133+
return dir_iterator_abort(dir_iterator);
134+
break;
135+
}
136+
137+
if (is_dot_or_dotdot(de->d_name))
138+
continue;
139+
140+
strbuf_addstr(&iter->base.path, de->d_name);
141+
if (lstat(iter->base.path.buf, &iter->base.st) < 0) {
142+
if (errno != ENOENT)
143+
warning("error reading path '%s': %s",
144+
iter->base.path.buf,
145+
strerror(errno));
146+
continue;
147+
}
148+
149+
/*
150+
* We have to set these each time because
151+
* the path strbuf might have been realloc()ed.
152+
*/
153+
iter->base.relative_path =
154+
iter->base.path.buf + iter->levels[0].prefix_len;
155+
iter->base.basename =
156+
iter->base.path.buf + level->prefix_len;
157+
level->dir_state = DIR_STATE_ITER;
158+
159+
return ITER_OK;
160+
}
161+
}
162+
}
163+
164+
int dir_iterator_abort(struct dir_iterator *dir_iterator)
165+
{
166+
struct dir_iterator_int *iter = (struct dir_iterator_int *)dir_iterator;
167+
168+
for (; iter->levels_nr; iter->levels_nr--) {
169+
struct dir_iterator_level *level =
170+
&iter->levels[iter->levels_nr - 1];
171+
172+
if (level->dir && closedir(level->dir)) {
173+
strbuf_setlen(&iter->base.path, level->prefix_len);
174+
warning("error closing directory %s: %s",
175+
iter->base.path.buf, strerror(errno));
176+
}
177+
}
178+
179+
free(iter->levels);
180+
strbuf_release(&iter->base.path);
181+
free(iter);
182+
return ITER_DONE;
183+
}
184+
185+
struct dir_iterator *dir_iterator_begin(const char *path)
186+
{
187+
struct dir_iterator_int *iter = xcalloc(1, sizeof(*iter));
188+
struct dir_iterator *dir_iterator = &iter->base;
189+
190+
if (!path || !*path)
191+
die("BUG: empty path passed to dir_iterator_begin()");
192+
193+
strbuf_init(&iter->base.path, PATH_MAX);
194+
strbuf_addstr(&iter->base.path, path);
195+
196+
ALLOC_GROW(iter->levels, 10, iter->levels_alloc);
197+
198+
iter->levels_nr = 1;
199+
iter->levels[0].initialized = 0;
200+
201+
return dir_iterator;
202+
}

dir-iterator.h

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#ifndef DIR_ITERATOR_H
2+
#define DIR_ITERATOR_H
3+
4+
/*
5+
* Iterate over a directory tree.
6+
*
7+
* Iterate over a directory tree, recursively, including paths of all
8+
* types and hidden paths. Skip "." and ".." entries and don't follow
9+
* symlinks except for the original path.
10+
*
11+
* Every time dir_iterator_advance() is called, update the members of
12+
* the dir_iterator structure to reflect the next path in the
13+
* iteration. The order that paths are iterated over within a
14+
* directory is undefined, but directory paths are always iterated
15+
* over before the subdirectory contents.
16+
*
17+
* A typical iteration looks like this:
18+
*
19+
* int ok;
20+
* struct dir_iterator *iter = dir_iterator_begin(path);
21+
*
22+
* while ((ok = dir_iterator_advance(iter)) == ITER_OK) {
23+
* if (want_to_stop_iteration()) {
24+
* ok = dir_iterator_abort(iter);
25+
* break;
26+
* }
27+
*
28+
* // Access information about the current path:
29+
* if (S_ISDIR(iter->st.st_mode))
30+
* printf("%s is a directory\n", iter->relative_path);
31+
* }
32+
*
33+
* if (ok != ITER_DONE)
34+
* handle_error();
35+
*
36+
* Callers are allowed to modify iter->path while they are working,
37+
* but they must restore it to its original contents before calling
38+
* dir_iterator_advance() again.
39+
*/
40+
41+
struct dir_iterator {
42+
/* The current path: */
43+
struct strbuf path;
44+
45+
/*
46+
* The current path relative to the starting path. This part
47+
* of the path always uses "/" characters to separate path
48+
* components:
49+
*/
50+
const char *relative_path;
51+
52+
/* The current basename: */
53+
const char *basename;
54+
55+
/* The result of calling lstat() on path: */
56+
struct stat st;
57+
};
58+
59+
/*
60+
* Start a directory iteration over path. Return a dir_iterator that
61+
* holds the internal state of the iteration.
62+
*
63+
* The iteration includes all paths under path, not including path
64+
* itself and not including "." or ".." entries.
65+
*
66+
* path is the starting directory. An internal copy will be made.
67+
*/
68+
struct dir_iterator *dir_iterator_begin(const char *path);
69+
70+
/*
71+
* Advance the iterator to the first or next item and return ITER_OK.
72+
* If the iteration is exhausted, free the dir_iterator and any
73+
* resources associated with it and return ITER_DONE. On error, free
74+
* dir_iterator and associated resources and return ITER_ERROR. It is
75+
* a bug to use iterator or call this function again after it has
76+
* returned ITER_DONE or ITER_ERROR.
77+
*/
78+
int dir_iterator_advance(struct dir_iterator *iterator);
79+
80+
/*
81+
* End the iteration before it has been exhausted. Free the
82+
* dir_iterator and any associated resources and return ITER_DONE. On
83+
* error, free the dir_iterator and return ITER_ERROR.
84+
*/
85+
int dir_iterator_abort(struct dir_iterator *iterator);
86+
87+
#endif

0 commit comments

Comments
 (0)