Skip to content

Commit a8f47de

Browse files
committed
Optimize tarfs ReadDir
Signed-off-by: Jon Johnson <[email protected]>
1 parent 84d996f commit a8f47de

File tree

2 files changed

+37
-38
lines changed

2 files changed

+37
-38
lines changed

main.go

+3 -2
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ func run(args []string) error {
3333
return err
3434
}
3535

36-
fsys, err := tarfs.New(zr)
36+
fsys, err := tarfs.New(zr, resp.ContentLength)
3737
if err != nil {
3838
return err
3939
}
@@ -59,7 +59,8 @@ func run(args []string) error {
5959
return err
6060
}
6161

62-
fsys, err := tarfs.New(zr)
62+
// We don't know the size until we decompress it.
63+
fsys, err := tarfs.New(zr, 1<<63-1)
6364
if err != nil {
6465
return err
6566
}

tarfs/tarfs.go

+34 -36
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ package tarfs
1717
import (
1818
"archive/tar"
1919
"bufio"
20+
"cmp"
2021
"encoding/json"
2122
"errors"
2223
"fmt"
@@ -37,7 +38,7 @@ type Entry struct {
3738
Offset int64
3839

3940
Filename string
40-
Dir string
41+
dir string
4142
fi fs.FileInfo
4243
}
4344

@@ -97,6 +98,7 @@ type FS struct {
9798
ra io.ReaderAt
9899
files []*Entry
99100
index map[string]int
101+
dirs map[string][]fs.DirEntry
100102
}
101103

102104
func (fsys *FS) Readlink(name string) (string, error) {
@@ -118,7 +120,7 @@ func (fsys *FS) Open(name string) (fs.File, error) {
118120
if name == "." {
119121
return &File{
120122
Entry: &Entry{
121-
Dir: ".",
123+
dir: ".",
122124
Filename: ".",
123125
Header: tar.Header{
124126
Name: ".",
@@ -170,23 +172,12 @@ func (fsys *FS) Stat(name string) (fs.FileInfo, error) {
170172
}
171173

172174
func (fsys *FS) ReadDir(name string) ([]fs.DirEntry, error) {
173-
children := []fs.DirEntry{}
174-
for _, f := range fsys.files {
175-
// This is load bearing for now.
176-
f := f
177-
178-
if f.Dir != name {
179-
continue
180-
}
181-
182-
children = append(children, f)
175+
dirs, ok := fsys.dirs[name]
176+
if !ok {
177+
return []fs.DirEntry{}, nil
183178
}
184179

185-
slices.SortFunc(children, func(a, b fs.DirEntry) int {
186-
return strings.Compare(a.Name(), b.Name())
187-
})
188-
189-
return children, nil
180+
return dirs, nil
190181
}
191182

192183
type countReader struct {
@@ -200,35 +191,22 @@ func (cr *countReader) Read(p []byte) (int, error) {
200191
return n, err
201192
}
202193

203-
func New(ra io.ReaderAt) (*FS, error) {
194+
func New(ra io.ReaderAt, size int64) (*FS, error) {
204195
fsys := &FS{
205196
ra: ra,
206197
files: []*Entry{},
207198
index: map[string]int{},
199+
dirs: map[string][]fs.DirEntry{},
208200
}
209201

210-
var r io.Reader
211-
if reader, ok := ra.(io.Reader); ok {
212-
r = reader
213-
} else {
214-
size := int64(-1)
215-
if statter, ok := ra.(interface {
216-
Stat() (fs.FileInfo, error)
217-
}); ok {
218-
stat, err := statter.Stat()
219-
if err != nil {
220-
return nil, err
221-
}
222-
size = stat.Size()
223-
}
224-
r = io.NewSectionReader(ra, 0, size)
225-
}
202+
// Number of entries in a given directory, so we know how large of a slice to allocate.
203+
dirCount := map[string]int{}
226204

205+
r := io.NewSectionReader(ra, 0, size)
227206
cr := &countReader{bufio.NewReaderSize(r, 1<<20), 0}
228207
tr := tar.NewReader(cr)
229208

230209
// TODO: Do this lazily.
231-
// TODO: Allow this to be saved and restored.
232210
for {
233211
hdr, err := tr.Next()
234212
if errors.Is(err, io.EOF) {
@@ -239,15 +217,35 @@ func New(ra io.ReaderAt) (*FS, error) {
239217
}
240218

241219
normalized := normalize(hdr.Name)
220+
dir := path.Dir(normalized)
221+
242222
fsys.index[normalized] = len(fsys.files)
243223

244224
fsys.files = append(fsys.files, &Entry{
245225
Header: *hdr,
246226
Offset: cr.n,
247227
Filename: normalized,
248-
Dir: path.Dir(normalized),
228+
dir: dir,
249229
fi: hdr.FileInfo(),
250230
})
231+
232+
dirCount[dir]++
233+
}
234+
235+
// Pre-generate the results of ReadDir so we don't allocate a ton if fs.WalkDir calls us.
236+
// TODO: Consider doing this lazily in a sync.Once the first time we see a ReadDir.
237+
for dir, count := range dirCount {
238+
fsys.dirs[dir] = make([]fs.DirEntry, 0, count)
239+
}
240+
241+
for _, f := range fsys.files {
242+
fsys.dirs[f.dir] = append(fsys.dirs[f.dir], f)
243+
}
244+
245+
for _, files := range fsys.dirs {
246+
slices.SortFunc(files, func(a, b fs.DirEntry) int {
247+
return cmp.Compare(a.Name(), b.Name())
248+
})
251249
}
252250

253251
return fsys, nil

0 commit comments

Comments
 (0)