From ed97fc27dbc09c0cc01047f62e73e688b6bfdec0 Mon Sep 17 00:00:00 2001 From: David Newhall II Date: Sat, 12 Apr 2025 15:52:43 -0700 Subject: [PATCH 1/5] allow providing custom file name encoding --- 7z.go | 6 +++++- ar.go | 6 +++++- cpio.go | 6 +++++- decompress.go | 60 ++++++++++++++++++++++++++++++++++++++++----------- files.go | 28 ++++++++++++++++++++++-- iso.go | 7 ++++-- rar.go | 6 +++++- tar.go | 6 +++++- zip.go | 6 +++++- 9 files changed, 109 insertions(+), 22 deletions(-) diff --git a/7z.go b/7z.go index 02dc3c9..3374eab 100644 --- a/7z.go +++ b/7z.go @@ -84,7 +84,11 @@ func extract7z(xFile *XFile) (int64, []string, []string, error) { } func (x *XFile) un7zip(zipFile *sevenzip.File) (int64, string, error) { - wfile := x.clean(zipFile.Name) + wfile, err := x.clean(zipFile.Name) + if err != nil { + return 0, wfile, err + } + if !strings.HasPrefix(wfile, x.OutputDir) { // The file being written is trying to write outside of our base path. Malicious archive? return 0, wfile, fmt.Errorf("%s: %w: %s (from: %s)", zipFile.FileInfo().Name(), ErrInvalidPath, wfile, zipFile.Name) diff --git a/ar.go b/ar.go index 93469f6..86ff11d 100644 --- a/ar.go +++ b/ar.go @@ -38,7 +38,11 @@ func (x *XFile) unAr(reader io.Reader) (int64, []string, error) { return size, files, fmt.Errorf("%w: %s", ErrInvalidHead, x.FilePath) } - wfile := x.clean(header.Name) + wfile, err := x.clean(header.Name) + if err != nil { + return 0, files, err + } + if !strings.HasPrefix(wfile, x.OutputDir) { // The file being written is trying to write outside of our base path. Malicious archive? return size, files, fmt.Errorf("%s: %w: %s (from: %s)", x.FilePath, ErrInvalidPath, wfile, header.Name) diff --git a/cpio.go b/cpio.go index fd882d0..f1ae106 100644 --- a/cpio.go +++ b/cpio.go @@ -64,7 +64,11 @@ func (x *XFile) uncpio(reader io.Reader) (int64, []string, error) { } func (x *XFile) uncpioFile(cpioFile *cpio.Header, cpioReader *cpio.Reader) (int64, error) { - wfile := x.clean(cpioFile.Name) + wfile, err := x.clean(cpioFile.Name) + if err != nil { + return 0, err + } + if !strings.HasPrefix(wfile, x.OutputDir) { // The file being written is trying to write outside of the base path. Malicious archive? return 0, fmt.Errorf("%s: %w: %s (from: %s)", cpioFile.FileInfo().Name(), ErrInvalidPath, wfile, cpioFile.Name) diff --git a/decompress.go b/decompress.go index 0526334..0e724ae 100644 --- a/decompress.go +++ b/decompress.go @@ -31,7 +31,10 @@ func ExtractXZ(xFile *XFile) (size int64, filesList []string, err error) { } // Get the absolute path of the file being written. - wfile := xFile.clean(xFile.FilePath, ".xz") + wfile, err := xFile.clean(xFile.FilePath, ".xz") + if err != nil { + return 0, nil, err + } size, err = writeFile(wfile, zipReader, xFile.FileMode, xFile.DirMode) if err != nil { @@ -56,7 +59,10 @@ func ExtractZlib(xFile *XFile) (size int64, filesList []string, err error) { defer zipReader.Close() // Get the absolute path of the file being written. - wfile := xFile.clean(xFile.FilePath, ".zz", ".zlib") + wfile, err := xFile.clean(xFile.FilePath, ".zz", ".zlib") + if err != nil { + return 0, nil, err + } size, err = writeFile(wfile, zipReader, xFile.FileMode, xFile.DirMode) if err != nil { @@ -80,7 +86,10 @@ func ExtractLZMA(xFile *XFile) (size int64, filesList []string, err error) { } // Get the absolute path of the file being written. - wfile := xFile.clean(xFile.FilePath, ".lzma", ".lz", ".lzip") + wfile, err := xFile.clean(xFile.FilePath, ".lzma", ".lz", ".lzip") + if err != nil { + return 0, nil, err + } size, err = writeFile(wfile, zipReader, xFile.FileMode, xFile.DirMode) if err != nil { @@ -104,7 +113,10 @@ func ExtractLZMA2(xFile *XFile) (size int64, filesList []string, err error) { } // Get the absolute path of the file being written. - wfile := xFile.clean(xFile.FilePath, ".lzma", ".lzma2") + wfile, err := xFile.clean(xFile.FilePath, ".lzma", ".lzma2") + if err != nil { + return 0, nil, err + } size, err = writeFile(wfile, zipReader, xFile.FileMode, xFile.DirMode) if err != nil { @@ -129,7 +141,10 @@ func ExtractZstandard(xFile *XFile) (size int64, filesList []string, err error) defer zipReader.Close() // Get the absolute path of the file being written. - wfile := xFile.clean(xFile.FilePath, ".zstd", ".zst") + wfile, err := xFile.clean(xFile.FilePath, ".zstd", ".zst") + if err != nil { + return 0, nil, err + } size, err = writeFile(wfile, zipReader, xFile.FileMode, xFile.DirMode) if err != nil { @@ -153,7 +168,10 @@ func ExtractLZW(xFile *XFile) (size int64, filesList []string, err error) { } // Get the absolute path of the file being written. - wfile := xFile.clean(xFile.FilePath, ".Z") + wfile, err := xFile.clean(xFile.FilePath, ".Z") + if err != nil { + return 0, nil, err + } size, err = writeFile(wfile, zipReader, xFile.FileMode, xFile.DirMode) if err != nil { @@ -172,7 +190,10 @@ func ExtractLZ4(xFile *XFile) (size int64, filesList []string, err error) { defer compressedFile.Close() // Get the absolute path of the file being written. - wfile := xFile.clean(xFile.FilePath, ".lz4") + wfile, err := xFile.clean(xFile.FilePath, ".lz4") + if err != nil { + return 0, nil, err + } size, err = writeFile(wfile, lz4.NewReader(compressedFile), xFile.FileMode, xFile.DirMode) if err != nil { @@ -191,7 +212,10 @@ func ExtractSnappy(xFile *XFile) (size int64, filesList []string, err error) { defer compressedFile.Close() // Get the absolute path of the file being written. - wfile := xFile.clean(xFile.FilePath, ".snappy", ".sz") + wfile, err := xFile.clean(xFile.FilePath, ".snappy", ".sz") + if err != nil { + return 0, nil, err + } size, err = writeFile(wfile, snappy.NewReader(compressedFile), xFile.FileMode, xFile.DirMode) if err != nil { @@ -210,7 +234,10 @@ func ExtractS2(xFile *XFile) (size int64, filesList []string, err error) { defer compressedFile.Close() // Get the absolute path of the file being written. - wfile := xFile.clean(xFile.FilePath, ".s2") + wfile, err := xFile.clean(xFile.FilePath, ".s2") + if err != nil { + return 0, nil, err + } size, err = writeFile(wfile, s2.NewReader(compressedFile), xFile.FileMode, xFile.DirMode) if err != nil { @@ -229,7 +256,10 @@ func ExtractBrotli(xFile *XFile) (size int64, filesList []string, err error) { defer compressedFile.Close() // Get the absolute path of the file being written. - wfile := xFile.clean(xFile.FilePath, ".brotli", ".br") + wfile, err := xFile.clean(xFile.FilePath, ".brotli", ".br") + if err != nil { + return 0, nil, err + } size, err = writeFile(wfile, brotli.NewReader(compressedFile), xFile.FileMode, xFile.DirMode) if err != nil { @@ -248,7 +278,10 @@ func ExtractBzip(xFile *XFile) (size int64, filesList []string, err error) { defer compressedFile.Close() // Get the absolute path of the file being written. - wfile := xFile.clean(xFile.FilePath, ".bz", ".bz2") + wfile, err := xFile.clean(xFile.FilePath, ".bz", ".bz2") + if err != nil { + return 0, nil, err + } size, err = writeFile(wfile, bzip2.NewReader(compressedFile), xFile.FileMode, xFile.DirMode) if err != nil { @@ -273,7 +306,10 @@ func ExtractGzip(xFile *XFile) (size int64, filesList []string, err error) { defer zipReader.Close() // Get the absolute path of the file being written. - wfile := xFile.clean(xFile.FilePath, ".gz") + wfile, err := xFile.clean(xFile.FilePath, ".gz") + if err != nil { + return 0, nil, err + } size, err = writeFile(wfile, zipReader, xFile.FileMode, xFile.DirMode) if err != nil { diff --git a/files.go b/files.go index 711f517..77a7f56 100644 --- a/files.go +++ b/files.go @@ -9,6 +9,8 @@ import ( "path/filepath" "regexp" "strings" + + "golang.org/x/text/encoding" ) // ArchiveList is the value returned when searching for compressed files. @@ -111,6 +113,8 @@ type XFile struct { Password string // (RAR/7z) Archive passwords (to try multiple). Passwords []string + // If file names are not UTF8 encoded, pass your own encoder here. + Encoding encoding.Encoding // Logger allows printing debug messages. log Logger } @@ -446,8 +450,9 @@ func (x *Xtractr) Rename(oldpath, newpath string) error { } // clean returns an absolute path for a file inside the OutputDir. +// clean also decodes the file name using a provided decoder. // If trim length is > 0, then the suffixes are trimmed, and filepath removed. -func (x *XFile) clean(filePath string, trim ...string) string { +func (x *XFile) clean(filePath string, trim ...string) (string, error) { if len(trim) != 0 { filePath = filepath.Base(filePath) for _, suffix := range trim { @@ -455,7 +460,26 @@ func (x *XFile) clean(filePath string, trim ...string) string { } } - return filepath.Clean(filepath.Join(x.OutputDir, filePath)) + decoded, err := x.decode(filePath) + if err != nil { + return "", err + } + + return filepath.Clean(filepath.Join(x.OutputDir, decoded)), nil +} + +// decode a string using the provided decoder. +func (x *XFile) decode(input string) (string, error) { + if x.Encoding == nil { + return input, nil + } + + output, err := x.Encoding.NewDecoder().String(input) + if err != nil { + return "", fmt.Errorf("decoding file name: %w", err) + } + + return output, nil } // AllExcept can be used as an input to ExcludeSuffix in a Filter. diff --git a/iso.go b/iso.go index 4137bde..0e756d1 100644 --- a/iso.go +++ b/iso.go @@ -67,8 +67,11 @@ func (x *XFile) uniso(isoFile *iso9660.File, parent string) (int64, []string, er return size, files, nil } -func (x *XFile) unisofile(isoFile *iso9660.File, wfile string) (int64, []string, error) { - wfile = x.clean(wfile) +func (x *XFile) unisofile(isoFile *iso9660.File, fileName string) (int64, []string, error) { + wfile, err := x.clean(fileName) + if err != nil { + return 0, nil, err + } //nolint:gocritic // this 1-argument filepath.Join removes a ./ prefix should there be one. if !strings.HasPrefix(wfile, filepath.Join(x.OutputDir)) { diff --git a/rar.go b/rar.go index 166935d..0a02a6d 100644 --- a/rar.go +++ b/rar.go @@ -92,7 +92,11 @@ func (x *XFile) unrar(rarReader *rardecode.ReadCloser) (int64, []string, error) return size, files, fmt.Errorf("%w: %s", ErrInvalidHead, x.FilePath) } - wfile := x.clean(header.Name) + wfile, err := x.clean(header.Name) + if err != nil { + return 0, nil, err + } + //nolint:gocritic // this 1-argument filepath.Join removes a ./ prefix should there be one. if !strings.HasPrefix(wfile, filepath.Join(x.OutputDir)) { // The file being written is trying to write outside of our base path. Malicious archive? diff --git a/tar.go b/tar.go index 69f42fd..37aa804 100644 --- a/tar.go +++ b/tar.go @@ -119,7 +119,11 @@ func (x *XFile) untar(reader io.Reader) (int64, []string, error) { return size, files, fmt.Errorf("%w: %s", ErrInvalidHead, x.FilePath) } - wfile := x.clean(header.Name) + wfile, err := x.clean(header.Name) + if err != nil { + return 0, nil, err + } + if !strings.HasPrefix(wfile, x.OutputDir) { // The file being written is trying to write outside of our base path. Malicious archive? return size, files, fmt.Errorf("%s: %w: %s (from: %s)", x.FilePath, ErrInvalidPath, wfile, header.Name) diff --git a/zip.go b/zip.go index 9e35a9a..85ab46c 100644 --- a/zip.go +++ b/zip.go @@ -37,7 +37,11 @@ func ExtractZIP(xFile *XFile) (size int64, filesList []string, err error) { } func (x *XFile) unzip(zipFile *zip.File) (int64, string, error) { - wfile := x.clean(zipFile.Name) + wfile, err := x.clean(zipFile.Name) + if err != nil { + return 0, wfile, err + } + if !strings.HasPrefix(wfile, x.OutputDir) { // The file being written is trying to write outside of our base path. Malicious archive? return 0, wfile, fmt.Errorf("%s: %w: %s (from: %s)", zipFile.FileInfo().Name(), ErrInvalidPath, wfile, zipFile.Name) From bb205fdbd2da8c0aafc4b0024be086ec41e85488 Mon Sep 17 00:00:00 2001 From: David Newhall II Date: Sat, 12 Apr 2025 17:13:19 -0700 Subject: [PATCH 2/5] attempt to detect charset using an oooold library --- files.go | 12 ++++++++++-- go.mod | 5 ++++- go.sum | 8 +++++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/files.go b/files.go index 77a7f56..962caee 100644 --- a/files.go +++ b/files.go @@ -10,6 +10,8 @@ import ( "regexp" "strings" + "github.com/saintfish/chardet" + "golang.org/x/net/html/charset" "golang.org/x/text/encoding" ) @@ -470,11 +472,17 @@ func (x *XFile) clean(filePath string, trim ...string) (string, error) { // decode a string using the provided decoder. func (x *XFile) decode(input string) (string, error) { - if x.Encoding == nil { + encoding := x.Encoding + if encoding == nil { + res, _ := chardet.NewTextDetector().DetectBest([]byte(input)) + encoding, _ = charset.Lookup(res.Charset) + } + + if encoding == nil { return input, nil } - output, err := x.Encoding.NewDecoder().String(input) + output, err := encoding.NewDecoder().String(input) if err != nil { return "", fmt.Errorf("decoding file name: %w", err) } diff --git a/go.mod b/go.mod index 9f2fdd1..b4c5bf4 100644 --- a/go.mod +++ b/go.mod @@ -15,10 +15,13 @@ require ( github.com/nwaples/rardecode v1.1.3 github.com/peterebden/ar v0.0.0-20241106141004-20dc11b778e8 github.com/pierrec/lz4/v4 v4.1.22 + github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d github.com/sshaman1101/dcompress v0.0.0-20200109162717-50436a6332de github.com/stretchr/testify v1.10.0 github.com/therootcompany/xz v1.0.1 github.com/ulikunitz/xz v0.5.12 + golang.org/x/net v0.39.0 + golang.org/x/text v0.24.0 ) require ( @@ -28,9 +31,9 @@ require ( github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect + github.com/kr/text v0.2.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect go4.org v0.0.0-20230225012048-214862532bf5 // indirect golang.org/x/crypto v0.37.0 // indirect - golang.org/x/text v0.24.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index c70a504..ae26d59 100644 --- a/go.sum +++ b/go.sum @@ -34,6 +34,7 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -91,8 +92,9 @@ github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgo github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc= github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/peterebden/ar v0.0.0-20241106141004-20dc11b778e8 h1:27L3dHkYbeWGU3/5NasAzVDgXG9QzlfKCvcl4cdNW6c= @@ -104,6 +106,8 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= +github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA= +github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= github.com/sshaman1101/dcompress v0.0.0-20200109162717-50436a6332de h1:uIeuAon/xwRdiZaCmEd5mocquesYkWCf71WBO7obTmA= github.com/sshaman1101/dcompress v0.0.0-20200109162717-50436a6332de/go.mod h1:XIUpD+1rteMazWrMFjNSpM6TocSHxDYXk6UEgBb5+F0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -178,6 +182,8 @@ golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY= +golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= From b33c9282111d3f169fc0ec11adccdb5bcd4b34b4 Mon Sep 17 00:00:00 2001 From: David Newhall II Date: Sat, 19 Apr 2025 10:45:34 -0700 Subject: [PATCH 3/5] Add encoder input interface --- encoding.go | 30 ++++++++++++++++++++++++++++++ files.go | 37 ++++++++----------------------------- go.mod | 2 -- go.sum | 4 ---- 4 files changed, 38 insertions(+), 35 deletions(-) create mode 100644 encoding.go diff --git a/encoding.go b/encoding.go new file mode 100644 index 0000000..85ed061 --- /dev/null +++ b/encoding.go @@ -0,0 +1,30 @@ +package xtractr + +import "fmt" + +/* This file will surely grow when someone writes a proper character encoding detector. */ + +// EncoderInput is used as input for a custom encoder procedure. +type EncoderInput struct { + FileName string + XFile *XFile +} + +// decode a string using the provided decoder. +func (x *XFile) decode(input string) (string, error) { + if x.Encoder == nil { + return input, nil + } + + encoding := x.Encoder(&EncoderInput{FileName: input, XFile: x}) + if encoding == nil { + return input, nil + } + + output, err := encoding.String(input) + if err != nil { + return "", fmt.Errorf("decoding file name: %w", err) + } + + return output, nil +} diff --git a/files.go b/files.go index 962caee..716f368 100644 --- a/files.go +++ b/files.go @@ -10,8 +10,6 @@ import ( "regexp" "strings" - "github.com/saintfish/chardet" - "golang.org/x/net/html/charset" "golang.org/x/text/encoding" ) @@ -116,7 +114,8 @@ type XFile struct { // (RAR/7z) Archive passwords (to try multiple). Passwords []string // If file names are not UTF8 encoded, pass your own encoder here. - Encoding encoding.Encoding + // Provide a function that takes in a file name and returns an encoder for it. + Encoder func(input *EncoderInput) *encoding.Decoder // Logger allows printing debug messages. log Logger } @@ -455,6 +454,11 @@ func (x *Xtractr) Rename(oldpath, newpath string) error { // clean also decodes the file name using a provided decoder. // If trim length is > 0, then the suffixes are trimmed, and filepath removed. func (x *XFile) clean(filePath string, trim ...string) (string, error) { + filePath, err := x.decode(filePath) + if err != nil { + return "", err + } + if len(trim) != 0 { filePath = filepath.Base(filePath) for _, suffix := range trim { @@ -462,32 +466,7 @@ func (x *XFile) clean(filePath string, trim ...string) (string, error) { } } - decoded, err := x.decode(filePath) - if err != nil { - return "", err - } - - return filepath.Clean(filepath.Join(x.OutputDir, decoded)), nil -} - -// decode a string using the provided decoder. -func (x *XFile) decode(input string) (string, error) { - encoding := x.Encoding - if encoding == nil { - res, _ := chardet.NewTextDetector().DetectBest([]byte(input)) - encoding, _ = charset.Lookup(res.Charset) - } - - if encoding == nil { - return input, nil - } - - output, err := encoding.NewDecoder().String(input) - if err != nil { - return "", fmt.Errorf("decoding file name: %w", err) - } - - return output, nil + return filepath.Clean(filepath.Join(x.OutputDir, filePath)), nil } // AllExcept can be used as an input to ExcludeSuffix in a Filter. diff --git a/go.mod b/go.mod index b4c5bf4..fd1b207 100644 --- a/go.mod +++ b/go.mod @@ -15,12 +15,10 @@ require ( github.com/nwaples/rardecode v1.1.3 github.com/peterebden/ar v0.0.0-20241106141004-20dc11b778e8 github.com/pierrec/lz4/v4 v4.1.22 - github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d github.com/sshaman1101/dcompress v0.0.0-20200109162717-50436a6332de github.com/stretchr/testify v1.10.0 github.com/therootcompany/xz v1.0.1 github.com/ulikunitz/xz v0.5.12 - golang.org/x/net v0.39.0 golang.org/x/text v0.24.0 ) diff --git a/go.sum b/go.sum index ae26d59..2277822 100644 --- a/go.sum +++ b/go.sum @@ -106,8 +106,6 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= -github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA= -github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= github.com/sshaman1101/dcompress v0.0.0-20200109162717-50436a6332de h1:uIeuAon/xwRdiZaCmEd5mocquesYkWCf71WBO7obTmA= github.com/sshaman1101/dcompress v0.0.0-20200109162717-50436a6332de/go.mod h1:XIUpD+1rteMazWrMFjNSpM6TocSHxDYXk6UEgBb5+F0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -182,8 +180,6 @@ golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY= -golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= From ccca0adf07c201d2749337bcdf4b86f49115842a Mon Sep 17 00:00:00 2001 From: David Newhall II Date: Sat, 19 Apr 2025 11:02:40 -0700 Subject: [PATCH 4/5] consistency --- 7z.go | 10 ++++------ cpio.go | 11 +++++------ 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/7z.go b/7z.go index 2a88e99..5785c07 100644 --- a/7z.go +++ b/7z.go @@ -91,13 +91,7 @@ func (x *XFile) un7zip(zipFile *sevenzip.File) (int64, string, error) { } defer zFile.Close() - wfile, err := x.clean(zipFile.Name) - if err != nil { - return 0, wfile, err - } - file := &file{ - Path: wfile, Data: zFile, FileMode: zipFile.Mode(), DirMode: x.DirMode, @@ -105,6 +99,10 @@ func (x *XFile) un7zip(zipFile *sevenzip.File) (int64, string, error) { Atime: zipFile.Accessed, } + if file.Path, err = x.clean(zipFile.Name); err != nil { + return 0, file.Path, err + } + if !strings.HasPrefix(file.Path, x.OutputDir) { // The file being written is trying to write outside of our base path. Malicious archive? err := fmt.Errorf("%s: %w: %s (from: %s)", zipFile.FileInfo().Name(), ErrInvalidPath, file.Path, zipFile.Name) diff --git a/cpio.go b/cpio.go index 0ab566c..ced8b1d 100644 --- a/cpio.go +++ b/cpio.go @@ -64,19 +64,18 @@ func (x *XFile) uncpio(reader io.Reader) (int64, []string, error) { } func (x *XFile) uncpioFile(cpioFile *cpio.Header, cpioReader *cpio.Reader) (int64, error) { - wfile, err := x.clean(cpioFile.Name) - if err != nil { - return 0, err - } - file := &file{ - Path: wfile, Data: cpioReader, FileMode: cpioFile.FileInfo().Mode(), DirMode: x.DirMode, Mtime: cpioFile.ModTime, } + var err error + if file.Path, err = x.clean(cpioFile.Name); err != nil { + return 0, err + } + if !strings.HasPrefix(file.Path, x.OutputDir) { // The file being written is trying to write outside of the base path. Malicious archive? return 0, fmt.Errorf("%s: %w: %s (from: %s)", cpioFile.FileInfo().Name(), ErrInvalidPath, file.Path, cpioFile.Name) From 1939370795cda207662cc634cfc2862a4fade752 Mon Sep 17 00:00:00 2001 From: David Newhall II Date: Sat, 19 Apr 2025 11:46:03 -0700 Subject: [PATCH 5/5] remove name --- files.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files.go b/files.go index 8fadce5..a757152 100644 --- a/files.go +++ b/files.go @@ -116,7 +116,7 @@ type XFile struct { Passwords []string // If file names are not UTF8 encoded, pass your own encoder here. // Provide a function that takes in a file name and returns an encoder for it. - Encoder func(input *EncoderInput) *encoding.Decoder + Encoder func(*EncoderInput) *encoding.Decoder // If the archive only has one directory in the root, then setting // this true will cause the extracted content to be moved into the // output folder, and the root folder in the archive to be removed.