diff --git a/7z.go b/7z.go index 51086d6..5785c07 100644 --- a/7z.go +++ b/7z.go @@ -92,7 +92,6 @@ func (x *XFile) un7zip(zipFile *sevenzip.File) (int64, string, error) { defer zFile.Close() file := &file{ - Path: x.clean(zipFile.Name), Data: zFile, FileMode: zipFile.Mode(), DirMode: x.DirMode, @@ -100,6 +99,10 @@ func (x *XFile) un7zip(zipFile *sevenzip.File) (int64, string, error) { Atime: zipFile.Accessed, } + if file.Path, err = x.clean(zipFile.Name); err != nil { + return 0, file.Path, err + } + if !strings.HasPrefix(file.Path, x.OutputDir) { // The file being written is trying to write outside of our base path. Malicious archive? err := fmt.Errorf("%s: %w: %s (from: %s)", zipFile.FileInfo().Name(), ErrInvalidPath, file.Path, zipFile.Name) diff --git a/ar.go b/ar.go index 3f86752..4a122a2 100644 --- a/ar.go +++ b/ar.go @@ -37,13 +37,16 @@ func (x *XFile) unAr(reader io.Reader) (int64, []string, error) { } file := &file{ - Path: x.clean(header.Name), Data: arReader, FileMode: os.FileMode(header.Mode), //nolint:gosec // what else ya gonna do with this? DirMode: x.DirMode, Mtime: header.ModTime, } + if file.Path, err = x.clean(header.Name); err != nil { + return 0, files, err + } + if !strings.HasPrefix(file.Path, x.OutputDir) { // The file being written is trying to write outside of our base path. Malicious archive? return size, files, fmt.Errorf("%s: %w: %s (from: %s)", x.FilePath, ErrInvalidPath, file.Path, header.Name) diff --git a/cpio.go b/cpio.go index ad44df7..ced8b1d 100644 --- a/cpio.go +++ b/cpio.go @@ -65,13 +65,17 @@ func (x *XFile) uncpio(reader io.Reader) (int64, []string, error) { func (x *XFile) uncpioFile(cpioFile *cpio.Header, cpioReader *cpio.Reader) (int64, error) { file := &file{ - Path: x.clean(cpioFile.Name), Data: cpioReader, FileMode: cpioFile.FileInfo().Mode(), DirMode: x.DirMode, Mtime: cpioFile.ModTime, } + var err error + if file.Path, err = x.clean(cpioFile.Name); err != nil { + return 0, err + } + if !strings.HasPrefix(file.Path, x.OutputDir) { // The file being written is trying to write outside of the base path. Malicious archive? return 0, fmt.Errorf("%s: %w: %s (from: %s)", cpioFile.FileInfo().Name(), ErrInvalidPath, file.Path, cpioFile.Name) diff --git a/decompress.go b/decompress.go index eef907d..1dbd1b1 100644 --- a/decompress.go +++ b/decompress.go @@ -30,14 +30,18 @@ func ExtractXZ(xFile *XFile) (size int64, filesList []string, err error) { return 0, nil, fmt.Errorf("xz.NewReader: %w", err) } - // Get the absolute path of the file being written. file := &file{ - Path: xFile.clean(xFile.FilePath, ".xz"), Data: zipReader, FileMode: xFile.FileMode, DirMode: xFile.DirMode, } + // Get the absolute path of the file being written. + file.Path, err = xFile.clean(xFile.FilePath, ".xz") + if err != nil { + return 0, nil, err + } + size, err = xFile.write(file) if err != nil { return size, nil, err @@ -60,14 +64,18 @@ func ExtractZlib(xFile *XFile) (size int64, filesList []string, err error) { } defer zipReader.Close() - // Get the absolute path of the file being written. file := &file{ - Path: xFile.clean(xFile.FilePath, ".zz", ".zlib"), Data: zipReader, FileMode: xFile.FileMode, DirMode: xFile.DirMode, } + // Get the absolute path of the file being written. + file.Path, err = xFile.clean(xFile.FilePath, ".zz", ".zlib") + if err != nil { + return 0, nil, err + } + size, err = xFile.write(file) if err != nil { return size, nil, err @@ -89,14 +97,18 @@ func ExtractLZMA(xFile *XFile) (size int64, filesList []string, err error) { return 0, nil, fmt.Errorf("lzma.NewReader: %w", err) } - // Get the absolute path of the file being written. file := &file{ - Path: xFile.clean(xFile.FilePath, ".lzma", ".lz", ".lzip"), Data: zipReader, FileMode: xFile.FileMode, DirMode: xFile.DirMode, } + // Get the absolute path of the file being written. + file.Path, err = xFile.clean(xFile.FilePath, ".lzma", ".lz", ".lzip") + if err != nil { + return 0, nil, err + } + size, err = xFile.write(file) if err != nil { return size, nil, err @@ -118,14 +130,18 @@ func ExtractLZMA2(xFile *XFile) (size int64, filesList []string, err error) { return 0, nil, fmt.Errorf("lzma.NewReader2: %w", err) } - // Get the absolute path of the file being written. file := &file{ - Path: xFile.clean(xFile.FilePath, ".lzma", ".lzma2"), Data: zipReader, FileMode: xFile.FileMode, DirMode: xFile.DirMode, } + // Get the absolute path of the file being written. + file.Path, err = xFile.clean(xFile.FilePath, ".lzma", ".lzma2") + if err != nil { + return 0, nil, err + } + size, err = xFile.write(file) if err != nil { return size, nil, err @@ -148,14 +164,18 @@ func ExtractZstandard(xFile *XFile) (size int64, filesList []string, err error) } defer zipReader.Close() - // Get the absolute path of the file being written. file := &file{ - Path: xFile.clean(xFile.FilePath, ".zstd", ".zst"), Data: zipReader, FileMode: xFile.FileMode, DirMode: xFile.DirMode, } + // Get the absolute path of the file being written. + file.Path, err = xFile.clean(xFile.FilePath, ".zstd", ".zst") + if err != nil { + return 0, nil, err + } + size, err = xFile.write(file) if err != nil { return size, nil, err @@ -177,14 +197,18 @@ func ExtractLZW(xFile *XFile) (size int64, filesList []string, err error) { return 0, nil, fmt.Errorf("lzw.NewReader: %w", err) } - // Get the absolute path of the file being written. file := &file{ - Path: xFile.clean(xFile.FilePath, ".Z"), Data: zipReader, FileMode: xFile.FileMode, DirMode: xFile.DirMode, } + // Get the absolute path of the file being written. + file.Path, err = xFile.clean(xFile.FilePath, ".Z") + if err != nil { + return 0, nil, err + } + size, err = xFile.write(file) if err != nil { return size, nil, err @@ -201,14 +225,18 @@ func ExtractLZ4(xFile *XFile) (size int64, filesList []string, err error) { } defer compressedFile.Close() - // Get the absolute path of the file being written. file := &file{ - Path: xFile.clean(xFile.FilePath, ".lz4"), Data: lz4.NewReader(compressedFile), FileMode: xFile.FileMode, DirMode: xFile.DirMode, } + // Get the absolute path of the file being written. + file.Path, err = xFile.clean(xFile.FilePath, ".lz4") + if err != nil { + return 0, nil, err + } + size, err = xFile.write(file) if err != nil { return size, nil, err @@ -225,14 +253,18 @@ func ExtractSnappy(xFile *XFile) (size int64, filesList []string, err error) { } defer compressedFile.Close() - // Get the absolute path of the file being written. file := &file{ - Path: xFile.clean(xFile.FilePath, ".snappy", ".sz"), Data: snappy.NewReader(compressedFile), FileMode: xFile.FileMode, DirMode: xFile.DirMode, } + // Get the absolute path of the file being written. + file.Path, err = xFile.clean(xFile.FilePath, ".snappy", ".sz") + if err != nil { + return 0, nil, err + } + size, err = xFile.write(file) if err != nil { return size, nil, err @@ -249,14 +281,18 @@ func ExtractS2(xFile *XFile) (size int64, filesList []string, err error) { } defer compressedFile.Close() - // Get the absolute path of the file being written. file := &file{ - Path: xFile.clean(xFile.FilePath, ".s2"), Data: s2.NewReader(compressedFile), FileMode: xFile.FileMode, DirMode: xFile.DirMode, } + // Get the absolute path of the file being written. + file.Path, err = xFile.clean(xFile.FilePath, ".s2") + if err != nil { + return 0, nil, err + } + size, err = xFile.write(file) if err != nil { return size, nil, err @@ -273,14 +309,18 @@ func ExtractBrotli(xFile *XFile) (size int64, filesList []string, err error) { } defer compressedFile.Close() - // Get the absolute path of the file being written. file := &file{ - Path: xFile.clean(xFile.FilePath, ".brotli", ".br"), Data: brotli.NewReader(compressedFile), FileMode: xFile.FileMode, DirMode: xFile.DirMode, } + // Get the absolute path of the file being written. + file.Path, err = xFile.clean(xFile.FilePath, ".brotli", ".br") + if err != nil { + return 0, nil, err + } + size, err = xFile.write(file) if err != nil { return size, nil, err @@ -297,14 +337,18 @@ func ExtractBzip(xFile *XFile) (size int64, filesList []string, err error) { } defer compressedFile.Close() - // Get the absolute path of the file being written. file := &file{ - Path: xFile.clean(xFile.FilePath, ".bz", ".bz2"), Data: bzip2.NewReader(compressedFile), FileMode: xFile.FileMode, DirMode: xFile.DirMode, } + // Get the absolute path of the file being written. + file.Path, err = xFile.clean(xFile.FilePath, ".bz", ".bz2") + if err != nil { + return 0, nil, err + } + size, err = xFile.write(file) if err != nil { return size, nil, err @@ -327,15 +371,19 @@ func ExtractGzip(xFile *XFile) (size int64, filesList []string, err error) { } defer zipReader.Close() - // Get the absolute path of the file being written. file := &file{ - Path: xFile.clean(xFile.FilePath, ".gz"), Data: zipReader, FileMode: xFile.FileMode, DirMode: xFile.DirMode, Mtime: zipReader.ModTime, } + // Get the absolute path of the file being written. + file.Path, err = xFile.clean(xFile.FilePath, ".gz") + if err != nil { + return 0, nil, err + } + size, err = xFile.write(file) if err != nil { return size, nil, err diff --git a/encoding.go b/encoding.go new file mode 100644 index 0000000..85ed061 --- /dev/null +++ b/encoding.go @@ -0,0 +1,30 @@ +package xtractr + +import "fmt" + +/* This file will surely grow when someone writes a proper character encoding detector. */ + +// EncoderInput is used as input for a custom encoder procedure. +type EncoderInput struct { + FileName string + XFile *XFile +} + +// decode a string using the provided decoder. +func (x *XFile) decode(input string) (string, error) { + if x.Encoder == nil { + return input, nil + } + + encoding := x.Encoder(&EncoderInput{FileName: input, XFile: x}) + if encoding == nil { + return input, nil + } + + output, err := encoding.String(input) + if err != nil { + return "", fmt.Errorf("decoding file name: %w", err) + } + + return output, nil +} diff --git a/files.go b/files.go index 3a061ec..a757152 100644 --- a/files.go +++ b/files.go @@ -10,6 +10,8 @@ import ( "regexp" "strings" "time" + + "golang.org/x/text/encoding" ) // ArchiveList is the value returned when searching for compressed files. @@ -112,6 +114,9 @@ type XFile struct { Password string // (RAR/7z) Archive passwords (to try multiple). Passwords []string + // If file names are not UTF8 encoded, pass your own encoder here. + // Provide a function that takes in a file name and returns an encoder for it. + Encoder func(*EncoderInput) *encoding.Decoder // If the archive only has one directory in the root, then setting // this true will cause the extracted content to be moved into the // output folder, and the root folder in the archive to be removed. @@ -473,8 +478,14 @@ func (x *Xtractr) Rename(oldpath, newpath string) error { } // clean returns an absolute path for a file inside the OutputDir. +// clean also decodes the file name using a provided decoder. // If trim length is > 0, then the suffixes are trimmed, and filepath removed. -func (x *XFile) clean(filePath string, trim ...string) string { +func (x *XFile) clean(filePath string, trim ...string) (string, error) { + filePath, err := x.decode(filePath) + if err != nil { + return "", err + } + if len(trim) != 0 { filePath = filepath.Base(filePath) for _, suffix := range trim { @@ -482,7 +493,7 @@ func (x *XFile) clean(filePath string, trim ...string) string { } } - return filepath.Clean(filepath.Join(x.OutputDir, filePath)) + return filepath.Clean(filepath.Join(x.OutputDir, filePath)), nil } // AllExcept can be used as an input to ExcludeSuffix in a Filter. diff --git a/go.mod b/go.mod index 9f2fdd1..fd1b207 100644 --- a/go.mod +++ b/go.mod @@ -19,6 +19,7 @@ require ( github.com/stretchr/testify v1.10.0 github.com/therootcompany/xz v1.0.1 github.com/ulikunitz/xz v0.5.12 + golang.org/x/text v0.24.0 ) require ( @@ -28,9 +29,9 @@ require ( github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect + github.com/kr/text v0.2.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect go4.org v0.0.0-20230225012048-214862532bf5 // indirect golang.org/x/crypto v0.37.0 // indirect - golang.org/x/text v0.24.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index c70a504..2277822 100644 --- a/go.sum +++ b/go.sum @@ -34,6 +34,7 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -91,8 +92,9 @@ github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgo github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc= github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/peterebden/ar v0.0.0-20241106141004-20dc11b778e8 h1:27L3dHkYbeWGU3/5NasAzVDgXG9QzlfKCvcl4cdNW6c= diff --git a/iso.go b/iso.go index 00c309b..3fd0b71 100644 --- a/iso.go +++ b/iso.go @@ -76,13 +76,17 @@ func (x *XFile) uniso(isoFile *iso9660.File, parent string) (int64, []string, er func (x *XFile) unisofile(isoFile *iso9660.File, wfile string) (int64, []string, error) { file := &file{ - Path: x.clean(wfile), Data: isoFile.Reader(), FileMode: isoFile.Mode(), DirMode: x.DirMode, Mtime: isoFile.ModTime(), } + var err error + if file.Path, err = x.clean(wfile); err != nil { + return 0, nil, err + } + //nolint:gocritic // this 1-argument filepath.Join removes a ./ prefix should there be one. if !strings.HasPrefix(file.Path, filepath.Join(x.OutputDir)) { // The file being written is trying to write outside of our base path. Malicious ISO? diff --git a/rar.go b/rar.go index 2b87ebb..62611ce 100644 --- a/rar.go +++ b/rar.go @@ -90,13 +90,17 @@ func (x *XFile) unrar(rarReader *rardecode.ReadCloser) (int64, []string, error) } file := &file{ - Path: x.clean(header.Name), Data: rarReader, FileMode: header.Mode(), DirMode: x.DirMode, Mtime: header.ModificationTime, Atime: header.AccessTime, } + + if file.Path, err = x.clean(header.Name); err != nil { + return 0, nil, err + } + //nolint:gocritic // this 1-argument filepath.Join removes a ./ prefix should there be one. if !strings.HasPrefix(file.Path, filepath.Join(x.OutputDir)) { // The file being written is trying to write outside of our base path. Malicious archive? diff --git a/tar.go b/tar.go index 5631432..3897eb7 100644 --- a/tar.go +++ b/tar.go @@ -135,7 +135,6 @@ func (x *XFile) untar(reader io.Reader) (int64, []string, error) { func (x *XFile) untarFile(header *tar.Header, tarReader *tar.Reader) (int64, error) { file := &file{ - Path: x.clean(header.Name), Data: tarReader, FileMode: header.FileInfo().Mode(), DirMode: x.DirMode, @@ -143,6 +142,11 @@ func (x *XFile) untarFile(header *tar.Header, tarReader *tar.Reader) (int64, err Atime: header.AccessTime, } + var err error + if file.Path, err = x.clean(header.Name); err != nil { + return 0, err + } + if header.Format != tar.FormatGNU && header.Format != tar.FormatPAX { file.Mtime = header.ModTime file.Atime = time.Now() diff --git a/zip.go b/zip.go index df9b112..3058d15 100644 --- a/zip.go +++ b/zip.go @@ -46,7 +46,6 @@ func (x *XFile) unzip(zipFile *zip.File) (int64, string, error) { defer zFile.Close() file := &file{ - Path: x.clean(zipFile.Name), Data: zFile, FileMode: zipFile.Mode(), DirMode: x.DirMode, @@ -54,6 +53,10 @@ func (x *XFile) unzip(zipFile *zip.File) (int64, string, error) { Atime: time.Now(), } + if file.Path, err = x.clean(zipFile.Name); err != nil { + return 0, file.Path, err + } + if !strings.HasPrefix(file.Path, x.OutputDir) { // The file being written is trying to write outside of our base path. Malicious archive? err := fmt.Errorf("%s: %w: %s (from: %s)", zipFile.FileInfo().Name(), ErrInvalidPath, file.Path, zipFile.Name)