diff --git a/.golangci.yml b/.golangci.yml index 8c037fd..e629623 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -7,6 +7,7 @@ linters: - exhaustruct - depguard - nonamedreturns + - forbidigo exclusions: rules: - linters: @@ -20,6 +21,7 @@ linters: gosec: excludes: - G304 + - G115 gocritic: enable-all: true disabled-checks: @@ -30,8 +32,8 @@ linters: checkExported: true errcheck: check-type-assertions: true - check-blank: true - disable-default-exclusions: true + check-blank: false + disable-default-exclusions: false exclude-functions: - (*os.File).Close - os.RemoveAll diff --git a/7z.go b/7z.go index 51086d6..19681aa 100644 --- a/7z.go +++ b/7z.go @@ -10,7 +10,7 @@ import ( // Extract7z extracts a 7zip archive. // Volumes: https://github.com/bodgit/sevenzip/issues/54 -func Extract7z(xFile *XFile) (size int64, filesList, archiveList []string, err error) { +func Extract7z(xFile *XFile) (size uint64, filesList, archiveList []string, err error) { if len(xFile.Passwords) == 0 && xFile.Password == "" { return extract7z(xFile) } @@ -41,50 +41,51 @@ func Extract7z(xFile *XFile) (size int64, filesList, archiveList []string, err e return 0, nil, nil, nil } -func extract7z(xFile *XFile) (int64, []string, []string, error) { - var ( - sevenZip *sevenzip.ReadCloser - err error - ) - - if xFile.Password != "" { - sevenZip, err = sevenzip.OpenReaderWithPassword(xFile.FilePath, xFile.Password) - } else { - sevenZip, err = sevenzip.OpenReader(xFile.FilePath) +func extract7z(xFile *XFile) (uint64, []string, []string, error) { + sevenZip, err := sevenzip.OpenReaderWithPassword(xFile.FilePath, xFile.Password) + if err != nil { + return 0, nil, nil, fmt.Errorf("%s: os.Open: %w", xFile.FilePath, err) } + defer xFile.newProgress(getUncompressed7zSize(sevenZip)).done() // this closes sevenZip + + sevenZip, err = sevenzip.OpenReaderWithPassword(xFile.FilePath, xFile.Password) if err != nil { return 0, nil, nil, fmt.Errorf("%s: os.Open: %w", xFile.FilePath, err) } - defer sevenZip.Close() files := []string{} - size := int64(0) for _, zipFile := range sevenZip.File { fSize, wfile, err := xFile.un7zip(zipFile) if err != nil { - lastFile := xFile.FilePath - /* // https://github.com/bodgit/sevenzip/issues/54 - // We can probably never get the file with the error. 
- if volumes := sevenZip.Volumes(); len(volumes) > 0 { - lastFile = volumes[len(volumes)-1] - } */ - return size, files, sevenZip.Volumes(), fmt.Errorf("%s: %w", lastFile, err) + return xFile.prog.Wrote, files, []string{xFile.FilePath}, fmt.Errorf("%s: %w", xFile.FilePath, err) } files = append(files, filepath.Join(xFile.OutputDir, zipFile.Name)) - size += fSize - xFile.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", wfile, fSize, len(files), size) + xFile.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", + wfile, fSize, xFile.prog.Files, xFile.prog.Wrote) } files, err = xFile.cleanup(files) - return size, files, sevenZip.Volumes(), err + return xFile.prog.Wrote, files, []string{xFile.FilePath}, err +} + +func getUncompressed7zSize(reader *sevenzip.ReadCloser) (total, compressed uint64, count int) { + defer reader.Close() + + for _, zipFile := range reader.File { + total += zipFile.UncompressedSize + // compressed += uint64(zipFile.FileInfo().Size()) + count++ + } + + return total, 0, count } -func (x *XFile) un7zip(zipFile *sevenzip.File) (int64, string, error) { +func (x *XFile) un7zip(zipFile *sevenzip.File) (uint64, string, error) { zFile, err := zipFile.Open() if err != nil { return 0, zipFile.Name, fmt.Errorf("zipFile.Open: %w", err) diff --git a/ar.go b/ar.go index 3f86752..85c1294 100644 --- a/ar.go +++ b/ar.go @@ -11,20 +11,28 @@ import ( ) // ExtractAr extracts a raw ar archive. Used by debian (.deb) packages. -func ExtractAr(xFile *XFile) (size int64, filesList []string, err error) { +func ExtractAr(xFile *XFile) (size uint64, filesList []string, err error) { arFile, err := os.Open(xFile.FilePath) if err != nil { + return 0, nil, fmt.Errorf("rardecode.OpenReader: %w", err) + } + + defer xFile.newProgress(getUncompressedArSize(arFile)).done() // this closes arFile + + if arFile, err = os.Open(xFile.FilePath); err != nil { return 0, nil, fmt.Errorf("os.Open: %w", err) } + defer arFile.Close() - return xFile.unAr(arFile) + files, err := xFile.unAr(xFile.prog.reader(arFile)) + + return xFile.prog.Wrote, files, err } -func (x *XFile) unAr(reader io.Reader) (int64, []string, error) { +func (x *XFile) unAr(reader io.Reader) ([]string, error) { arReader := ar.NewReader(reader) files := []string{} - size := int64(0) for { header, err := arReader.Next() @@ -33,34 +41,54 @@ func (x *XFile) unAr(reader io.Reader) (int64, []string, error) { break } - return size, files, fmt.Errorf("%s: arReader.Next: %w", x.FilePath, err) + return files, fmt.Errorf("%s: arReader.Next: %w", x.FilePath, err) } file := &file{ Path: x.clean(header.Name), Data: arReader, - FileMode: os.FileMode(header.Mode), //nolint:gosec // what else ya gonna do with this? + FileMode: os.FileMode(header.Mode), DirMode: x.DirMode, Mtime: header.ModTime, } if !strings.HasPrefix(file.Path, x.OutputDir) { // The file being written is trying to write outside of our base path. Malicious archive? - return size, files, fmt.Errorf("%s: %w: %s (from: %s)", x.FilePath, ErrInvalidPath, file.Path, header.Name) + return files, fmt.Errorf("%s: %w: %s (from: %s)", x.FilePath, ErrInvalidPath, file.Path, header.Name) } // ar format does not store directory paths. Flat list of files. 
fSize, err := x.write(file) if err != nil { - return size, files, err + return files, err } files = append(files, file.Path) - size += fSize + x.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", + file.Path, fSize, x.prog.Files, x.prog.Wrote) } - files, err := x.cleanup(files) + return x.cleanup(files) +} + +// ar files are not compressed. +func getUncompressedArSize(arFile io.ReadCloser) (total, compressed uint64, count int) { + defer arFile.Close() + + arReader := ar.NewReader(arFile) - return size, files, err + for { + header, err := arReader.Next() + if err != nil { + if errors.Is(err, io.EOF) { + return total, 0, count + } + + return total, 0, count + } + + total += uint64(header.Size) + count++ + } } diff --git a/cpio.go b/cpio.go index ad44df7..47a1cc0 100644 --- a/cpio.go +++ b/cpio.go @@ -13,57 +13,65 @@ import ( ) // ExtractCPIOGzip extracts a gzip-compressed cpio archive (cpgz). -func ExtractCPIOGzip(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractCPIOGzip(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipStream, err := gzip.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 0).done() + + zipStream, err := gzip.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("gzip.NewReader: %w", err) } defer zipStream.Close() - return xFile.uncpio(zipStream) + files, err := xFile.uncpio(zipStream) + + return xFile.prog.Wrote, files, err } // ExtractCPIO extracts a .cpio file. -func ExtractCPIO(xFile *XFile) (size int64, filesList []string, err error) { - fileReader, err := os.Open(xFile.FilePath) +func ExtractCPIO(xFile *XFile) (size uint64, filesList []string, err error) { + fileReader, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer fileReader.Close() - return xFile.uncpio(fileReader) + defer xFile.newProgress(uint64(stat.Size()), uint64(stat.Size()), 0).done() + + files, err := xFile.uncpio(xFile.prog.reader(fileReader)) + + return xFile.prog.Wrote, files, err } -func (x *XFile) uncpio(reader io.Reader) (int64, []string, error) { +func (x *XFile) uncpio(reader io.Reader) ([]string, error) { zipReader := cpio.NewReader(reader) files := []string{} - size := int64(0) for { zipFile, err := zipReader.Next() if errors.Is(err, io.EOF) { - return size, files, nil + return files, nil } else if err != nil { - return 0, nil, fmt.Errorf("cpio Next() failed: %w", err) + return nil, fmt.Errorf("cpio Next() failed: %w", err) } fSize, err := x.uncpioFile(zipFile, zipReader) if err != nil { - return size, files, fmt.Errorf("%s: %w", x.FilePath, err) + return files, fmt.Errorf("%s: %w", x.FilePath, err) } files = append(files, filepath.Join(x.OutputDir, zipFile.Name)) - size += fSize + x.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", + zipFile.Name, fSize, x.prog.Files, x.prog.Wrote) } } -func (x *XFile) uncpioFile(cpioFile *cpio.Header, cpioReader *cpio.Reader) (int64, error) { +func (x *XFile) uncpioFile(cpioFile *cpio.Header, cpioReader *cpio.Reader) (uint64, error) { file := &file{ Path: x.clean(cpioFile.Name), Data: cpioReader, diff --git a/decompress.go b/decompress.go index eef907d..6e2e63f 100644 --- a/decompress.go +++ b/decompress.go @@ 
-4,7 +4,6 @@ import ( "compress/bzip2" "compress/gzip" "fmt" - "os" "github.com/andybalholm/brotli" "github.com/klauspost/compress/s2" @@ -18,14 +17,16 @@ import ( ) // ExtractXZ extracts an XZ-compressed file. A single file. -func ExtractXZ(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractXZ(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipReader, err := xz.NewReader(compressedFile, 0) + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + zipReader, err := xz.NewReader(xFile.prog.reader(compressedFile), 0) if err != nil { return 0, nil, fmt.Errorf("xz.NewReader: %w", err) } @@ -39,22 +40,21 @@ func ExtractXZ(xFile *XFile) (size int64, filesList []string, err error) { } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractZlib extracts a zlib-compressed file. A single file. -func ExtractZlib(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractZlib(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipReader, err := zlib.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + zipReader, err := zlib.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("zlib.NewReader: %w", err) } @@ -69,22 +69,21 @@ func ExtractZlib(xFile *XFile) (size int64, filesList []string, err error) { } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractLZMA extracts an lzma-compressed file. A single file. -func ExtractLZMA(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractLZMA(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipReader, err := lzma.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + zipReader, err := lzma.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("lzma.NewReader: %w", err) } @@ -106,14 +105,16 @@ func ExtractLZMA(xFile *XFile) (size int64, filesList []string, err error) { } // ExtractLZMA2 extracts an lzma2-compressed file. A single file. 
-func ExtractLZMA2(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractLZMA2(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipReader, err := lzma.NewReader2(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + zipReader, err := lzma.NewReader2(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("lzma.NewReader2: %w", err) } @@ -127,22 +128,21 @@ func ExtractLZMA2(xFile *XFile) (size int64, filesList []string, err error) { } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractZstandard extracts a Zstandard-compressed file. A single file. -func ExtractZstandard(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractZstandard(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipReader, err := zstd.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + zipReader, err := zstd.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("zstd.NewReader: %w", err) } @@ -157,22 +157,21 @@ func ExtractZstandard(xFile *XFile) (size int64, filesList []string, err error) } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractLZW extracts an LZW-compressed file. A single file. -func ExtractLZW(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractLZW(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipReader, err := lzw.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + zipReader, err := lzw.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("lzw.NewReader: %w", err) } @@ -186,142 +185,139 @@ func ExtractLZW(xFile *XFile) (size int64, filesList []string, err error) { } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractLZ4 extracts an LZ4-compressed file. A single file. -func ExtractLZ4(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractLZ4(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + reader := lz4.NewReader(xFile.prog.reader(compressedFile)) + xFile.prog.Total = uint64(reader.Size()) + // Get the absolute path of the file being written. 
file := &file{ Path: xFile.clean(xFile.FilePath, ".lz4"), - Data: lz4.NewReader(compressedFile), + Data: reader, FileMode: xFile.FileMode, DirMode: xFile.DirMode, } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractSnappy extracts a snappy-compressed file. A single file. -func ExtractSnappy(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractSnappy(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + // Get the absolute path of the file being written. file := &file{ Path: xFile.clean(xFile.FilePath, ".snappy", ".sz"), - Data: snappy.NewReader(compressedFile), + Data: snappy.NewReader(xFile.prog.reader(compressedFile)), FileMode: xFile.FileMode, DirMode: xFile.DirMode, } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractS2 extracts a Snappy2-compressed file. A single file. -func ExtractS2(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractS2(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + // Get the absolute path of the file being written. file := &file{ Path: xFile.clean(xFile.FilePath, ".s2"), - Data: s2.NewReader(compressedFile), + Data: s2.NewReader(xFile.prog.reader(compressedFile)), FileMode: xFile.FileMode, DirMode: xFile.DirMode, } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractBrotli extracts a Brotli-compressed file. A single file. -func ExtractBrotli(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractBrotli(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + // Get the absolute path of the file being written. file := &file{ Path: xFile.clean(xFile.FilePath, ".brotli", ".br"), - Data: brotli.NewReader(compressedFile), + Data: brotli.NewReader(xFile.prog.reader(compressedFile)), FileMode: xFile.FileMode, DirMode: xFile.DirMode, } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractBzip extracts a bzip2-compressed file. That is, a single file. 
-func ExtractBzip(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractBzip(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + // Get the absolute path of the file being written. file := &file{ Path: xFile.clean(xFile.FilePath, ".bz", ".bz2"), - Data: bzip2.NewReader(compressedFile), + Data: bzip2.NewReader(xFile.prog.reader(compressedFile)), FileMode: xFile.FileMode, DirMode: xFile.DirMode, } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractGzip extracts a gzip-compressed file. That is, a single file. -func ExtractGzip(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractGzip(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipReader, err := gzip.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + zipReader, err := gzip.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("gzip.NewReader: %w", err) } @@ -337,9 +333,6 @@ func ExtractGzip(xFile *XFile) (size int64, filesList []string, err error) { } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } diff --git a/files.go b/files.go index 3a061ec..6e3a278 100644 --- a/files.go +++ b/files.go @@ -24,7 +24,7 @@ type archive struct { } // Interface is a common interface for extracting compressed or non-compressed files or archives. -type Interface func(x *XFile) (size int64, filesList, archiveList []string, err error) +type Interface func(x *XFile) (size uint64, filesList, archiveList []string, err error) // https://github.com/golift/xtractr/issues/44 // @@ -80,8 +80,8 @@ var extension2function = []archive{ // ChngInt converts the smaller return interface into an ExtractInterface. // Functions with multi-part archive files return four values. Other functions return only 3. // This ChngInt function makes both interfaces compatible. -func ChngInt(smallFn func(*XFile) (int64, []string, error)) Interface { - return func(xFile *XFile) (int64, []string, []string, error) { +func ChngInt(smallFn func(*XFile) (uint64, []string, error)) Interface { + return func(xFile *XFile) (uint64, []string, []string, error) { size, files, err := smallFn(xFile) return size, files, []string{xFile.FilePath}, err } @@ -112,6 +112,13 @@ type XFile struct { Password string // (RAR/7z) Archive passwords (to try multiple). Passwords []string + // Progress is called periodically during file extraction. + // Contains info about the progress of the extraction. + // This is not called if an Updates channel is also provided. + Progress func(Progress) + // If an Updates channel is provided, all Progress updates are sent to it. + // Contains info about the progress of the extraction. 
+ Updates chan Progress // If the archive only has one directory in the root, then setting // this true will cause the extracted content to be moved into the // output folder, and the root folder in the archive to be removed. @@ -119,6 +126,7 @@ type XFile struct { // Logger allows printing debug messages. log Logger moveFiles func(fromPath, toPath string, overwrite bool) ([]string, error) + prog *Progress } // Filter is the input to find compressed files. @@ -310,13 +318,13 @@ func getCompressedFiles(path string, filter *Filter, fileList []os.FileInfo, dep // Extract calls the correct procedure for the type of file being extracted. // Returns size of extracted data, list of extracted files, and/or error. -func (x *XFile) Extract() (size int64, filesList, archiveList []string, err error) { +func (x *XFile) Extract() (size uint64, filesList, archiveList []string, err error) { return ExtractFile(x) } // ExtractFile calls the correct procedure for the type of file being extracted. // Returns size of extracted data, list of extracted files, list of archives processed, and/or error. -func ExtractFile(xFile *XFile) (size int64, filesList, archiveList []string, err error) { +func ExtractFile(xFile *XFile) (size uint64, filesList, archiveList []string, err error) { sName := strings.ToLower(xFile.FilePath) // just borrowing this... Has to go into an interface to avoid a cycle. xFile.moveFiles = parseConfig(&Config{Logger: xFile.log}).MoveFiles @@ -416,7 +424,7 @@ func (x *XFile) mkDir(path string, mode os.FileMode, mtime time.Time) error { } // write a file from an io reader, making sure all parent directories exist. -func (x *XFile) write(file *file) (int64, error) { +func (x *XFile) write(file *file) (uint64, error) { if err := x.mkDir(filepath.Dir(file.Path), file.DirMode, file.Mtime); err != nil { return 0, fmt.Errorf("writing archived file '%s' parent folder: %w", filepath.Base(file.Path), err) } @@ -427,17 +435,17 @@ func (x *XFile) write(file *file) (int64, error) { } defer fout.Close() - size, err := io.Copy(fout, file.Data) + size, err := io.Copy(x.prog.writer(fout), file.Data) if err != nil { - return size, fmt.Errorf("copying archived file '%s' io: %w", file.Path, err) + return uint64(size), fmt.Errorf("copying archived file '%s' io: %w", file.Path, err) } // If this sucks, make it a defer and ignore the error, like xFile.mkDir(). if err = os.Chtimes(file.Path, file.Atime, file.Mtime); err != nil { - return size, fmt.Errorf("changing archived file times: %w", err) + return uint64(size), fmt.Errorf("changing archived file times: %w", err) } - return size, nil + return uint64(size), nil } // Rename is an attempt to deal with "invalid cross link device" on weird file systems. @@ -600,3 +608,18 @@ func (x *XFile) safeFileMode(current os.FileMode) os.FileMode { return current | minimum } + +func openStatFile(path string) (*os.File, os.FileInfo, error) { + file, err := os.Open(path) + if err != nil { + return nil, nil, fmt.Errorf("os.Open: %w", err) + } + + stat, err := file.Stat() + if err != nil { + _ = file.Close() + return nil, nil, fmt.Errorf("file.Stat: %w", err) + } + + return file, stat, nil +} diff --git a/iso.go b/iso.go index 00c309b..718ef65 100644 --- a/iso.go +++ b/iso.go @@ -10,14 +10,18 @@ import ( ) // ExtractISO writes an ISO's contents to disk. 
-func ExtractISO(xFile *XFile) (size int64, filesList []string, err error) {
-    openISO, err := os.Open(xFile.FilePath)
+func ExtractISO(xFile *XFile) (size uint64, filesList []string, err error) {
+    openISO, err := os.Open(xFile.FilePath) // os.Open on purpose.
     if err != nil {
-        return 0, nil, fmt.Errorf("failed to open iso file: %s: %w", xFile.FilePath, err)
+        return 0, nil, fmt.Errorf("os.Open: %w", err)
     }
     defer openISO.Close()

-    iso, err := iso9660.OpenImage(openISO)
+    image, _ := iso9660.OpenImage(openISO)
+
+    defer xFile.newProgress(getUncompressedIsoSize(image)).done()
+
+    iso, err := iso9660.OpenImage(xFile.prog.readAter(openISO))
     if err != nil {
         return 0, nil, fmt.Errorf("failed to open iso image: %s: %w", xFile.FilePath, err)
     }
@@ -35,7 +39,38 @@ func ExtractISO(xFile *XFile) (size int64, filesList []string, err error) {
     return size, files, nil
 }

-func (x *XFile) uniso(isoFile *iso9660.File, parent string) (int64, []string, error) {
+//nolint:unparam // so we can pass it in.
+func getUncompressedIsoSize(image *iso9660.Image) (total, _ uint64, count int) {
+    if image == nil {
+        return total, 0, count
+    }
+
+    var loop func(isoFile *iso9660.File)
+    loop = func(isoFile *iso9660.File) {
+        count++
+
+        children, err := isoFile.GetChildren()
+        if err != nil {
+            return
+        }
+
+        for _, child := range children {
+            total += uint64(child.Size())
+            loop(child)
+        }
+    }
+
+    root, err := image.RootDir()
+    if err != nil {
+        return total, 0, count
+    }
+
+    loop(root)
+
+    return total, 0, count
+}
+
+func (x *XFile) uniso(isoFile *iso9660.File, parent string) (uint64, []string, error) {
     itemName := filepath.Join(parent, isoFile.Name())

     if isoFile.Name() == string([]byte{0}) { // rename root folder.
@@ -56,7 +91,7 @@ func (x *XFile) uniso(isoFile *iso9660.File, parent string) (int64, []string, er
     }

     files := []string{}
-    size := int64(0)
+    size := uint64(0)

     for _, child := range children {
         childSize, childFiles, err := x.uniso(child, itemName)
@@ -74,7 +109,7 @@ func (x *XFile) uniso(isoFile *iso9660.File, parent string) (int64, []string, er
     return size, files, err
 }

-func (x *XFile) unisofile(isoFile *iso9660.File, wfile string) (int64, []string, error) {
+func (x *XFile) unisofile(isoFile *iso9660.File, wfile string) (uint64, []string, error) {
     file := &file{
         Path: x.clean(wfile),
         Data: isoFile.Reader(),
@@ -93,6 +128,8 @@ func (x *XFile) unisofile(isoFile *iso9660.File, wfile string) (int64, []string,
     x.Debugf("Writing archived file: %s (bytes: %d)", file.Path, isoFile.Size())

     size, err := x.write(file)
+    x.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes",
+        file.Path, size, x.prog.Files, int64(x.prog.Wrote))

     return size, []string{file.Path}, err
 }
diff --git a/iso_test.go b/iso_test.go
index 9fbf17f..437d4ca 100644
--- a/iso_test.go
+++ b/iso_test.go
@@ -23,7 +23,7 @@ func TestIso(t *testing.T) {
         require.NoError(t, err, "failed to cleanup writer")
     }()

-    size := int64(0)
+    size := uint64(0)
     walkErr := filepath.Walk(testFilesInfo.srcFilesDir, func(path string, info os.FileInfo, err error) error {
         require.NoError(t, err, "unexpected")

@@ -37,7 +37,7 @@ func TestIso(t *testing.T) {
         fStat, err := fileToAdd.Stat()
         require.NoError(t, err, "failed to stat file")

-        size += fStat.Size()
+        size += uint64(fStat.Size())

         err = writer.AddFile(fileToAdd, strings.TrimPrefix(fileToAdd.Name(), testFilesInfo.srcFilesDir))
         require.NoError(t, err, "failed to add file")
diff --git a/progress.go b/progress.go
new file mode 100644
index 0000000..c380a3c
--- /dev/null
+++ b/progress.go
@@ -0,0 +1,160 @@
+package xtractr
+
+import (
+    "fmt"
+    "io"
+)
+
+const maxPercent = 100
+
+// Progress provides data about an in-progress file extraction and/or decompression.
+type Progress struct {
+    // Total uncompressed bytes in the archive.
+    // This number is not available in all archive types, and may be 0.
+    Total uint64
+    // Compressed is the size of the archive file (compressed size).
+    // It may equal the Total (uncompressed) for non-compressed archives, like tar.
+    Compressed uint64
+    // Wrote this many bytes to disk.
+    Wrote uint64
+    // This many compressed bytes have been read from the archive.
+    Read uint64
+    // Files (number of) written to disk.
+    Files int
+    // Count of files in archive.
+    // This number is not available in all archive types, and may be 0.
+    Count int
+    // Done is set to true in the final progress update.
+    Done bool
+    // This is the input file. Do not modify the data.
+    XFile *XFile
+    send  func()
+}
+
+// Percent returns the percent of bytes read or written.
+func (p *Progress) Percent() (perc float64) {
+    if p.Total > 0 {
+        return float64(p.Wrote) / float64(p.Total) * maxPercent
+    } else if p.Compressed > 0 {
+        return float64(p.Read) / float64(p.Compressed) * maxPercent
+    }
+
+    return 0
+}
+
+// ArchiveProgress is a helper/example function you can use in your code to print extraction percentages.
+// @every - Should be a number between 1 and 50 or so. This controls how often to print the percentage.
+// The values 1, 2, 4, 5, 10, 20 and 25 work best.
+// @reset - If set true, a `\r` is printed before each line, which will reset it on most terminals.
+// @exit - If exit is true, then the for loop exits and the process returns when Progress.Done is true.
+// Set `exit` true if you want a separate printer for each archive. A good reason is parallel extractions.
+func ArchiveProgress(every float64, progress chan Progress, reset, exit bool) { //nolint:cyclop
+    var (
+        perc, last float64
+        pre        string
+        mod        = "%s%.0f%% "
+    )
+
+    const extra = 0.000000001
+
+    if reset {
+        pre = "\r\033[K"
+    }
+
+    if every < 1 {
+        mod = "%s%.1f%% "
+    }
+
+    for prog := range progress {
+        if prog.Done && exit {
+            fmt.Println()
+            return
+        }
+
+        if prog.Done {
+            fmt.Println()
+            last = 0 // reset for the next archive.
+
+            continue
+        }
+
+        if perc = prog.Percent(); perc == maxPercent && last < maxPercent {
+            last = maxPercent
+
+            fmt.Printf(mod, pre, perc)
+        }
+
+        if last == 0 && perc == 0 || perc > last+every {
+            last = perc + extra // we add extra so 0% only prints once.
+
+            fmt.Printf(mod, pre, perc)
+        }
+    }
+}
+
+func (x *XFile) newProgress(total, compressed uint64, count int) *Progress {
+    x.prog = &Progress{Total: total, Compressed: compressed, Count: count, send: func() {}, XFile: x}
+
+    if x.Progress != nil {
+        x.prog.send = func() { x.Progress(*x.prog) }
+    }
+
+    if x.Updates != nil {
+        x.prog.send = func() { x.Updates <- *x.prog }
+    }
+
+    return x.prog
+}
+
+// progressWrapper wraps several io interfaces so we can count the bytes read and written to those interfaces.
+type progressWrapper struct {
+    io.Writer
+    io.Reader
+    io.ReaderAt
+    *Progress
+}
+
+func (p *progressWrapper) Write(data []byte) (n int, err error) {
+    defer p.send()
+
+    size, err := p.Writer.Write(data)
+    p.Wrote += uint64(size)
+
+    return size, err //nolint:wrapcheck
+}
+
+func (p *progressWrapper) Read(data []byte) (n int, err error) {
+    defer p.send()
+
+    size, err := p.Reader.Read(data)
+    p.Progress.Read += uint64(size)
+
+    return size, err //nolint:wrapcheck
+}
+
+func (p *progressWrapper) ReadAt(data []byte, off int64) (n int, err error) {
+    defer p.send()
+
+    size, err := p.ReaderAt.ReadAt(data, off)
+    p.Progress.Read += uint64(size)
+
+    return size, err //nolint:wrapcheck
+}
+
+func (p *Progress) writer(writer io.Writer) io.Writer {
+    p.Files++
+    return &progressWrapper{Writer: writer, Progress: p}
+}
+
+func (p *Progress) reader(reader io.Reader) io.Reader {
+    return &progressWrapper{Reader: reader, Progress: p}
+}
+
+func (p *Progress) readAter(reader io.ReaderAt) io.ReaderAt {
+    return &progressWrapper{ReaderAt: reader, Progress: p}
+}
+
+func (p *Progress) done() {
+    p.Done = true
+    p.send()
+}
diff --git a/queue.go b/queue.go
index 5970fb9..d4cba9c 100644
--- a/queue.go
+++ b/queue.go
@@ -31,7 +31,7 @@ type Xtract struct {
     RecurseISO bool
     // Folder to extract data. Default is same level as SearchPath with a suffix.
     ExtractTo string
-    // Leave files in temporary folder? false=move files back to Searchpath
+    // Leave files in temporary folder? false=move files back to Filter.Path
     // Moving files back will cause the "extracted files" returned to only contain top-level items.
     TempFolder bool
     // Delete Archives after successful extraction? Be careful.
@@ -42,17 +42,26 @@ type Xtract struct {
     CBFunction func(*Response)
     // Callback Channel, msg sent twice per queued item.
     CBChannel chan *Response
+    // Progress is called periodically during file extraction.
+    // Contains info about the progress of the extraction.
+    // This is not called if an Updates channel is also provided.
+    // Shared by all archive file extractions that occur with this Xtract.
+    Progress func(Progress)
+    // If an Updates channel is provided, all Progress updates are sent to it.
+    // Contains info about the progress of the extraction.
+    // Shared by all archive file extractions that occur with this Xtract.
+    Updates chan Progress
 }

 // Response is sent to the call-back function. The first CBFunction call is just
 // a notification that the extraction has started. You can determine it's the first
-// call by chcking Response.Done. false = started, true = finished. When done=false
+// call by checking Response.Done. false = started, true = finished. When done=false
 // the only other meaningful data provided is the re.Archives, re.Output and re.Queue.
 type Response struct {
     // Extract Started (false) or Finished (true).
     Done bool
     // Size of data written.
-    Size int64
+    Size uint64
     // Temporary output folder.
     Output string
     // Items still in queue.
@@ -87,6 +96,8 @@ func (x *Xtractr) Extract(extract *Xtract) (int, error) {
     return queueSize, nil
 }

+const fsSyncDelay = 10 * time.Second
+
 // processQueue runs in a go routine, 'x.Parallel' times,
 // and watches for things to extract.
func (x *Xtractr) processQueue() { @@ -169,6 +180,8 @@ func (x *Xtractr) decompressFolders(resp *Response) error { DeleteOrig: resp.X.DeleteOrig, TempFolder: resp.X.TempFolder, LogFile: resp.X.LogFile, + Updates: resp.X.Updates, + Progress: resp.X.Progress, }, Started: resp.Started, Output: output, @@ -264,6 +277,8 @@ func (x *Xtractr) decompressFiles(resp *Response) error { X: &Xtract{ Password: resp.X.Password, Passwords: resp.X.Passwords, + Progress: resp.X.Progress, + Updates: resp.X.Updates, }, Started: resp.Started, Output: resp.Output, @@ -313,7 +328,7 @@ func (x *Xtractr) decompressArchives(resp *Response) error { // processArchives extracts one archive at a time. // Returns list of archive files extracted, size of data written and files written. -func (x *Xtractr) processArchive(filename string, resp *Response) (int64, []string, []string, error) { +func (x *Xtractr) processArchive(filename string, resp *Response) (uint64, []string, []string, error) { if err := os.MkdirAll(resp.Output, x.config.DirMode); err != nil { return 0, nil, nil, fmt.Errorf("making output dir: %w", err) } @@ -328,6 +343,8 @@ func (x *Xtractr) processArchive(filename string, resp *Response) (int64, []stri Passwords: resp.X.Passwords, Password: resp.X.Password, log: x.config.Logger, + Updates: resp.X.Updates, + Progress: resp.X.Progress, }) if err != nil { x.DeleteFiles(resp.Output) // clean up the mess after an error and bail. @@ -349,6 +366,7 @@ func (x *Xtractr) cleanupProcessedArchives(resp *Response) error { var err error if !resp.X.TempFolder { + time.Sleep(fsSyncDelay) // Wait for file system to catch up/sync. // If TempFolder is false then move the files back to the original location. resp.NewFiles, err = x.MoveFiles(resp.Output, resp.X.Path, false) } diff --git a/queue_test.go b/queue_test.go index 46b37ac..d23f9be 100644 --- a/queue_test.go +++ b/queue_test.go @@ -23,7 +23,7 @@ var filesInTestArchive = []string{ const ( testFile = "test_data/archive.rar" - testDataSize = int64(20770) + testDataSize = uint64(20770) ) type testLogger struct{ t *testing.T } diff --git a/rar.go b/rar.go index 2b87ebb..ec868fc 100644 --- a/rar.go +++ b/rar.go @@ -13,7 +13,7 @@ import ( ) // ExtractRAR attempts to extract a file as a rar file. -func ExtractRAR(xFile *XFile) (size int64, filesList, archiveList []string, err error) { +func ExtractRAR(xFile *XFile) (size uint64, filesList, archiveList []string, err error) { if len(xFile.Passwords) == 0 && xFile.Password == "" { return extractRAR(xFile) } @@ -55,29 +55,54 @@ func ExtractRAR(xFile *XFile) (size int64, filesList, archiveList []string, err } // extractRAR extracts a rar file. to a destination. This wraps github.com/nwaples/rardecode. -func extractRAR(xFile *XFile) (int64, []string, []string, error) { +func extractRAR(xFile *XFile) (uint64, []string, []string, error) { rarReader, err := rardecode.OpenReader(xFile.FilePath, xFile.Password) if err != nil { return 0, nil, nil, fmt.Errorf("rardecode.OpenReader: %w", err) } + + defer xFile.newProgress(getUncompressedRarSize(rarReader)).done() // this closes rarReader + + rarReader, err = rardecode.OpenReader(xFile.FilePath, xFile.Password) // open it again. 
+ if err != nil { + return 0, nil, nil, fmt.Errorf("rardecode.OpenReader: %w", err) + } defer rarReader.Close() - size, files, err := xFile.unrar(rarReader) + files, err := xFile.unrar(rarReader) if err != nil { lastFile := xFile.FilePath if volumes := rarReader.Volumes(); len(volumes) > 0 { lastFile = volumes[len(volumes)-1] } - return size, files, rarReader.Volumes(), fmt.Errorf("%s: %w", lastFile, err) + return xFile.prog.Wrote, files, []string{xFile.FilePath}, fmt.Errorf("%s: %w", lastFile, err) } - return size, files, rarReader.Volumes(), nil + return xFile.prog.Wrote, files, []string{xFile.FilePath}, nil } -func (x *XFile) unrar(rarReader *rardecode.ReadCloser) (int64, []string, error) { +func getUncompressedRarSize(rarReader *rardecode.ReadCloser) (total, compressed uint64, count int) { + defer rarReader.Close() + + for { + header, err := rarReader.Next() + if err != nil { + if errors.Is(err, io.EOF) { + return total, 0, count + } + + return total, 0, count + } + + total += uint64(header.UnPackedSize) + // compressed += uint64(header.PackedSize) + count++ + } +} + +func (x *XFile) unrar(rarReader *rardecode.ReadCloser) ([]string, error) { files := []string{} - size := int64(0) for { header, err := rarReader.Next() @@ -86,7 +111,7 @@ func (x *XFile) unrar(rarReader *rardecode.ReadCloser) (int64, []string, error) break } - return size, files, fmt.Errorf("rarReader.Next: %w", err) + return files, fmt.Errorf("rarReader.Next: %w", err) } file := &file{ @@ -100,7 +125,7 @@ func (x *XFile) unrar(rarReader *rardecode.ReadCloser) (int64, []string, error) //nolint:gocritic // this 1-argument filepath.Join removes a ./ prefix should there be one. if !strings.HasPrefix(file.Path, filepath.Join(x.OutputDir)) { // The file being written is trying to write outside of our base path. Malicious archive? - return size, files, fmt.Errorf("%s: %w: %s != %s (from: %s)", + return files, fmt.Errorf("%s: %w: %s != %s (from: %s)", x.FilePath, ErrInvalidPath, file.Path, x.OutputDir, header.Name) } @@ -108,25 +133,24 @@ func (x *XFile) unrar(rarReader *rardecode.ReadCloser) (int64, []string, error) x.Debugf("Writing archived directory: %s", file.Path) if err = x.mkDir(file.Path, header.Mode(), header.ModificationTime); err != nil { - return size, files, fmt.Errorf("making rar file dir: %w", err) + return files, fmt.Errorf("making rar file dir: %w", err) } continue } - x.Debugf("Writing archived file: %s (packed: %d, unpacked: %d)", file.Path, header.PackedSize, header.UnPackedSize) + x.Debugf("Writing archived file: %s (packed: %d, unpacked: %d)", + file.Path, header.PackedSize, header.UnPackedSize) fSize, err := x.write(file) if err != nil { - return size, files, err + return files, err } files = append(files, file.Path) - size += fSize - x.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", file.Path, fSize, len(files), size) + x.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", + file.Path, fSize, x.prog.Files, x.prog.Wrote) } - files, err := x.cleanup(files) - - return size, files, err + return x.cleanup(files) } diff --git a/rpm.go b/rpm.go index ab3413b..5a2d284 100644 --- a/rpm.go +++ b/rpm.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "io" - "os" "github.com/cavaliergopher/rpm" "github.com/klauspost/compress/zstd" @@ -21,17 +20,25 @@ var ( ) // ExtractRPM extract a file as a RedHat Package Manager file. 
-func ExtractRPM(xFile *XFile) (size int64, filesList []string, err error) { //nolint:cyclop - rpmFile, err := os.Open(xFile.FilePath) +func ExtractRPM(xFile *XFile) (size uint64, filesList []string, err error) { + rpmFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer rpmFile.Close() + defer xFile.newProgress(0, uint64(stat.Size()), 0).done() + + files, err := xFile.extractRPM(xFile.prog.reader(rpmFile)) + + return xFile.prog.Wrote, files, err +} + +func (x *XFile) extractRPM(rpmFile io.Reader) (filesList []string, err error) { //nolint:cyclop // Read the package headers pkg, err := rpm.Read(rpmFile) if err != nil { - return 0, nil, fmt.Errorf("rpm.Read: %w", err) + return nil, fmt.Errorf("rpm.Read: %w", err) } // Check the RPM compression algorithm. @@ -39,48 +46,48 @@ func ExtractRPM(xFile *XFile) (size int64, filesList []string, err error) { //no case "xz": zipReader, err := xz.NewReader(rpmFile, 0) if err != nil { - return 0, nil, fmt.Errorf("xz.NewReader: %w", err) + return nil, fmt.Errorf("xz.NewReader: %w", err) } - return xFile.unrpm(zipReader, pkg.PayloadFormat()) + return x.unrpm(zipReader, pkg.PayloadFormat()) case "gz", "gzip": zipReader, err := gzip.NewReader(rpmFile) if err != nil { - return 0, nil, fmt.Errorf("gzip.NewReader: %w", err) + return nil, fmt.Errorf("gzip.NewReader: %w", err) } defer zipReader.Close() - return xFile.unrpm(zipReader, pkg.PayloadFormat()) + return x.unrpm(zipReader, pkg.PayloadFormat()) case "bz2", "bzip2": - return xFile.unrpm(bzip2.NewReader(rpmFile), pkg.PayloadFormat()) + return x.unrpm(bzip2.NewReader(rpmFile), pkg.PayloadFormat()) case "zstd", "zstandard", "zst", "Zstandard": zipReader, err := zstd.NewReader(rpmFile) if err != nil { - return 0, nil, fmt.Errorf("zstd.NewReader: %w", err) + return nil, fmt.Errorf("zstd.NewReader: %w", err) } defer zipReader.Close() - return xFile.unrpm(zipReader, pkg.PayloadFormat()) + return x.unrpm(zipReader, pkg.PayloadFormat()) case "lzma2": zipReader, err := lzma.NewReader2(rpmFile) if err != nil { - return 0, nil, fmt.Errorf("lzma.NewReader2: %w", err) + return nil, fmt.Errorf("lzma.NewReader2: %w", err) } - return xFile.unrpm(zipReader, pkg.PayloadFormat()) + return x.unrpm(zipReader, pkg.PayloadFormat()) case "lzma", "lzip": zipReader, err := lzma.NewReader(rpmFile) if err != nil { - return 0, nil, fmt.Errorf("lzma.NewReader: %w", err) + return nil, fmt.Errorf("lzma.NewReader: %w", err) } - return xFile.unrpm(zipReader, pkg.PayloadFormat()) + return x.unrpm(zipReader, pkg.PayloadFormat()) default: - return 0, nil, fmt.Errorf("%w: %s", ErrUnsupportedRPMCompression, compression) + return nil, fmt.Errorf("%w: %s", ErrUnsupportedRPMCompression, compression) } } -func (x *XFile) unrpm(reader io.Reader, format string) (size int64, filesList []string, err error) { +func (x *XFile) unrpm(reader io.Reader, format string) (filesList []string, err error) { // Check the archive format of the payload switch format { case "cpio": @@ -90,6 +97,6 @@ func (x *XFile) unrpm(reader io.Reader, format string) (size int64, filesList [] case "ar": return x.unAr(reader) default: - return 0, nil, fmt.Errorf("%w: %s", ErrUnsupportedRPMArchiveFmt, format) + return nil, fmt.Errorf("%w: %s", ErrUnsupportedRPMArchiveFmt, format) } } diff --git a/tar.go b/tar.go index 5631432..911ec3a 100644 --- a/tar.go +++ b/tar.go @@ -7,7 +7,6 @@ import ( "errors" "fmt" "io" - "os" "strings" "time" @@ -17,96 +16,119 @@ import ( ) // ExtractTar extracts a raw 
(non-compressed) tar archive. -func ExtractTar(xFile *XFile) (size int64, filesList []string, err error) { - tarFile, err := os.Open(xFile.FilePath) +func ExtractTar(xFile *XFile) (size uint64, filesList []string, err error) { + tarFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer tarFile.Close() - return xFile.untar(tarFile) + defer xFile.newProgress(uint64(stat.Size()), uint64(stat.Size()), 0).done() + + files, err := xFile.untar(xFile.prog.reader(tarFile)) + + return xFile.prog.Wrote, files, err } // ExtractTarBzip extracts a bzip2-compressed tar archive. -func ExtractTarBzip(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractTarBzip(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - return xFile.untar(bzip2.NewReader(compressedFile)) + defer xFile.newProgress(0, uint64(stat.Size()), 0).done() + + files, err := xFile.untar(bzip2.NewReader(xFile.prog.reader(compressedFile))) + + return xFile.prog.Wrote, files, err } // ExtractTarXZ extracts an XZ-compressed tar archive (txz). -func ExtractTarXZ(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractTarXZ(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipStream, err := xz.NewReader(compressedFile, 0) + defer xFile.newProgress(0, uint64(stat.Size()), 0).done() + + zipStream, err := xz.NewReader(xFile.prog.reader(compressedFile), 0) if err != nil { return 0, nil, fmt.Errorf("xz.NewReader: %w", err) } - return xFile.untar(zipStream) + files, err := xFile.untar(zipStream) + + return xFile.prog.Wrote, files, err } // ExtractTarZ extracts an LZW-compressed tar archive (tz). -func ExtractTarZ(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractTarZ(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipStream, err := lzw.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 0).done() + + zipStream, err := lzw.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("lzw.NewReader: %w", err) } - return xFile.untar(zipStream) + files, err := xFile.untar(zipStream) + + return xFile.prog.Wrote, files, err } // ExtractTarGzip extracts a gzip-compressed tar archive (tgz). 
-func ExtractTarGzip(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractTarGzip(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipStream, err := gzip.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 0).done() + + zipStream, err := gzip.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("gzip.NewReader: %w", err) } defer zipStream.Close() - return xFile.untar(zipStream) + files, err := xFile.untar(zipStream) + + return xFile.prog.Wrote, files, err } // ExtractTarLzip extracts an LZIP-compressed tar archive (tlz). -func ExtractTarLzip(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractTarLzip(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipStream, err := lzma.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 0).done() + + zipStream, err := lzma.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("xz.NewReader: %w", err) } - return xFile.untar(zipStream) + files, err := xFile.untar(zipStream) + + return xFile.prog.Wrote, files, err } -func (x *XFile) untar(reader io.Reader) (int64, []string, error) { +func (x *XFile) untar(reader io.Reader) ([]string, error) { tarReader := tar.NewReader(reader) files := []string{} - size := int64(0) for { header, err := tarReader.Next() @@ -115,25 +137,25 @@ func (x *XFile) untar(reader io.Reader) (int64, []string, error) { break } - return size, files, fmt.Errorf("%s: tarReader.Next: %w", x.FilePath, err) + return files, fmt.Errorf("%s: tarReader.Next: %w", x.FilePath, err) } fSize, err := x.untarFile(header, tarReader) if err != nil { - return size, files, err + return files, err } files = append(files, header.Name) - size += fSize - x.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", header.Name, fSize, len(files), size) + x.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", + header.Name, fSize, x.prog.Files, x.prog.Wrote) } files, err := x.cleanup(files) - return size, files, err + return files, err } -func (x *XFile) untarFile(header *tar.Header, tarReader *tar.Reader) (int64, error) { +func (x *XFile) untarFile(header *tar.Header, tarReader *tar.Reader) (uint64, error) { file := &file{ Path: x.clean(header.Name), Data: tarReader, diff --git a/util_test.go b/util_test.go index 89a001b..fe6987f 100644 --- a/util_test.go +++ b/util_test.go @@ -14,7 +14,7 @@ import ( type testFilesInfo struct { srcFilesDir string dstFilesDir string - dataSize int64 + dataSize uint64 fileCount int archiveCount int } diff --git a/zip.go b/zip.go index df9b112..c068c85 100644 --- a/zip.go +++ b/zip.go @@ -11,34 +11,45 @@ import ( /* How to extract a ZIP file. */ // ExtractZIP extracts a zip file.. to a destination. Simple enough. 
-func ExtractZIP(xFile *XFile) (size int64, filesList []string, err error) { +func ExtractZIP(xFile *XFile) (size uint64, filesList []string, err error) { zipReader, err := zip.OpenReader(xFile.FilePath) if err != nil { return 0, nil, fmt.Errorf("zip.OpenReader: %w", err) } defer zipReader.Close() + defer xFile.newProgress(getUncompressedZipSize(zipReader)).done() + files := []string{} - size = int64(0) for _, zipFile := range zipReader.File { fSize, wfile, err := xFile.unzip(zipFile) if err != nil { - return size, files, fmt.Errorf("%s: %w", xFile.FilePath, err) + return xFile.prog.Wrote, files, fmt.Errorf("%s: %w", xFile.FilePath, err) } //nolint:gosec // this is safe because we clean the paths. files = append(files, filepath.Join(xFile.OutputDir, zipFile.Name)) - size += fSize - xFile.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", wfile, fSize, len(files), size) + xFile.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", + wfile, fSize, xFile.prog.Files, xFile.prog.Wrote) } files, err = xFile.cleanup(files) - return size, files, err + return xFile.prog.Wrote, files, err +} + +func getUncompressedZipSize(zipReader *zip.ReadCloser) (total, compressed uint64, count int) { + for _, zipFile := range zipReader.File { + total += zipFile.UncompressedSize64 + // compressed += zipFile.CompressedSize64 + count++ + } + + return total, 0, count } -func (x *XFile) unzip(zipFile *zip.File) (int64, string, error) { +func (x *XFile) unzip(zipFile *zip.File) (uint64, string, error) { zFile, err := zipFile.Open() if err != nil { return 0, zipFile.Name, fmt.Errorf("zipFile.Open: %w", err) diff --git a/zip_test.go b/zip_test.go index c593956..1769fe7 100644 --- a/zip_test.go +++ b/zip_test.go @@ -33,7 +33,7 @@ func makeZipFile(t *testing.T) testFilesInfo { t.Helper() const ( - dataSize = int64(21) + dataSize = uint64(21) fileCount = 5 archiveCount = 1 )
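Usage notes (not part of the diff). First, a minimal sketch of how the new per-file progress reporting from files.go and progress.go can be consumed through the XFile.Progress callback. It only relies on identifiers visible in this diff (XFile, ExtractFile, Progress, Progress.Percent); the golift.io/xtractr module path, the archive path, and the file modes are assumptions/placeholders.

```go
package main

import (
	"fmt"
	"log"

	"golift.io/xtractr" // module path assumed; adjust to this repository's module.
)

func main() {
	x := &xtractr.XFile{
		FilePath:  "/tmp/archive.rar",       // placeholder input archive.
		OutputDir: "/tmp/archive_extracted", // placeholder output folder.
		FileMode:  0o644,
		DirMode:   0o755,
		// Progress is called periodically while the archive is extracted.
		// It is skipped if an Updates channel is provided instead.
		Progress: func(p xtractr.Progress) {
			fmt.Printf("\r%6.2f%% - %d files, %d of %d bytes", p.Percent(), p.Files, p.Wrote, p.Total)
			if p.Done {
				fmt.Println()
			}
		},
	}

	size, files, archives, err := xtractr.ExtractFile(x)
	if err != nil {
		log.Fatalln("extraction failed:", err)
	}

	fmt.Printf("wrote %d bytes across %d files from %d archive volume(s)\n", size, len(files), len(archives))
}
```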
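For the queue path, a sketch of wiring the new Xtract.Updates channel into the ArchiveProgress helper from progress.go. ArchiveProgress, Updates, CBChannel, Response, and Xtractr.Extract are taken from this diff; NewQueue, Config.Parallel, Name, and Filter.Path are assumed from the existing package API, and the values shown are placeholders.

```go
package main

import (
	"log"

	"golift.io/xtractr" // module path assumed.
)

func main() {
	// One shared channel receives Progress updates for every archive queued below.
	updates := make(chan xtractr.Progress)

	// Print roughly every 5%, resetting the terminal line between prints.
	// exit=false keeps the printer running across archives; pass true if you
	// start a dedicated printer per archive (e.g. parallel extractions).
	go xtractr.ArchiveProgress(5, updates, true, false)

	queue := xtractr.NewQueue(&xtractr.Config{Parallel: 1}) // constructor/fields assumed from the existing API.
	done := make(chan *xtractr.Response)

	_, err := queue.Extract(&xtractr.Xtract{
		Name:      "example-download",                     // placeholder job name (assumed field).
		Filter:    xtractr.Filter{Path: "/tmp/downloads"}, // placeholder search path (assumed field).
		Updates:   updates,                                // new in this PR: shared by all archives in this Xtract.
		CBChannel: done,                                   // existing API: receives a start and a finish Response.
	})
	if err != nil {
		log.Fatalln("queueing failed:", err)
	}

	for resp := range done {
		if resp.Done {
			log.Printf("finished: wrote %d bytes to %s", resp.Size, resp.Output)
			return
		}
	}
}
```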
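Lastly, since the exported extractor signatures all move from int64 to uint64 in this PR, a short sketch of the updated ChngInt adapter from files.go, wrapping a three-return extractor (ExtractGzip) into the four-return Interface. The gzip path and output directory are placeholders.

```go
package main

import (
	"fmt"
	"log"

	"golift.io/xtractr" // module path assumed.
)

func main() {
	// ExtractGzip now returns (uint64, []string, error); ChngInt adapts it to the
	// four-return Interface that the multi-volume extractors (rar, 7z) implement.
	var extract xtractr.Interface = xtractr.ChngInt(xtractr.ExtractGzip)

	size, files, archives, err := extract(&xtractr.XFile{
		FilePath:  "/tmp/notes.txt.gz", // placeholder.
		OutputDir: "/tmp/notes",        // placeholder.
		FileMode:  0o644,
		DirMode:   0o755,
	})
	if err != nil {
		log.Fatalln("gzip extraction failed:", err)
	}

	// For single-file formats, archives is just the input path.
	fmt.Printf("wrote %d bytes: %v (from %v)\n", size, files, archives)
}
```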