11package wayback
22
33import (
4+ "bytes"
45 "context"
56 "fmt"
67 "io"
78 "log"
89 "net/http"
9- "os"
10- "path/filepath"
1110 "strings"
1211 "sync/atomic"
1312 "time"
@@ -33,8 +32,9 @@ type Config struct {
3332 DownloadExternalAssets bool
3433 Debug bool
3534 StopOnError bool
36- CDXRatePerMin int // CDX API requests per minute (default 60)
37- CDXMaxRetries int // max retry attempts on throttle/5xx (default 5)
35+ CDXRatePerMin int // CDX API requests per minute (default 60)
36+ CDXMaxRetries int // max retry attempts on throttle/5xx (default 5)
37+ Storage Storage // if nil, NewLocalStorage(Directory) is used
3838}
3939
4040var downloadHTTPClient = & http.Client {
@@ -69,8 +69,9 @@ func DownloadAll(cfg *Config) error {
6969 fmt .Printf ("Found %d unique snapshots to download.\n " , total )
7070 }
7171
72- if err := os .MkdirAll (cfg .Directory , 0750 ); err != nil {
73- return fmt .Errorf ("create output dir: %w" , err )
72+ store := cfg .Storage
73+ if store == nil {
74+ store = NewLocalStorage (cfg .Directory )
7475 }
7576
7677 pool , err := ants .NewPool (cfg .Threads )
@@ -91,7 +92,7 @@ func DownloadAll(cfg *Config) error {
9192 }
9293 errCh := make (chan error , 1 )
9394 if err := pool .Submit (func () {
94- errCh <- downloadOne (ctx , s , cfg , idx , dlProg )
95+ errCh <- downloadOne (ctx , s , cfg , store , idx , dlProg )
9596 }); err != nil {
9697 return fmt .Errorf ("submit task: %w" , err )
9798 }
@@ -119,17 +120,16 @@ func DownloadAll(cfg *Config) error {
119120}
120121
121122// downloadOne downloads a single snapshot and optionally rewrites its links.
122- func downloadOne (ctx context.Context , snap Snapshot , cfg * Config , idx * SnapshotIndex , dlProg * Progress ) error {
123+ func downloadOne (ctx context.Context , snap Snapshot , cfg * Config , store Storage , idx * SnapshotIndex , dlProg * Progress ) error {
123124
124125 if ctx .Err () != nil {
125126 return ctx .Err ()
126127 }
127128
128- localPath := URLToLocalPath (snap .FileURL , cfg .PrettyPath )
129- localPath = filepath .Join (cfg .Directory , filepath .FromSlash (localPath ))
129+ logicalPath := URLToLocalPath (snap .FileURL , cfg .PrettyPath )
130130
131131 // Skip existing files
132- if _ , err := os . Stat ( localPath ); err == nil {
132+ if store . Exists ( logicalPath ) {
133133 dlProg .Inc ()
134134 return nil
135135 }
@@ -160,53 +160,27 @@ func downloadOne(ctx context.Context, snap Snapshot, cfg *Config, idx *SnapshotI
160160 return fmt .Errorf ("HTTP %d for %s" , resp .StatusCode , waybackURL )
161161 }
162162
163- // Ensure parent directory exists
164- if err := os .MkdirAll (filepath .Dir (localPath ), 0750 ); err != nil {
165- return fmt .Errorf ("mkdirall: %w" , err )
166- }
167-
168- // Stream to temp file, then rename atomically
169- tmpFile , err := os .CreateTemp (filepath .Dir (localPath ), ".wbdl-*" )
170- if err != nil {
171- return fmt .Errorf ("create temp: %w" , err )
172- }
173- tmpName := tmpFile .Name ()
174- defer func () {
175- _ = tmpFile .Close ()
176- _ = os .Remove (tmpName ) // no-op if renamed
177- }()
178-
179- // Read first 512 bytes for content sniffing
163+ // Read first 512 bytes for content sniffing, then stream remainder via storage
180164 first := make ([]byte , 512 )
181165 n , _ := io .ReadFull (resp .Body , first )
182166 first = first [:n ]
183167
184- if _ , err := tmpFile .Write (first ); err != nil {
185- return fmt .Errorf ("write first bytes: %w" , err )
186- }
187- if _ , err := io .Copy (tmpFile , resp .Body ); err != nil {
188- return fmt .Errorf ("write body: %w" , err )
189- }
190- if err := tmpFile .Close (); err != nil {
191- return fmt .Errorf ("close temp: %w" , err )
192- }
193-
194- if err := os .Rename (tmpName , localPath ); err != nil { //nolint:gosec // G703: localPath is sanitized by URLToLocalPath
195- return fmt .Errorf ("rename: %w" , err )
168+ if err := store .Put (logicalPath , io .MultiReader (bytes .NewReader (first ), resp .Body )); err != nil {
169+ return fmt .Errorf ("store: %w" , err )
196170 }
197171
198172 // Post-process HTML / CSS
199173 if cfg .RewriteLinks {
200174 ct := resp .Header .Get ("Content-Type" )
201175 fileURL := snap .FileURL
202176
203- if IsHTMLFile (localPath , ct , first ) {
204- if err := ProcessHTML (localPath , fileURL , cfg , idx ); err != nil && cfg .Debug {
205- log .Printf ("html rewrite %s: %v" , localPath , err )
177+ if IsHTMLFile (logicalPath , ct , first ) {
178+ if err := ProcessHTML (store , logicalPath , fileURL , cfg , idx ); err != nil && cfg .Debug {
179+ log .Printf ("html rewrite %s: %v" , logicalPath , err )
206180 }
207- } else if IsCSSResource (localPath , ct ) {
208- if err := RewriteCSSFile (localPath , fileURL , cfg , idx ); err != nil && cfg .Debug {
209- log .Printf ("css rewrite %s: %v" , localPath , err )
181+ } else if IsCSSResource (logicalPath , ct ) {
182+ if err := RewriteCSSFile (store , logicalPath , fileURL , cfg , idx ); err != nil && cfg .Debug {
183+ log .Printf ("css rewrite %s: %v" , logicalPath , err )
210184 }
211185 }
212186 }
0 commit comments