Skip to content

Commit ddb3b48

Browse files
committed
Checksum files as they are being streamed in. Speeds up load times significantly.
1 parent 0e944de commit ddb3b48

File tree

2 files changed

+101
-54
lines changed

2 files changed

+101
-54
lines changed

index.js

Lines changed: 63 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ var urllib = require('url');
1616
var debug = require('debug')('http-disk-cache');
1717
var glob = require('glob');
1818

19+
var util = require('util');
20+
var stream = require('stream');
21+
1922
/////////////// CacheEntry ///////////////
2023

2124
function canonicalUrl(url) {
@@ -178,6 +181,33 @@ CacheWriter.prototype.pipeFrom = function pipeFrom(readable) {
178181
});
179182
};
180183

184+
// ChecksumStream is a pass-through Transform stream: every chunk written to it is forwarded
// unchanged while an MD5 digest of the bytes is accumulated. When the input ends, the digest
// (lower-case hex) is compared to `expectedChecksum`; on a mismatch the stream emits an
// Error whose message is 'invalid checksum' instead of ending normally.
//
//   expectedChecksum - hex-encoded MD5 digest the streamed content must match.
//   options          - optional stream.Transform options, passed through untouched.
//
// May be called with or without `new`.
function ChecksumStream(expectedChecksum, options) {
  if (!(this instanceof ChecksumStream)) {
    return new ChecksumStream(expectedChecksum, options);
  }
  stream.Transform.call(this, options);
  this.hash = crypto.createHash('md5');
  this.expectedChecksum = expectedChecksum;
}
util.inherits(ChecksumStream, stream.Transform);

// Feeds each chunk into the running hash, then forwards it downstream unmodified.
ChecksumStream.prototype._transform = function (chunk, enc, cb) {
  if (Buffer.isBuffer(chunk)) {
    this.hash.update(chunk);
  } else {
    // String chunks only arrive when the stream was created with decodeStrings: false.
    // Hash them with their own encoding rather than going through the deprecated
    // `new Buffer(string, encoding)` constructor.
    this.hash.update(chunk, enc);
  }
  this.push(chunk, enc);
  cb();
};

// Called once all input has been consumed: finalizes the digest and fails the stream if it
// does not match the expected checksum.
ChecksumStream.prototype._flush = function (cb) {
  var checksum = this.hash.digest('hex');
  if (checksum !== this.expectedChecksum) {
    return cb(new Error('invalid checksum'));
  }
  cb();
};
209+
210+
181211
/////////////// HTTPCache ///////////////
182212

183213
// HTTPCache handles HTTP requests, and caches them to disk if allowed by the Cache-Control
@@ -247,7 +277,7 @@ function deleteEntry(metaPath, cb) {
247277
// 'notcached' - the cache entry is missing, invalid, or expired, but ready to be cached anew.
248278
// 'error' - the cache entry is corrupted, and could not be deleted. This indicates that
249279
// we shouldn't try to cache any responses right now.
250-
HTTPCache.prototype._checkCache = function(cacheEntry, callback) {
280+
HTTPCache.prototype._checkCache = function(cacheEntry, skipVerify, callback) {
251281
var _this = this;
252282
function loadMetadata(cb) {
253283
debug('loading metadata from', cacheEntry.metaPath);
@@ -339,8 +369,12 @@ HTTPCache.prototype._checkCache = function(cacheEntry, callback) {
339369
});
340370
}
341371

342-
// We now have valid metadata for an un-expired cache entry. Next, we checksum the contents.
343-
validateContents(metadata);
372+
if (skipVerify) {
373+
return callback(null, CACHE_STATE_CACHED, metadata);
374+
} else {
375+
// We now have valid metadata for an un-expired cache entry. Next, we checksum the contents.
376+
validateContents(metadata);
377+
}
344378
});
345379
};
346380

@@ -404,20 +438,22 @@ HTTPCache.prototype.assertCached = function(url, onProgress, cb) {
404438
options = { url: url };
405439
}
406440

407-
options._skipReadStream = true;
408-
409441
var entry = new CacheEntry(url, options.etagFormat);
410442

411-
this._checkCache(entry, function(err, cacheStatus) {
412-
if (cacheStatus === CACHE_STATE_CACHED) {
413-
debug('assert cache hit', url);
414-
return cb();
415-
} else {
416-
debug('assert cache miss', url);
417-
_this.openReadStream(options, onProgress, function(err, _, path) {
418-
cb(err);
419-
});
443+
this.openReadStream(options, onProgress, function (err, readStream, path) {
444+
if (err != null) {
445+
return cb(err);
420446
}
447+
if (readStream == null) { throw new Error("HAY"); }
448+
readStream.on('error', function(err) {
449+
readStream.removeAllListeners();
450+
cb(err);
451+
});
452+
readStream.on('end', function() {
453+
readStream.removeAllListeners();
454+
cb();
455+
});
456+
readStream.resume();
421457
});
422458
};
423459

@@ -464,14 +500,21 @@ HTTPCache.prototype.openReadStream = function(url, onProgress, cb) {
464500
var cacheWriter = this._createCacheWriter(entry);
465501

466502
// Check if the entry is available in the cache.
467-
this._checkCache(entry, function(err, cacheStatus) {
503+
this._checkCache(entry, true, function(err, cacheStatus, metadata) {
468504

469505
debug("cache entry", entry.url, "status=", cacheStatus);
470506
if (cacheStatus === CACHE_STATE_CACHED) {
471507
// The cache contents are present and valid, so serve the request from cache.
472508
cacheWriter.end();
473509
var readStream = options._skipReadStream ? null : _this._createContentReadStream(entry);
474-
return cb(null, readStream, _this._absPath(entry.contentPath));
510+
var checksumStream = new ChecksumStream(metadata.contentMD5);
511+
// A checksum mismatch means the cached content no longer matches its metadata, so drop
// the entry. ChecksumStream reports the mismatch as an Error object, so the check must
// look at `err.message`; comparing the Error itself to a string never matches.
checksumStream.on('error', function (err) {
  if (err != null && err.message === 'invalid checksum') {
    // Best-effort cleanup: the consumer of checksumStream already sees the stream error.
    // NOTE(review): assumes deleteEntry passes an error as the first callback argument,
    // as the original `function(err) {}` callback suggests; confirm.
    deleteEntry(_this._absPath(entry.metaPath), function (deleteErr) {
      if (deleteErr) {
        debug('failed to delete corrupt cache entry', entry.url, deleteErr);
      }
    });
  }
});
516+
readStream.pipe(checksumStream);
517+
return cb(null, checksumStream, _this._absPath(entry.contentPath));
475518
} else if (cacheStatus == CACHE_STATE_ERROR) {
476519
// Some kind of error occurred and we can't access the cache.
477520
return cb("Error: There was a problem with the asset cache and we can't write files");
@@ -582,6 +625,8 @@ HTTPCache.prototype.getContents = function(url, cb) {
582625
}
583626
debug("getContents start", options.url);
584627

628+
options._skipVerify = true;
629+
585630
this.openReadStream(options, function(err, readStream, path) {
586631
if (err) { return cb(err); }
587632

@@ -754,7 +799,7 @@ HTTPCache.prototype.repair = function(cb) {
754799
return;
755800
}
756801

757-
_this._checkCache(entry, function (err, status) {
802+
_this._checkCache(entry, false, function (err, status) {
758803
if (err != null) {
759804
deleteEntry(metaPath, deleteCb);
760805
return;
@@ -861,3 +906,4 @@ HTTPCache.prototype.clean = function (shouldClean, cb) {
861906
};
862907

863908
exports.HTTPCache = HTTPCache;
909+
exports.ChecksumStream = ChecksumStream;

test.js

Lines changed: 38 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ var execSync = require('child_process').execSync;
1313
var debug = require('debug')('http-disk-cache');
1414
var async = require('artillery-async');
1515
var glob = require('glob');
16+
var stream = require('stream');
1617

1718
var httpcache = require('./index');
1819

@@ -36,16 +37,19 @@ function newUrlReply(contents, status, headers, defer) {
3637

3738
// Test helper: drains `stream` and hands the collected contents to `cb(err, contents)`.
//
//   - On a stream 'error', calls cb(err).
//   - On 'end', calls cb(null, contents), where contents is null if no chunks were received,
//     a joined string if the chunks are strings, or a single concatenated Buffer otherwise.
//
// `cb` is invoked at most once, even if the stream reports both an error and an end.
function catStream(stream, cb) {
  // Must be local: a shared (implicit global) array would be clobbered when several
  // streams are drained at the same time, and assigning to an undeclared name throws
  // in strict mode.
  var chunks = [];
  var finished = false;

  function finish(err, contents) {
    if (finished) { return; }
    finished = true;
    cb(err, contents);
  }

  stream.on('error', function (err) {
    finish(err);
  });
  stream.on('data', function (chunk) {
    chunks.push(chunk);
  });
  stream.on('end', function () {
    if (chunks.length === 0) {
      finish(null, null);
    } else if (typeof chunks[0] === 'string') {
      finish(null, chunks.join(''));
    } else { // Buffer
      finish(null, Buffer.concat(chunks));
    }
  });
}
@@ -166,7 +170,7 @@ exports.tests = {
166170
var _this = this;
167171
this.cache.openReadStream(this.createUrl('/url1'), function(err, stream, path) {
168172
test.ok(stream instanceof fs.ReadStream, "stream should be an fs.ReadStream");
169-
catStream(stream, function (contents) {
173+
catStream(stream, function (err, contents) {
170174
test.equal(contents.toString('utf8'), 'url1 contents');
171175
test.done();
172176
});
@@ -182,7 +186,7 @@ exports.tests = {
182186
test.equal(_this.requests.length, 1);
183187
test.equal(_this.requests[0], '/url5');
184188
test.ok(stream instanceof fs.ReadStream, "stream should be an fs.ReadStream");
185-
catStream(stream, function (contents) {
189+
catStream(stream, function (err, contents) {
186190
test.equal(contents.toString('utf8'), 'url5 contents');
187191
test.done();
188192
});
@@ -192,7 +196,7 @@ exports.tests = {
192196
_this.cache.openReadStream({ url: _this.createUrl('/url5'), etagFormat: 'md5' }, function(err, stream, path) {
193197
test.equal(_this.requests.length, 1); // request is handled from cache.
194198
test.ok(stream instanceof fs.ReadStream, "stream should be an fs.ReadStream");
195-
catStream(stream, function (contents) {
199+
catStream(stream, function (err, contents) {
196200
test.equal(contents.toString('utf8'), 'url5 contents');
197201
test.done();
198202
});
@@ -213,7 +217,7 @@ exports.tests = {
213217
test.equal(_this.requests.length, 1);
214218
test.equal(_this.requests[0], '/url7');
215219
test.ok(stream instanceof fs.ReadStream, "stream should be an fs.ReadStream");
216-
catStream(stream, function (contents) {
220+
catStream(stream, function (err, contents) {
217221
test.equal(contents.toString('utf8'), 'url7 contents');
218222
test.done();
219223
});
@@ -223,7 +227,7 @@ exports.tests = {
223227
_this.cache.openReadStream({ url: _this.createUrl('/url7'), etagFormat: 'md5' }, function(err, stream, path) {
224228
test.equal(_this.requests.length, 1); // request is handled from cache.
225229
test.ok(stream instanceof fs.ReadStream, "stream should be an fs.ReadStream");
226-
catStream(stream, function (contents) {
230+
catStream(stream, function (err, contents) {
227231
test.equal(contents.toString('utf8'), 'url7 contents');
228232
test.done();
229233
});
@@ -256,7 +260,7 @@ exports.tests = {
256260
_this.cache.openReadStream({ url: _this.createUrl('/url1'), etagFormat: 'md5' }, function(err, stream, path) {
257261
test.equal(_this.requests.length, 1);
258262
test.equal(_this.requests[0], '/url1');
259-
catStream(stream, function (contents) {
263+
catStream(stream, function (err, contents) {
260264
test.equal(contents.toString('utf8'), 'url1 contents');
261265
cb();
262266
});
@@ -268,7 +272,7 @@ exports.tests = {
268272
_this.cache.openReadStream({ url: _this.createUrl('/url1'), etagFormat: 'md5' }, function(err, stream, path) {
269273
test.equal(_this.requests.length, 2);
270274
test.equal(_this.requests[1], '/url1');
271-
catStream(stream, function (contents) {
275+
catStream(stream, function (err, contents) {
272276
test.equal(contents.toString('utf8'), 'url1 contents');
273277
cb();
274278
});
@@ -358,7 +362,7 @@ exports.tests = {
358362
},
359363

360364
testConcurrentRequests: function(test) {
361-
test.expect(4);
365+
test.expect(2);
362366
var _this = this;
363367
var count = 2;
364368

@@ -367,8 +371,7 @@ exports.tests = {
367371
if (count === 0) { test.done(); }
368372
};
369373
var cb = function(err, stream, path) {
370-
test.ok(stream instanceof fs.ReadStream, "stream should be an fs.ReadStream");
371-
catStream(stream, function (contents) {
374+
catStream(stream, function (err, contents) {
372375
test.equal(contents.toString('utf8'), 'url1 contents');
373376
barrier();
374377
});
@@ -379,34 +382,34 @@ exports.tests = {
379382

380383

381384
testBasicCaching: function(test) {
382-
test.expect(8);
385+
test.expect(6);
383386
doTest(this, test, '/url1', 'url1 contents', false, true, 0, test.done);
384387
},
385388

386389
testExplicitNoCache: function(test) {
387-
test.expect(8);
390+
test.expect(6);
388391
doTest(this, test, '/url2', 'url2 contents', false, false, 0, test.done);
389392
},
390393

391394
testUnparseableCacheControl: function(test) {
392-
test.expect(8);
395+
test.expect(6);
393396
doTest(this, test, '/url4', 'url4 contents', false, false, 0, test.done);
394397
},
395398

396399
testNoCache: function(test) {
397400
// URLs without a Cache-Control header don't get cached.
398-
test.expect(8);
401+
test.expect(6);
399402
doTest(this, test, '/url3', 'url3 contents', false, false, 0, test.done);
400403
},
401404

402405
testUnexpiredCache: function(test) {
403-
test.expect(8);
406+
test.expect(6);
404407
// 200 is the maximum allowable age.
405408
doTest(this, test, '/url1', 'url1 contents', false, true, 200, test.done);
406409
},
407410

408411
testExpiredCache: function(test) {
409-
test.expect(8);
412+
test.expect(6);
410413
doTest(this, test, '/url1', 'url1 contents', false, false, 201, test.done);
411414
},
412415

@@ -691,29 +694,27 @@ exports.tests = {
691694
function doTest(_this, test, url, contents, firstCached, secondCached, deltaT, cb) {
692695
var count = 0;
693696
if (!deltaT) { deltaT = 0; }
694-
_this.cache.openReadStream(_this.createUrl(url), function(err, stream, path) {
697+
698+
_this.cache.getContents(_this.createUrl(url), function(err, buffer, path) {
695699
if (!firstCached) { count++; }
696-
if (!stream) {
697-
test.ok(err, "if stream is null there had better be an error");
700+
if (!buffer) {
701+
test.ok(err, "if buffer is null there had better be an error");
698702
return cb();
699703
}
700-
test.ok(stream instanceof fs.ReadStream, "stream should be an fs.ReadStream");
701704
test.ok(fs.existsSync(path));
702-
catStream(stream, function (contents) {
703-
test.equal(contents.toString('utf8'), contents);
705+
706+
test.equal(buffer.toString('utf8'), contents);
707+
test.equal(_this.serverUrls[url].fetchCount, count);
708+
_this.nowSeconds += deltaT;
709+
_this.cache.reset();
710+
711+
_this.cache.getContents(_this.createUrl(url), function(err, buffer, path) {
712+
if (!secondCached) { count++; }
713+
714+
test.ok(fs.existsSync(path));
704715
test.equal(_this.serverUrls[url].fetchCount, count);
705-
_this.nowSeconds += deltaT;
706-
_this.cache.reset();
707-
_this.cache.openReadStream(_this.createUrl(url), function(err, stream, path) {
708-
if (!secondCached) { count++; }
709-
test.ok(stream instanceof fs.ReadStream, "stream should be an fs.ReadStream");
710-
test.ok(fs.existsSync(path));
711-
test.equal(_this.serverUrls[url].fetchCount, count);
712-
catStream(stream, function (contents) {
713-
test.equal(contents.toString('utf8'), contents);
714-
cb();
715-
});
716-
});
716+
test.equal(buffer.toString('utf8'), contents);
717+
cb();
717718
});
718719
});
719720

0 commit comments

Comments
 (0)