Skip to content

Commit b245c1c

Browse files
committed
Bump deps.
1 parent 63004c1 commit b245c1c

23 files changed

+2504
-1488
lines changed

bin/textract

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ var path = require( 'path' )
2020

2121
var cli = meow({
2222
help: help,
23-
pkg: '../package.json'
23+
pkg: require('../package.json')
2424
});
2525

2626
if ( !cli.input || cli.input.length === 0 ) {

lib/extract.js

+8-11
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
var fs = require( 'fs' )
22
, path = require( 'path' )
3-
, XmlEntities = require( 'html-entities' ).XmlEntities
3+
, { XmlEntities } = require( 'html-entities' )
44
, util = require( './util' )
55
, extractorPath = path.join( __dirname, 'extractors' )
66
, entities = new XmlEntities()
@@ -11,8 +11,8 @@ var fs = require( 'fs' )
1111
, satisfiedExtractors = 0
1212
, hasInitialized = false
1313
, STRIP_ONLY_SINGLE_LINEBREAKS = /(^|[^\n])\n(?!\n)/g
14-
, WHITELIST_PRESERVE_LINEBREAKS = /[^A-Za-z\x80-\xFF\x24\u20AC\xA3\xA5 0-9 \u2015\u2116\u2018\u2019\u201C|\u201D\u2026 \uFF0C \u2013 \u2014 \u00C0-\u1FFF \u2C00-\uD7FF \uFB50\uFDFF \uFE70\uFEFF \uFF01-\uFFE6 \.,\?""!@#\$%\^&\*\(\)-_=\+;:<>\/\\\|\}\{\[\]`~'-\w\n\r]*/g // eslint-disable-line max-len
15-
, WHITELIST_STRIP_LINEBREAKS = /[^A-Za-z\x80-\xFF\x24\u20AC\xA3\xA5 0-9 \u2015\u2116\u2018\u2019\u201C|\u201D\u2026 \uFF0C \u2013 \u2014 \u00C0-\u1FFF \u2C00-\uD7FF \uFB50\uFDFF \uFE70\uFEFF \uFF01-\uFFE6 \.,\?""!@#\$%\^&\*\(\)-_=\+;:<>\/\\\|\}\{\[\]`~'-\w]*/g // eslint-disable-line max-len
14+
, WHITELIST_PRESERVE_LINEBREAKS = /[^A-Za-z\x80-\xFF\x24\u20AC\xA3\xA5 0-9 \u2015\u2116\u2018\u2019\u201C|\u201D\u2026 \uFF0C \u2013 \u2014 \u00C0-\u1FFF \u2C00-\uD7FF \uFB50\uFDFF \uFE70\uFEFF \uFF01-\uFFE6 \.,\?'""«»!@#\$%\^&\*\(\)-_=\+;:<>\/\\\|\}\{\[\]`~'-\w\n\r]*/g // eslint-disable-line max-len
15+
, WHITELIST_STRIP_LINEBREAKS = /[^A-Za-z\x80-\xFF\x24\u20AC\xA3\xA5 0-9 \u2015\u2116\u2018\u2019\u201C|\u201D\u2026 \uFF0C \u2013 \u2014 \u00C0-\u1FFF \u2C00-\uD7FF \uFB50\uFDFF \uFE70\uFEFF \uFF01-\uFFE6 \.,\?'""«»!@#\$%\^&\*\(\)-_=\+;:<>\/\\\|\}\{\[\]`~'-\w]*/g // eslint-disable-line max-len
1616
;
1717

1818
function registerExtractor( extractor ) {
@@ -21,10 +21,8 @@ function registerExtractor( extractor ) {
2121
if ( typeof type === 'string' ) {
2222
type = type.toLowerCase();
2323
typeExtractors[type] = extractor.extract;
24-
} else {
25-
if ( type instanceof RegExp ) {
26-
regexExtractors.push({ reg: type, extractor: extractor.extract });
27-
}
24+
} else if ( type instanceof RegExp ) {
25+
regexExtractors.push({ reg: type, extractor: extractor.extract });
2826
}
2927
});
3028
}
@@ -107,8 +105,7 @@ function findExtractor( type ) {
107105
var i
108106
, iLen = regexExtractors.length
109107
, extractor
110-
, regexExtractor
111-
;
108+
, regexExtractor;
112109

113110
type = type.toLowerCase();
114111
if ( typeExtractors[type] ) {
@@ -144,8 +141,8 @@ function extract( type, filePath, options, cb ) {
144141

145142
// update error message if type is supported but just not configured/installed properly
146143
if ( failedExtractorTypes[type] ) {
147-
msg += ', extractor for type exists, but failed to initialize.' +
148-
' Message: ' + failedExtractorTypes[type];
144+
msg += ', extractor for type exists, but failed to initialize.'
145+
+ ' Message: ' + failedExtractorTypes[type];
149146
}
150147

151148
error = new Error( msg );

lib/extractors/doc-osx.js

+8-11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
var spawn = require( 'child_process' ).spawn
2-
, exec = require( 'child_process' ).exec
1+
var { spawn } = require( 'child_process' )
2+
, { exec } = require( 'child_process' )
33
, os = require( 'os' )
44
, path = require( 'path' )
55
, types
@@ -9,9 +9,7 @@ var spawn = require( 'child_process' ).spawn
99
function extractText( filePath, options, cb ) {
1010
var result = ''
1111
, error = null
12-
, textutil = spawn( 'textutil', ['-convert', 'txt', '-stdout', filePath] )
13-
;
14-
12+
, textutil = spawn( 'textutil', ['-convert', 'txt', '-stdout', filePath] );
1513
textutil.stdout.on( 'data', function( buffer ) {
1614
result += buffer.toString();
1715
});
@@ -25,8 +23,8 @@ function extractText( filePath, options, cb ) {
2523

2624
textutil.on( 'close', function( /* code */ ) {
2725
if ( error ) {
28-
error = new Error( 'textutil read of file named [[ ' +
29-
path.basename( filePath ) + ' ]] failed: ' + error );
26+
error = new Error( 'textutil read of file named [[ '
27+
+ path.basename( filePath ) + ' ]] failed: ' + error );
3028
cb( error, null );
3129
return;
3230
}
@@ -45,12 +43,11 @@ function testForBinary( options, cb ) {
4543
function( error /* , stdout, stderr */ ) {
4644
var msg;
4745
if ( error !== null ) {
48-
msg = 'INFO: \'textutil\' does not appear to be installed, ' +
49-
'so textract will be unable to extract DOCs.';
46+
msg = 'INFO: \'textutil\' does not appear to be installed, '
47+
+ 'so textract will be unable to extract DOCs.';
5048
}
5149
cb( error === null, msg );
52-
}
53-
);
50+
});
5451
}
5552

5653
if ( os.platform() === 'darwin' ) {

lib/extractors/doc.js

+12-15
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
var exec = require( 'child_process' ).exec
1+
var { exec } = require( 'child_process' )
22
, os = require( 'os' )
33
, path = require( 'path' )
44
, util = require( '../util' )
5-
, types
6-
;
5+
, types;
76

87
function extractText( filePath, options, cb ) {
98
var execOptions = util.createExecOptions( 'doc', options );
@@ -14,18 +13,17 @@ function extractText( filePath, options, cb ) {
1413
var err;
1514
if ( error ) {
1615
if ( error.toString().indexOf( 'is not a Word Document' ) > 0 ) {
17-
err = new Error( 'file named [[ ' + path.basename( filePath ) +
18-
' ]] does not appear to really be a .doc file' );
16+
err = new Error( 'file named [[ ' + path.basename( filePath )
17+
+ ' ]] does not appear to really be a .doc file' );
1918
} else {
20-
err = new Error( 'antiword read of file named [[ ' +
21-
path.basename( filePath ) + ' ]] failed: ' + error );
19+
err = new Error( 'antiword read of file named [[ '
20+
+ path.basename( filePath ) + ' ]] failed: ' + error );
2221
}
2322
cb( err, null );
2423
} else {
2524
cb( null, stdout.trim().replace( /\[pic\]/g, '' ) );
2625
}
27-
}
28-
);
26+
});
2927
}
3028

3129
function testForBinary( options, cb ) {
@@ -43,16 +41,15 @@ function testForBinary( options, cb ) {
4341
execOptions,
4442
function( error /* , stdout, stderr */ ) {
4543
var msg;
46-
if ( error !== null && error.message &&
47-
error.message.indexOf( 'not found' ) !== -1 ) {
48-
msg = 'INFO: \'antiword\' does not appear to be installed, ' +
49-
'so textract will be unable to extract DOCs.';
44+
if ( error !== null && error.message
45+
&& error.message.indexOf( 'not found' ) !== -1 ) {
46+
msg = 'INFO: \'antiword\' does not appear to be installed, '
47+
+ 'so textract will be unable to extract DOCs.';
5048
cb( false, msg );
5149
} else {
5250
cb( true );
5351
}
54-
}
55-
);
52+
});
5653
}
5754

5855
if ( os.platform() === 'darwin' ) {

lib/extractors/docx.js

+16-23
Original file line numberDiff line numberDiff line change
@@ -3,35 +3,29 @@ var xpath = require( 'xpath' )
33
, yauzl = require( 'yauzl' )
44
, util = require( '../util' )
55
, includeRegex = /.xml$/
6-
, excludeRegex = /^(word\/media\/|word\/_rels\/)/
7-
;
6+
, excludeRegex = /^(word\/media\/|word\/_rels\/)/;
87

98
function _calculateExtractedText( inText, preserveLineBreaks ) {
109
var doc = new Dom().parseFromString( inText )
1110
, ps = xpath.select( "//*[local-name()='p']", doc )
12-
, text = ''
13-
;
14-
11+
, text = '';
1512
ps.forEach( function( paragraph ) {
1613
var ts
17-
, localText = ''
18-
;
19-
14+
, localText = '';
2015
paragraph = new Dom().parseFromString( paragraph.toString() );
2116
ts = xpath.select(
22-
"//*[local-name()='t' or local-name()='tab' or local-name()='br']", paragraph );
17+
"//*[local-name()='t' or local-name()='tab' or local-name()='br']", paragraph
18+
);
2319
ts.forEach( function( t ) {
2420
if ( t.localName === 't' && t.childNodes.length > 0 ) {
2521
localText += t.childNodes[0].data;
26-
} else {
27-
if ( t.localName === 'tab' ) {
22+
} else if ( t.localName === 'tab' ) {
23+
localText += ' ';
24+
} else if ( t.localName === 'br' ) {
25+
if ( preserveLineBreaks !== true ) {
2826
localText += ' ';
29-
} else if ( t.localName === 'br' ) {
30-
if ( preserveLineBreaks !== true ) {
31-
localText += ' ';
32-
} else {
33-
localText += '\n';
34-
}
27+
} else {
28+
localText += '\n';
3529
}
3630
}
3731
});
@@ -46,9 +40,7 @@ function extractText( filePath, options, cb ) {
4640

4741
yauzl.open( filePath, function( err, zipfile ) {
4842
var processEnd
49-
, processedEntries = 0
50-
;
51-
43+
, processedEntries = 0;
5244
if ( err ) {
5345
util.yauzlError( err, cb );
5446
return;
@@ -62,9 +54,10 @@ function extractText( filePath, options, cb ) {
6254
cb( null, text );
6355
} else {
6456
cb( new Error(
65-
'Extraction could not find content in file, are you' +
66-
' sure it is the mime type it says it is?' ),
67-
null );
57+
'Extraction could not find content in file, are you'
58+
+ ' sure it is the mime type it says it is?'
59+
),
60+
null );
6861
}
6962
}
7063
};

lib/extractors/dxf.js

+10-17
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,37 @@
1-
var exec = require( 'child_process' ).exec
1+
var { exec } = require( 'child_process' )
22
, path = require( 'path' )
3-
, util = require( '../util' )
4-
;
3+
, util = require( '../util' );
54

65
function extractText( filePath, options, cb ) {
76
var execOptions = util.createExecOptions( 'dxf', options )
8-
, escapedPath = filePath.replace( /\s/g, '\\ ' )
9-
;
10-
7+
, escapedPath = filePath.replace( /\s/g, '\\ ' );
118
exec( 'drawingtotext ' + escapedPath,
129
execOptions,
1310
function( error, stdout, stderr ) {
1411
if ( stderr !== '' ) {
15-
error = new Error( 'error extracting DXF text ' +
16-
path.basename( filePath ) + ': ' + stderr );
12+
error = new Error( 'error extracting DXF text '
13+
+ path.basename( filePath ) + ': ' + stderr );
1714
cb( error, null );
1815
return;
1916
}
2017

2118
cb( null, stdout );
22-
}
23-
);
19+
});
2420
}
2521

2622
function testForBinary( options, cb ) {
2723
exec( 'drawingtotext notalegalfile',
2824
function( error, stdout, stderr ) {
2925
var msg
30-
, errorRegex = /I couldn't make sense of your input/
31-
;
32-
26+
, errorRegex = /I couldn't make sense of your input/;
3327
if ( !( stderr && errorRegex.test( stderr ) ) ) {
34-
msg = 'INFO: \'drawingtotext\' does not appear to be installed, ' +
35-
'so textract will be unable to extract DXFs.';
28+
msg = 'INFO: \'drawingtotext\' does not appear to be installed, '
29+
+ 'so textract will be unable to extract DXFs.';
3630
cb( false, msg );
3731
} else {
3832
cb( true );
3933
}
40-
}
41-
);
34+
});
4235
}
4336

4437
module.exports = {

lib/extractors/epub.js

+2-5
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
11
var EPub = require( 'epub2/node' )
2-
, htmlExtract = require( './html' )
3-
;
2+
, htmlExtract = require( './html' );
43

54
function extractText( filePath, options, cb ) {
65
var epub = new EPub( filePath )
76
, allText = ''
87
, hasError = false
9-
, chapterCount = 0
10-
;
11-
8+
, chapterCount = 0;
129
epub.on( 'end', function() {
1310
// Iterate over each chapter...
1411
epub.flow.forEach( function( chapter ) {

lib/extractors/html.js

+4-6
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
/* eslint-disable max-len */
22

33
var cheerio = require( 'cheerio' )
4-
, fs = require( 'fs' )
5-
;
4+
, fs = require( 'fs' );
65

76
function getTextWithAlt( $, $element ) {
87
if ( !$element ) {
@@ -31,8 +30,8 @@ function getTextWithAlt( $, $element ) {
3130
}
3231
return returnText;
3332
})
34-
.get()
35-
.join( '' );
33+
.get()
34+
.join( '' );
3635
}
3736

3837
function extractFromText( data, options, cb ) {
@@ -66,8 +65,7 @@ function extractFromText( data, options, cb ) {
6665
.replace( /(\r\u00A0|\u00A0\r|\r | \r)+/g, '\n' )
6766
.replace( /(\v\u00A0|\u00A0\v|\v | \v)+/g, '\n' )
6867
.replace( /(\t\u00A0|\u00A0\t|\t | \t)+/g, '\n' )
69-
.replace( /[\n\r\t\v]+/g, '\n' )
70-
;
68+
.replace( /[\n\r\t\v]+/g, '\n' );
7169
} catch ( err ) {
7270
cb( err, null );
7371
return;

lib/extractors/images.js

+8-10
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
var exec = require( 'child_process' ).exec
2-
, util = require( '../util' )
3-
;
1+
var { exec } = require( 'child_process' )
2+
, util = require( '../util' );
43

54
function tesseractExtractionCommand( options, inputFile, outputFile ) {
65
var cmd = 'tesseract ' + inputFile + ' ' + outputFile;
@@ -26,17 +25,16 @@ function testForBinary( options, cb ) {
2625
function( error, stdout, stderr ) {
2726
var msg;
2827
// checking for content of help text
29-
if ( ( error && error.toString().indexOf( 'Usage:' ) > -1 ) ||
30-
( stderr && stderr.toString().indexOf( 'Usage:' ) > -1 ) ||
31-
( stdout && stdout.toString().indexOf( 'Usage:' ) > -1 ) ) {
28+
if ( ( error && error.toString().indexOf( 'Usage:' ) > -1 )
29+
|| ( stderr && stderr.toString().indexOf( 'Usage:' ) > -1 )
30+
|| ( stdout && stdout.toString().indexOf( 'Usage:' ) > -1 ) ) {
3231
cb( true );
3332
} else {
34-
msg = 'INFO: \'tesseract\' does not appear to be installed, ' +
35-
'so textract will be unable to extract images.';
33+
msg = 'INFO: \'tesseract\' does not appear to be installed, '
34+
+ 'so textract will be unable to extract images.';
3635
cb( false, msg );
3736
}
38-
}
39-
);
37+
});
4038
}
4139

4240
module.exports = {

lib/extractors/md.js

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
var fs = require( 'fs' )
22
, marked = require( 'marked' )
3-
, htmlExtract = require( './html' )
4-
;
3+
, htmlExtract = require( './html' );
54

65
function extractText( filePath, options, cb ) {
76
fs.readFile( filePath, function( error, data ) {

0 commit comments

Comments
 (0)