nosferatu500
diff --git a/bin/textract b/bin/textract
+1-1
diff --git a/lib/extract.js b/lib/extract.js
+8-11
diff --git a/lib/extractors/doc-osx.js b/lib/extractors/doc-osx.js
+8-11
diff --git a/lib/extractors/doc.js b/lib/extractors/doc.js
+12-15
diff --git a/lib/extractors/docx.js b/lib/extractors/docx.js
+16-23
diff --git a/lib/extractors/dxf.js b/lib/extractors/dxf.js
+10-17
diff --git a/lib/extractors/epub.js b/lib/extractors/epub.js
+2-5
diff --git a/lib/extractors/html.js b/lib/extractors/html.js
+4-6
diff --git a/lib/extractors/images.js b/lib/extractors/images.js
+8-10
diff --git a/lib/extractors/md.js b/lib/extractors/md.js
+1-2
@@ -20,7 +20,7 @@ var path = require( 'path' )
 
 var cli = meow({
   help: help,
-  pkg: '../package.json'
+  pkg: require('../package.json')
 });
 
 if ( !cli.input || cli.input.length === 0 ) {
 
@@ -1,6 +1,6 @@
 var fs = require( 'fs' )
   , path = require( 'path' )
-  , XmlEntities = require( 'html-entities' ).XmlEntities
+  , { XmlEntities } = require( 'html-entities' )
   , util = require( './util' )
   , extractorPath = path.join( __dirname, 'extractors' )
   , entities = new XmlEntities()
@@ -11,8 +11,8 @@ var fs = require( 'fs' )
   , satisfiedExtractors = 0
   , hasInitialized = false
   , STRIP_ONLY_SINGLE_LINEBREAKS = /(^|[^\n])\n(?!\n)/g
-  , WHITELIST_PRESERVE_LINEBREAKS = /[^A-Za-z\x80-\xFF\x24\u20AC\xA3\xA5 0-9 \u2015\u2116\u2018\u2019\u201C|\u201D\u2026 \uFF0C \u2013 \u2014 \u00C0-\u1FFF \u2C00-\uD7FF \uFB50–\uFDFF \uFE70–\uFEFF \uFF01-\uFFE6 \.,\?""!@#\$%\^&\*\(\)-_=\+;:<>\/\\\|\}\{\[\]`~'-\w\n\r]*/g  // eslint-disable-line max-len
-  , WHITELIST_STRIP_LINEBREAKS = /[^A-Za-z\x80-\xFF\x24\u20AC\xA3\xA5 0-9 \u2015\u2116\u2018\u2019\u201C|\u201D\u2026 \uFF0C \u2013 \u2014 \u00C0-\u1FFF \u2C00-\uD7FF \uFB50–\uFDFF \uFE70–\uFEFF \uFF01-\uFFE6 \.,\?""!@#\$%\^&\*\(\)-_=\+;:<>\/\\\|\}\{\[\]`~'-\w]*/g  // eslint-disable-line max-len
+  , WHITELIST_PRESERVE_LINEBREAKS = /[^A-Za-z\x80-\xFF\x24\u20AC\xA3\xA5 0-9 \u2015\u2116\u2018\u2019\u201C|\u201D\u2026 \uFF0C \u2013 \u2014 \u00C0-\u1FFF \u2C00-\uD7FF \uFB50–\uFDFF \uFE70–\uFEFF \uFF01-\uFFE6 \.,\?'""„«»!@#\$%\^&\*\(\)-_=\+;:<>\/\\\|\}\{\[\]`~'-\w\n\r]*/g // eslint-disable-line max-len
+  , WHITELIST_STRIP_LINEBREAKS = /[^A-Za-z\x80-\xFF\x24\u20AC\xA3\xA5 0-9 \u2015\u2116\u2018\u2019\u201C|\u201D\u2026 \uFF0C \u2013 \u2014 \u00C0-\u1FFF \u2C00-\uD7FF \uFB50–\uFDFF \uFE70–\uFEFF \uFF01-\uFFE6 \.,\?'""„«»!@#\$%\^&\*\(\)-_=\+;:<>\/\\\|\}\{\[\]`~'-\w]*/g // eslint-disable-line max-len
   ;
 
 function registerExtractor( extractor ) {
@@ -21,10 +21,8 @@ function registerExtractor( extractor ) {
       if ( typeof type === 'string' ) {
         type = type.toLowerCase();
         typeExtractors[type] = extractor.extract;
-      } else {
-        if ( type instanceof RegExp ) {
-          regexExtractors.push({ reg: type, extractor: extractor.extract });
-        }
+      } else if ( type instanceof RegExp ) {
+        regexExtractors.push({ reg: type, extractor: extractor.extract });
       }
     });
   }
@@ -107,8 +105,7 @@ function findExtractor( type ) {
   var i
     , iLen = regexExtractors.length
     , extractor
-    , regexExtractor
-    ;
+    , regexExtractor;
 
   type = type.toLowerCase();
   if ( typeExtractors[type] ) {
@@ -144,8 +141,8 @@ function extract( type, filePath, options, cb ) {
 
       // update error message if type is supported but just not configured/installed properly
       if ( failedExtractorTypes[type] ) {
-        msg += ', extractor for type exists, but failed to initialize.' +
-          ' Message: ' + failedExtractorTypes[type];
+        msg += ', extractor for type exists, but failed to initialize.'
+          + ' Message: ' + failedExtractorTypes[type];
       }
 
       error = new Error( msg );
 
@@ -1,5 +1,5 @@
-var spawn = require( 'child_process' ).spawn
-  , exec = require( 'child_process' ).exec
+var { spawn } = require( 'child_process' )
+  , { exec } = require( 'child_process' )
   , os = require( 'os' )
   , path = require( 'path' )
   , types
@@ -9,9 +9,7 @@ var spawn = require( 'child_process' ).spawn
 function extractText( filePath, options, cb ) {
   var result = ''
     , error = null
-    , textutil = spawn( 'textutil', ['-convert', 'txt', '-stdout', filePath] )
-    ;
-
+    , textutil = spawn( 'textutil', ['-convert', 'txt', '-stdout', filePath] );
   textutil.stdout.on( 'data', function( buffer ) {
     result += buffer.toString();
   });
@@ -25,8 +23,8 @@ function extractText( filePath, options, cb ) {
 
   textutil.on( 'close', function( /* code */ ) {
     if ( error ) {
-      error = new Error( 'textutil read of file named [[ ' +
-        path.basename( filePath ) + ' ]] failed: ' + error );
+      error = new Error( 'textutil read of file named [[ '
+        + path.basename( filePath ) + ' ]] failed: ' + error );
       cb( error, null );
       return;
     }
@@ -45,12 +43,11 @@ function testForBinary( options, cb ) {
     function( error /* , stdout, stderr */ ) {
       var msg;
       if ( error !== null ) {
-        msg = 'INFO: \'textutil\' does not appear to be installed, ' +
-         'so textract will be unable to extract DOCs.';
+        msg = 'INFO: \'textutil\' does not appear to be installed, '
+         + 'so textract will be unable to extract DOCs.';
       }
       cb( error === null, msg );
-    }
-  );
+    });
 }
 
 if ( os.platform() === 'darwin' ) {
 
@@ -1,9 +1,8 @@
-var exec = require( 'child_process' ).exec
+var { exec } = require( 'child_process' )
   , os = require( 'os' )
   , path = require( 'path' )
   , util = require( '../util' )
-  , types
-  ;
+  , types;
 
 function extractText( filePath, options, cb ) {
   var execOptions = util.createExecOptions( 'doc', options );
@@ -14,18 +13,17 @@ function extractText( filePath, options, cb ) {
       var err;
       if ( error ) {
         if ( error.toString().indexOf( 'is not a Word Document' ) > 0 ) {
-          err = new Error( 'file named [[ ' + path.basename( filePath ) +
-            ' ]] does not appear to really be a .doc file' );
+          err = new Error( 'file named [[ ' + path.basename( filePath )
+            + ' ]] does not appear to really be a .doc file' );
         } else {
-          err = new Error( 'antiword read of file named [[ ' +
-            path.basename( filePath ) + ' ]] failed: ' + error );
+          err = new Error( 'antiword read of file named [[ '
+            + path.basename( filePath ) + ' ]] failed: ' + error );
         }
         cb( err, null );
       } else {
         cb( null, stdout.trim().replace( /\[pic\]/g, '' ) );
       }
-    }
-  );
+    });
 }
 
 function testForBinary( options, cb ) {
@@ -43,16 +41,15 @@ function testForBinary( options, cb ) {
     execOptions,
     function( error /* , stdout, stderr */ ) {
       var msg;
-      if ( error !== null && error.message &&
-        error.message.indexOf( 'not found' ) !== -1 ) {
-        msg = 'INFO: \'antiword\' does not appear to be installed, ' +
-         'so textract will be unable to extract DOCs.';
+      if ( error !== null && error.message
+        && error.message.indexOf( 'not found' ) !== -1 ) {
+        msg = 'INFO: \'antiword\' does not appear to be installed, '
+         + 'so textract will be unable to extract DOCs.';
         cb( false, msg );
       } else {
         cb( true );
       }
-    }
-  );
+    });
 }
 
 if ( os.platform() === 'darwin' ) {
 
@@ -3,35 +3,29 @@ var xpath = require( 'xpath' )
   , yauzl = require( 'yauzl' )
   , util = require( '../util' )
   , includeRegex = /.xml$/
-  , excludeRegex = /^(word\/media\/|word\/_rels\/)/
-  ;
+  , excludeRegex = /^(word\/media\/|word\/_rels\/)/;
 
 function _calculateExtractedText( inText, preserveLineBreaks ) {
   var doc = new Dom().parseFromString( inText )
     , ps = xpath.select( "//*[local-name()='p']", doc )
-    , text = ''
-    ;
-
+    , text = '';
   ps.forEach( function( paragraph ) {
     var ts
-      , localText = ''
-      ;
-
+      , localText = '';
     paragraph = new Dom().parseFromString( paragraph.toString() );
     ts = xpath.select(
-      "//*[local-name()='t' or local-name()='tab' or local-name()='br']", paragraph );
+      "//*[local-name()='t' or local-name()='tab' or local-name()='br']", paragraph
+    );
     ts.forEach( function( t ) {
       if ( t.localName === 't' && t.childNodes.length > 0 ) {
         localText += t.childNodes[0].data;
-      } else {
-        if ( t.localName === 'tab' ) {
+      } else if ( t.localName === 'tab' ) {
+        localText += ' ';
+      } else if ( t.localName === 'br' ) {
+        if ( preserveLineBreaks !== true ) {
           localText += ' ';
-        } else if ( t.localName === 'br' ) {
-          if ( preserveLineBreaks !== true ) {
-            localText += ' ';
-          } else {
-            localText += '\n';
-          }
+        } else {
+          localText += '\n';
         }
       }
     });
@@ -46,9 +40,7 @@ function extractText( filePath, options, cb ) {
 
   yauzl.open( filePath, function( err, zipfile ) {
     var processEnd
-      , processedEntries = 0
-      ;
-
+      , processedEntries = 0;
     if ( err ) {
       util.yauzlError( err, cb );
       return;
@@ -62,9 +54,10 @@ function extractText( filePath, options, cb ) {
           cb( null, text );
         } else {
           cb( new Error(
-            'Extraction could not find content in file, are you' +
-            ' sure it is the mime type it says it is?' ),
-            null );
+            'Extraction could not find content in file, are you'
+            + ' sure it is the mime type it says it is?'
+          ),
+          null );
         }
       }
     };
 
@@ -1,44 +1,37 @@
-var exec = require( 'child_process' ).exec
+var { exec } = require( 'child_process' )
   , path = require( 'path' )
-  , util = require( '../util' )
-  ;
+  , util = require( '../util' );
 
 function extractText( filePath, options, cb ) {
   var execOptions = util.createExecOptions( 'dxf', options )
-    , escapedPath = filePath.replace( /\s/g, '\\ ' )
-    ;
-
+    , escapedPath = filePath.replace( /\s/g, '\\ ' );
   exec( 'drawingtotext ' + escapedPath,
     execOptions,
     function( error, stdout, stderr ) {
       if ( stderr !== '' ) {
-        error = new Error( 'error extracting DXF text ' +
-          path.basename( filePath ) + ': ' + stderr );
+        error = new Error( 'error extracting DXF text '
+          + path.basename( filePath ) + ': ' + stderr );
         cb( error, null );
         return;
       }
 
       cb( null, stdout );
-    }
-  );
+    });
 }
 
 function testForBinary( options, cb ) {
   exec( 'drawingtotext notalegalfile',
     function( error, stdout, stderr ) {
       var msg
-        , errorRegex = /I couldn't make sense of your input/
-        ;
-
+        , errorRegex = /I couldn't make sense of your input/;
       if ( !( stderr && errorRegex.test( stderr ) ) ) {
-        msg = 'INFO: \'drawingtotext\' does not appear to be installed, ' +
-          'so textract will be unable to extract DXFs.';
+        msg = 'INFO: \'drawingtotext\' does not appear to be installed, '
+          + 'so textract will be unable to extract DXFs.';
         cb( false, msg );
       } else {
         cb( true );
       }
-    }
-  );
+    });
 }
 
 module.exports = {
 
@@ -1,14 +1,11 @@
 var EPub = require( 'epub2/node' )
-  , htmlExtract = require( './html' )
-  ;
+  , htmlExtract = require( './html' );
 
 function extractText( filePath, options, cb ) {
   var epub = new EPub( filePath )
     , allText = ''
     , hasError = false
-    , chapterCount = 0
-    ;
-
+    , chapterCount = 0;
   epub.on( 'end', function() {
     // Iterate over each chapter...
     epub.flow.forEach( function( chapter ) {
 
@@ -1,8 +1,7 @@
 /* eslint-disable max-len */
 
 var cheerio = require( 'cheerio' )
-  , fs = require( 'fs' )
-  ;
+  , fs = require( 'fs' );
 
 function getTextWithAlt( $, $element ) {
   if ( !$element ) {
@@ -31,8 +30,8 @@ function getTextWithAlt( $, $element ) {
     }
     return returnText;
   })
-  .get()
-  .join( '' );
+    .get()
+    .join( '' );
 }
 
 function extractFromText( data, options, cb ) {
@@ -66,8 +65,7 @@ function extractFromText( data, options, cb ) {
       .replace( /(\r\u00A0|\u00A0\r|\r | \r)+/g, '\n' )
       .replace( /(\v\u00A0|\u00A0\v|\v | \v)+/g, '\n' )
       .replace( /(\t\u00A0|\u00A0\t|\t | \t)+/g, '\n' )
-      .replace( /[\n\r\t\v]+/g, '\n' )
-      ;
+      .replace( /[\n\r\t\v]+/g, '\n' );
   } catch ( err ) {
     cb( err, null );
     return;
 
@@ -1,6 +1,5 @@
-var exec = require( 'child_process' ).exec
-  , util = require( '../util' )
-  ;
+var { exec } = require( 'child_process' )
+  , util = require( '../util' );
 
 function tesseractExtractionCommand( options, inputFile, outputFile ) {
   var cmd = 'tesseract ' + inputFile + ' ' + outputFile;
@@ -26,17 +25,16 @@ function testForBinary( options, cb ) {
     function( error, stdout, stderr ) {
       var msg;
       // checking for content of help text
-      if ( ( error && error.toString().indexOf( 'Usage:' ) > -1 ) ||
-          ( stderr && stderr.toString().indexOf( 'Usage:' ) > -1 ) ||
-          ( stdout && stdout.toString().indexOf( 'Usage:' ) > -1 ) ) {
+      if ( ( error && error.toString().indexOf( 'Usage:' ) > -1 )
+          || ( stderr && stderr.toString().indexOf( 'Usage:' ) > -1 )
+          || ( stdout && stdout.toString().indexOf( 'Usage:' ) > -1 ) ) {
         cb( true );
       } else {
-        msg = 'INFO: \'tesseract\' does not appear to be installed, ' +
-         'so textract will be unable to extract images.';
+        msg = 'INFO: \'tesseract\' does not appear to be installed, '
+         + 'so textract will be unable to extract images.';
         cb( false, msg );
       }
-    }
-  );
+    });
 }
 
 module.exports = {
 
@@ -1,7 +1,6 @@
 var fs = require( 'fs' )
   , marked = require( 'marked' )
-  , htmlExtract = require( './html' )
-  ;
+  , htmlExtract = require( './html' );
 
 function extractText( filePath, options, cb ) {
   fs.readFile( filePath, function( error, data ) {