diff --git a/lib/docsplit.rb b/lib/docsplit.rb index 5001413..0a8f91b 100755 --- a/lib/docsplit.rb +++ b/lib/docsplit.rb @@ -7,7 +7,10 @@ module Docsplit VERSION = '0.7.2' # Keep in sync with gemspec. - ESCAPE = lambda {|x| Shellwords.shellescape(x) } + HOST_OS = (defined?("RbConfig") ? RbConfig : Config)::CONFIG['host_os'] + IS_WIN = !!HOST_OS.match(/mswin|msys|mingw|cygwin|bccwin|wince|emc/i) + + ESCAPE = IS_WIN ? lambda {|x| "\"#{x}\"" } : lambda {|x| Shellwords.shellescape(x) } ROOT = File.expand_path(File.dirname(__FILE__) + '/..') ESCAPED_ROOT = ESCAPE[ROOT] diff --git a/lib/docsplit/image_extractor.rb b/lib/docsplit/image_extractor.rb index 8c29bbc..fbdf167 100755 --- a/lib/docsplit/image_extractor.rb +++ b/lib/docsplit/image_extractor.rb @@ -36,13 +36,25 @@ def convert(pdf, size, format, previous=nil) FileUtils.mkdir_p(directory) unless File.exists?(directory) common = "#{MEMORY_ARGS} -density #{@density} #{resize_arg(size)} #{quality_arg(format)}" if previous - FileUtils.cp(Dir[directory_for(previous) + '/*'], directory) - result = `MAGICK_TMPDIR=#{tempdir} OMP_NUM_THREADS=2 gm mogrify #{common} -unsharp 0x0.5+0.75 \"#{directory}/*.#{format}\" 2>&1`.chomp + # Only copy image files, skip other files such as Thumbs.db under windows platform + imageFiles = File.join(directory_for(previous), '*.' + format) + FileUtils.cp(Dir.glob(imageFiles), directory) + if IS_WIN + cmd = "set MAGICK_TMPDIR=#{tempdir} & set OMP_NUM_THREADS=2 & gm mogrify #{common} -unsharp 0x0.5+0.75 \"#{directory}/*.#{format}\" 2>&1".chomp + else + cmd = "MAGICK_TMPDIR=#{tempdir} OMP_NUM_THREADS=2 gm mogrify #{common} -unsharp 0x0.5+0.75 \"#{directory}/*.#{format}\" 2>&1".chomp + end + result = `#{cmd}`.chomp raise ExtractionFailed, result if $? != 0 else page_list(pages).each do |page| - out_file = ESCAPE[File.join(directory, "#{basename}_#{page}.#{format}")] - cmd = "MAGICK_TMPDIR=#{tempdir} OMP_NUM_THREADS=2 gm convert +adjoin -define pdf:use-cropbox=true #{common} #{escaped_pdf}[#{page - 1}] #{out_file} 2>&1".chomp + if IS_WIN + out_file = File.join(directory, "#{basename}_#{page}.#{format}") + cmd = "set MAGICK_TMPDIR=#{tempdir} & set OMP_NUM_THREADS=2 & gm convert +adjoin -define pdf:use-cropbox=true #{common} #{escaped_pdf}[#{page - 1}] \"#{out_file}\" 2>&1".chomp + else + out_file = ESCAPE[File.join(directory, "#{basename}_#{page}.#{format}")] + cmd = "MAGICK_TMPDIR=#{tempdir} OMP_NUM_THREADS=2 gm convert +adjoin -define pdf:use-cropbox=true #{common} #{escaped_pdf}[#{page - 1}] #{out_file} 2>&1".chomp + end result = `#{cmd}`.chomp raise ExtractionFailed, result if $? != 0 end diff --git a/lib/docsplit/pdf_extractor.rb b/lib/docsplit/pdf_extractor.rb index 10fd5fc..9b89764 100644 --- a/lib/docsplit/pdf_extractor.rb +++ b/lib/docsplit/pdf_extractor.rb @@ -7,7 +7,7 @@ class PdfExtractor # Provide a set of helper functions to determine the OS. HOST_OS = (defined?("RbConfig") ? RbConfig : Config)::CONFIG['host_os'] def windows? - !!HOST_OS.match(/mswin|windows|cygwin/i) + IS_WIN end def osx? !!HOST_OS.match(/darwin/i) @@ -22,7 +22,13 @@ def version_string @@help ||= `#{office_executable} -h 2>&1`.split("\n").first end def libre_office? - !!version_string.match(/^LibreOffice/) + if windows? + # on windows platform we can't get version string by 'version_string' func, + # so we simply match the executable path + !!office_executable.match(/libreOffice/i) + else + !!version_string.match(/^LibreOffice/) + end end def open_office? !!version_string.match(/^OpenOffice.org/) @@ -116,7 +122,14 @@ def extract(docs, opts) ENV['SYSUSERCONFIG']="file://#{File.expand_path(escaped_out)}" options = "--headless --invisible --norestore --nolockcheck --convert-to pdf --outdir #{escaped_out} #{escaped_doc}" - cmd = "#{office_executable} #{options} 2>&1" + + # quote path on windows platform to avoid wrong path issue + if windows? + cmd = "\"#{office_executable}\" #{options} 2>&1" + else + cmd = "#{office_executable} #{options} 2>&1" + end + result = `#{cmd}`.chomp raise ExtractionFailed, result if $? != 0 true @@ -141,7 +154,14 @@ def run_jod(command, pdfs, opts, return_output=false) pdfs = [pdfs].flatten.map{|pdf| "\"#{pdf}\""}.join(' ') office = osx? ? "-Doffice.home=#{office_path}" : office_path - cmd = "java #{HEADLESS} #{LOGGING} #{office} -cp #{CLASSPATH} #{command} #{pdfs} 2>&1" + + # quote path on windows platform to avoid wrong path issue + if windows? + cmd = "java #{HEADLESS} #{LOGGING} \"#{office}\" -cp #{CLASSPATH} #{command} #{pdfs} 2>&1" + else + cmd = "java #{HEADLESS} #{LOGGING} #{office} -cp #{CLASSPATH} #{command} #{pdfs} 2>&1" + end + result = `#{cmd}`.chomp raise ExtractionFailed, result if $? != 0 return return_output ? (result.empty? ? nil : result) : true