Skip to content

Commit 55b29dc

Browse files
committed
convert list of files
1 parent 3ee16de commit 55b29dc

File tree

1 file changed

+81
-53
lines changed

1 file changed

+81
-53
lines changed

agolutils/convert.py

Lines changed: 81 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import sys
2+
import time
3+
from typing import Tuple, List
24

35
from pathlib import Path
46
from pywintypes import com_error
@@ -29,77 +31,103 @@ def __exit__(self, exc_type, exc_value, exc_traceback):
2931
if self.doc is None:
3032
return
3133

32-
self.doc.Close(0)
34+
retry_delay_seconds = 0.25
35+
retry_n_times = int(60 / retry_delay_seconds)
36+
for _ in range(retry_n_times):
37+
try:
38+
self.doc.Close(0)
39+
except com_error as e:
40+
if "rejected by callee" in e.strerror.lower():
41+
time.sleep(retry_delay_seconds)
42+
continue
43+
raise
3344

45+
break
3446

35-
def get_Word():
    """Dispatch a ``Word.Application`` COM object and hide its window.

    Returns:
        The COM object returned by ``win32com.client.DispatchEx``, with
        ``Visible`` set to ``False``.
    """
    prog_id = "Word.Application"
    instance = win32com.client.DispatchEx(prog_id)
    instance.Visible = False
    return instance
5055

5156

52-
def _retry_while_busy(action, retry_delay_seconds=0.25, timeout_seconds=60):
    """Call *action*, retrying while Word answers "rejected by callee".

    Any other ``com_error`` propagates immediately. Once the retry budget is
    used up the last ``com_error`` is re-raised instead of being swallowed.
    """
    attempts = int(timeout_seconds / retry_delay_seconds)
    for attempt in range(attempts):
        try:
            return action()
        except com_error as e:
            # strerror can be None; treat that as "not the busy error".
            busy = "rejected by callee" in (e.strerror or "").lower()
            if not busy or attempt == attempts - 1:
                raise
            time.sleep(retry_delay_seconds)


def windows(input_paths: List[Path], output_paths: List[Path], word=None) -> List[Path]:
    """Save each input document as a PDF through a Word COM object.

    Args:
        input_paths: Documents to open, one per output path.
        output_paths: Destination PDF paths, paired with ``input_paths`` by
            position.
        word: Word COM object to reuse. When ``None`` one is obtained from
            ``get_Word()`` and quit again before returning.

    Returns:
        ``output_paths`` unchanged.

    Raises:
        ValueError: If the two lists differ in length (pairs would otherwise
            be dropped silently by ``zip``).
        com_error: If Word fails for a reason other than being busy, or is
            still busy after the retry budget is used up.
    """
    if len(input_paths) != len(output_paths):
        raise ValueError(
            f"got {len(input_paths)} input paths but {len(output_paths)} output paths"
        )

    got_our_own_word = word is None
    if got_our_own_word:
        word = get_Word()
    wdFormatPDF = 17  # value passed to SaveAs as FileFormat to request PDF

    try:
        for docx_inp, pdf_out in zip(input_paths, output_paths):
            with WordDocument(word, docx_inp) as doc:
                _retry_while_busy(
                    lambda: doc.SaveAs(str(pdf_out), FileFormat=wdFormatPDF)
                )
    finally:
        # Only shut down an instance we started; leave a caller's alone.
        if got_our_own_word:
            word.Quit()

    return output_paths
7385

74-
def resolve_paths(input_path, output_path) -> Tuple[List[Path], List[Path]]:
    """Normalise user-supplied input/output locations into paired path lists.

    Args:
        input_path: A single document path, a directory (searched with the
            glob ``[!~]*.doc*``), or a list of document paths. Entries whose
            suffix is not ``.doc``/``.docx`` (case-insensitive) are dropped.
        output_path: ``None`` (each PDF is placed next to its source
            document), a directory (PDFs are named after the input stems), a
            single ``.pdf`` file path, or a list of ``.pdf`` paths.

    Returns:
        ``(inputs, outputs)``: two equally long lists of resolved paths.

    Raises:
        ValueError: If either argument has an unsupported type, no usable
            input document remains, or the number of ``.pdf`` outputs does
            not match the number of inputs.
    """
    _inp = input_path
    _out = output_path

    # prep input
    if isinstance(input_path, (str, Path)):
        input_path = Path(input_path)
        if input_path.is_dir():
            input_path = sorted(input_path.glob("[!~]*.doc*"))
        else:
            input_path = [input_path]
    if not isinstance(input_path, list):
        raise ValueError(f"unexpected input for `input_path`:{_inp}")
    input_path = [
        Path(p).resolve()
        for p in input_path
        if Path(p).suffix.lower() in (".docx", ".doc")
    ]
    if not input_path:
        raise ValueError(f"no .doc/.docx documents found for `input_path`:{_inp}")

    # prep output
    if output_path is None:
        # Each PDF goes beside its own source, not beside the first one.
        output_path = [inp.parent / (inp.stem + ".pdf") for inp in input_path]
    elif isinstance(output_path, (str, Path)):
        output_path = Path(output_path)
        if output_path.is_dir():
            output_path = [output_path / (inp.stem + ".pdf") for inp in input_path]
        else:
            # A single explicit target file.
            output_path = [output_path]

    if not isinstance(output_path, list):
        raise ValueError(f"unexpected input for `output_path`:{_out}")
    output_path = [
        Path(p).resolve() for p in output_path if Path(p).suffix.lower() == ".pdf"
    ]
    if len(output_path) != len(input_path):
        raise ValueError(
            f"expected {len(input_path)} .pdf output path(s) for `output_path`:{_out}, "
            f"got {len(output_path)}"
        )

    return input_path, output_path
98126

99127

100-
def convert(input_path, output_path=None, word=None):
    """Resolve the given paths and convert the documents to PDF.

    Args:
        input_path: Forwarded to ``resolve_paths`` as the input location.
        output_path: Forwarded to ``resolve_paths`` as the output location.
        word: Optional Word COM object forwarded to ``windows``.

    Returns:
        Whatever ``windows`` returns for the resolved paths.

    Raises:
        NotImplementedError: On any platform other than ``win32``.
    """
    sources, targets = resolve_paths(input_path, output_path)
    if sys.platform != "win32":
        raise NotImplementedError("Not implemented for linux or darwin systems.")
    return windows(sources, targets, word=word)

0 commit comments

Comments
 (0)