|
1 | 1 | import sys |
| 2 | +import time |
| 3 | +from typing import Tuple, List |
2 | 4 |
|
3 | 5 | from pathlib import Path |
4 | 6 | from pywintypes import com_error |
@@ -29,77 +31,103 @@ def __exit__(self, exc_type, exc_value, exc_traceback): |
29 | 31 | if self.doc is None: |
30 | 32 | return |
31 | 33 |
|
32 | | - self.doc.Close(0) |
| 34 | + retry_delay_seconds = 0.25 |
| 35 | + retry_n_times = int(60 / retry_delay_seconds) |
| 36 | + for _ in range(retry_n_times): |
| 37 | + try: |
| 38 | + self.doc.Close(0) |
| 39 | + except com_error as e: |
| 40 | + if "rejected by callee" in e.strerror.lower(): |
| 41 | + time.sleep(retry_delay_seconds) |
| 42 | + continue |
| 43 | + raise |
33 | 44 |
|
| 45 | + break |
34 | 46 |
|
def get_Word(visible=False):
    """Start an MS Word COM automation instance and return it.

    The instance is requested with ``DispatchEx`` instead of attaching to an
    already running Word, so documents the user has open are not touched by
    the conversion.  The caller is responsible for calling ``Quit()`` on the
    returned object when it is no longer needed.

    Args:
        visible: Whether the Word window should be shown.  Defaults to
            ``False`` so conversions run without any UI.

    Returns:
        The ``Word.Application`` COM object.
    """
    app = "Word.Application"

    word = win32com.client.DispatchEx(app)
    word.Visible = visible

    return word
50 | 55 |
|
51 | 56 |
|
def _call_with_retry(func, *args, retry_delay_seconds=0.25, timeout_seconds=60, **kwargs):
    """Call ``func`` and retry while Word answers "rejected by callee".

    Word reports that error while it is busy; the call is repeated every
    ``retry_delay_seconds`` for roughly ``timeout_seconds``.  Any other COM
    error, or a rejection on the final attempt, is re-raised so a failed
    call never goes unnoticed.
    """
    attempts = max(1, int(timeout_seconds / retry_delay_seconds))
    for attempt in range(attempts):
        try:
            return func(*args, **kwargs)
        except com_error as e:
            # strerror may be missing/None on some COM errors; never crash on it.
            message = (getattr(e, "strerror", None) or "").lower()
            is_last_attempt = attempt == attempts - 1
            if "rejected by callee" not in message or is_last_attempt:
                raise
            time.sleep(retry_delay_seconds)


def windows(input_paths: List[Path], output_paths: List[Path], word=None) -> List[Path]:
    """Convert Word documents to PDF through MS Word COM automation.

    Args:
        input_paths: Documents to convert.
        output_paths: Target PDF path for each entry of ``input_paths``
            (paired positionally).
        word: An existing ``Word.Application`` COM object to reuse.  When
            ``None`` a private instance is started and quit again afterwards;
            an instance supplied by the caller is left running.

    Returns:
        ``output_paths`` unchanged.

    Raises:
        com_error: If Word fails to save a document, including the case
            where it keeps rejecting the call until the retry budget is
            exhausted.
    """
    got_our_own_word = False
    if word is None:
        word = get_Word()
        got_our_own_word = True
    wdFormatPDF = 17  # Word's WdSaveFormat value for PDF

    try:
        for docx_inp, pdf_out in zip(input_paths, output_paths):
            with WordDocument(word, docx_inp) as doc:
                _call_with_retry(doc.SaveAs, str(pdf_out), FileFormat=wdFormatPDF)
    finally:
        # Only shut down the instance we started ourselves.
        if got_our_own_word:
            word.Quit()

    return output_paths
73 | 85 |
|
def resolve_paths(input_path, output_path) -> Tuple[List[Path], List[Path]]:
    """Normalise user supplied input/output locations into matching path lists.

    Args:
        input_path: A single ``.doc``/``.docx`` file, a directory (all
            ``*.doc*`` files not starting with ``~`` are taken, sorted), or a
            list of files.  Entries without a ``.doc``/``.docx`` suffix
            (case-insensitive) are dropped.
        output_path: ``None`` (each PDF is written next to its input), an
            existing directory (PDFs are placed inside it), a single ``.pdf``
            file (only valid for a single input), or a list with one ``.pdf``
            path per input.

    Returns:
        ``(inputs, outputs)``: two equally long lists of resolved paths.

    Raises:
        ValueError: If an argument has an unsupported type or shape.
        AssertionError: If no usable input remains, or the number of ``.pdf``
            outputs differs from the number of inputs.
    """
    _inp = input_path
    _out = output_path

    # prep input
    if isinstance(input_path, (str, Path)):
        input_path = Path(input_path)
        if input_path.is_dir():
            input_path = sorted(input_path.glob("[!~]*.doc*"))
        else:
            input_path = [input_path]
    if isinstance(input_path, list):
        input_path = [
            Path(p).resolve()
            for p in input_path
            if Path(p).suffix.lower() in (".docx", ".doc")
        ]
        # Raised explicitly (not via `assert`) so the check survives `python -O`.
        if not input_path:
            raise AssertionError((_inp, input_path))
    else:
        raise ValueError(f"unexpected input for `input_path`:{_inp}")

    # prep output
    if output_path is None:
        # Each PDF goes next to its own source document.
        output_path = [inp.parent / (inp.stem + ".pdf") for inp in input_path]

    if isinstance(output_path, (str, Path)):
        output_path = Path(output_path)
        if output_path.is_dir():
            output_path = [output_path / (inp.stem + ".pdf") for inp in input_path]
        elif output_path.suffix.lower() == ".pdf" and len(input_path) == 1:
            # A single explicit target file only makes sense for one input;
            # anything else still falls through to the ValueError below.
            output_path = [output_path]

    if isinstance(output_path, list):
        output_path = [
            Path(p).resolve() for p in output_path if Path(p).suffix.lower() == ".pdf"
        ]
        # A mismatch would silently mis-pair inputs and outputs downstream.
        if len(output_path) != len(input_path):
            raise AssertionError((_out, output_path))
    else:
        raise ValueError(f"unexpected input for `output_path`:{_out}")

    return input_path, output_path
98 | 126 |
|
99 | 127 |
|
def convert(input_path, output_path=None, word=None):
    """Convert Word document(s) to PDF.

    The paths are normalised with ``resolve_paths`` first; the conversion
    itself is only available on Windows, where it is delegated to
    ``windows``.

    Args:
        input_path: Forwarded to ``resolve_paths`` as the input location.
        output_path: Forwarded to ``resolve_paths`` as the output location.
        word: Optional Word COM object handed through to ``windows``.

    Returns:
        Whatever ``windows`` returns for the resolved paths.

    Raises:
        NotImplementedError: On any platform other than ``win32``.
    """
    docx_paths, pdf_paths = resolve_paths(input_path, output_path)
    if sys.platform != "win32":
        raise NotImplementedError("Not implemented for linux or darwin systems.")
    return windows(docx_paths, pdf_paths, word=word)
0 commit comments