Skip to content

Commit eb47429

Browse files
committed
meta: python binding for HttPipe
1 parent 6cff982 commit eb47429

File tree

3 files changed

+216
-78
lines changed

3 files changed

+216
-78
lines changed

meta/bindings/python/papermuncher.py

+166-71
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,17 @@
11
import dataclasses as dc
2-
from email.message import Message
2+
import io
33
from pathlib import Path
4-
from email.parser import BytesParser
54
import subprocess
65
import tempfile
7-
from typing import IO
86
import magic
7+
import os
8+
import logging
99

10+
_logger = logging.getLogger(__name__)
1011

1112
class Loader:
12-
def handleRequest(
13-
self, url: str, headers: dict[str, str]
14-
) -> tuple[int, dict[str, str], bytes]:
15-
return (
16-
404,
17-
{
18-
"mime": "text/html",
19-
},
20-
b"<html><body>404 Not Found</body></html>",
21-
)
22-
13+
def handleRequest(self, url: str) -> tuple[dict[str, str], bytes]:
14+
raise NotImplementedError()
2315

2416
@dc.dataclass
2517
class StaticDir(Loader):
@@ -28,43 +20,141 @@ class StaticDir(Loader):
2820
def __init__(self, path: Path):
2921
self._path = path
3022

31-
def handleRequest(
32-
self, url: str, headers: dict[str, str]
33-
) -> tuple[int, dict[str, str], bytes]:
34-
path = self._path / url
23+
def handleRequest(self, url: str) -> tuple[dict[str, str], bytes]:
    """Serve the file designated by *url* from the loader's directory.

    Args:
        url: Request path, normally starting with ``/``.

    Returns:
        A ``(headers, content)`` pair where ``headers`` holds the detected
        MIME type under the ``"mime"`` key and ``content`` is the raw file.

    Raises:
        FileNotFoundError: If the path does not designate a regular file
            located inside the served directory.
    """
    # An HTTP path starts with '/', which pathlib would treat as the file
    # system root when joining, so every leading slash is stripped first.
    root = self._path.resolve()
    target = (root / url.lstrip("/")).resolve()

    # Refuse anything that escapes the served directory (e.g. "/../secret")
    # and anything that is not a regular file (directories, missing paths).
    if not target.is_relative_to(root) or not target.is_file():
        raise FileNotFoundError(url)

    return (
        {
            "mime": magic.Magic(mime=True).from_file(target),
        },
        target.read_bytes(),
    )
5136

5237

38+
# Largest number of bytes requested from the stream in a single read.
MAX_BUFFER_SIZE = 1024


class HttpMessage:
    """Common parsing helpers shared by HTTP requests and responses.

    Readers passed to the methods below must be binary streams: lines are
    read as bytes and decoded here.
    """

    def __init__(self):
        self.headers = {}

    def _readHeaderLines(self, reader: io.BufferedIOBase) -> list[str]:
        """Read lines up to (and excluding) the blank line ending the header.

        Returns:
            The decoded lines, each still carrying its line terminator.

        Raises:
            EOFError: If the stream ends before the blank line is seen.
        """
        lines = []

        while True:
            line = reader.readline().decode("utf-8")

            if not line:
                raise EOFError("Input stream has ended")

            if line == "\r\n":
                break

            lines.append(line)

        return lines

    def _addToHeader(self, header_line: str) -> None:
        """Parse one ``Name: value`` line and store it in ``self.headers``.

        Raises:
            ValueError: If the line contains no ``:`` separator.
        """
        # Only the first colon separates name and value; values such as
        # "localhost:8080" legitimately contain further colons.
        key, separator, value = header_line.partition(":")
        if not separator:
            raise ValueError(f"Malformed header line: {header_line!r}")
        self.headers[key.strip()] = value.strip()

    def readHeader(self, reader: io.BufferedIOBase) -> None:
        """Parse the start line and headers; implemented by subclasses."""
        raise NotImplementedError()

    def _readSingleChunk(self, reader: io.BufferedIOBase) -> bytes:
        """Read one chunk of a chunked body and return its payload.

        Raises:
            EOFError: If the stream ends before the chunk is complete.
            ValueError: If the chunk size is not valid hexadecimal.
        """
        size_line = reader.readline()
        if not size_line:
            raise EOFError("Input stream has ended")

        # The chunk size is hexadecimal and may be followed by
        # ";extension" parameters, which are ignored here.
        size = int(size_line.decode("ascii").split(";", 1)[0].strip(), 16)

        parts = []
        remaining = size
        while remaining > 0:
            data = reader.read(min(MAX_BUFFER_SIZE, remaining))
            if not data:
                raise EOFError("Input stream has ended")
            parts.append(data)
            # A read may return fewer bytes than requested, so count what
            # was actually received.
            remaining -= len(data)

        # Consume the CRLF that terminates the chunk data.
        reader.read(2)

        return b"".join(parts)

    def readChunkedBody(self, reader: io.BufferedIOBase) -> bytes:
        """Read chunks until the zero-length terminator and concatenate them."""
        chunks = []
        while True:
            chunk = self._readSingleChunk(reader)
            if not chunk:
                break
            chunks.append(chunk)

        return b"".join(chunks)
100+
101+
class HttpRequest(HttpMessage):
102+
103+
def __init__(self, method=None, path=None, version=None):
104+
super().__init__()
105+
self.method = method
106+
self.path = path
107+
self.version = version
108+
109+
def readHeader(self, reader: io.TextIOWrapper) -> None:
110+
header_lines = self._readHeaderLines(reader)
111+
self.method, self.path, self.version = header_lines[0].split(' ')
112+
113+
for line in header_lines[1:]:
114+
self._addToHeader(line)
115+
116+
117+
# Reason phrases for the status codes this module emits.
RESPONSE_MESSAGES = {
    200: "OK",
    404: "Not Found",
}


class HttpResponse(HttpMessage):
    """An HTTP response that can be serialized with ``bytes(response)``."""

    def __init__(
        self,
        code: int,
        headers: dict[str, str] | None = None,
        version="1.1",
    ):
        super().__init__()
        # None stands for "no extra headers"; this avoids sharing one
        # mutable default dict between calls.
        if headers:
            self.headers |= headers
        self.version = version
        self.code = code
        self.body = None

    def addHeader(self, key: str, value: str) -> None:
        """Set (or overwrite) a single header."""
        self.headers[key] = value

    def addBody(self, body: bytes) -> None:
        """Attach *body* and set the matching ``Content-Length`` header.

        Raises:
            ValueError: If *body* is not a ``bytes`` object.
        """
        if not isinstance(body, bytes):
            raise ValueError("Body must be in bytes")
        self.body = body
        self.addHeader("Content-Length", str(len(body)))

    def __bytes__(self) -> bytes:
        """Serialize the status line, headers, blank line and body."""
        reason = RESPONSE_MESSAGES.get(self.code, "No Message")
        status_line = f"HTTP/{self.version} {self.code} {reason}".encode()
        header_lines = [
            f"{key}: {value}".encode() for key, value in self.headers.items()
        ]

        # The empty element produces the blank line separating the header
        # section from the body.
        return b"\r\n".join([status_line, *header_lines, b"", self.body or b""])
148+
149+
53150
def _run(
54151
args: list[str],
55152
loader=Loader(),
56153
) -> bytes:
57-
def _readRequest(fd: IO) -> Message[str, str] | None:
58-
# Read the request header from the file descriptor
59-
parser = BytesParser()
60-
return parser.parse(fd)
61-
62-
def _sendResponse(fd: IO, status: int, headers: dict[str, str], body: bytes):
63-
fd.write(f"HTTP/2 {status}\r\n".encode())
64-
for key, value in headers.items():
65-
fd.write(f"{key}: {value}\r\n".encode())
66-
fd.write(b"\r\n")
67-
fd.write(body)
154+
155+
def sendResponse(stdin: io.TextIOWrapper, response: HttpResponse):
156+
stdin.write(bytes(response))
157+
stdin.flush()
68158

69159
with subprocess.Popen(
70160
args,
@@ -84,58 +174,63 @@ def _sendResponse(fd: IO, status: int, headers: dict[str, str], body: bytes):
84174
if stdin is None:
85175
raise ValueError("stdin is None")
86176

87-
while True:
88-
request = _readRequest(stdout)
89-
if request is None:
90-
raise ValueError("request is None")
91-
92-
if request.preamble is None:
93-
raise ValueError("request.preamble is None")
94-
95-
preamble = request.preamble.split(" ")
96-
if preamble[0] == b"GET":
97-
_sendResponse(stdin, *loader.handleRequest(preamble[1], dict(request)))
98-
elif preamble[0] == b"POST":
99-
payload = request.get_payload()
100-
if not isinstance(payload, bytes):
101-
raise ValueError("payload is not bytes")
102-
proc.terminate()
103-
return payload
104-
else:
105-
raise ValueError("Invalid request")
106-
107-
108-
def find() -> Path:
109-
return Path(__file__).parent / "bin"
110-
111-
112-
def print(
177+
# The only exception we are recovering from for now is FileNotFound, which is implemented in PM's HttPipe flow
178+
try:
179+
while True:
180+
request = HttpRequest()
181+
request.readHeader(stdout)
182+
183+
if request.method == "GET":
184+
try:
185+
headers, asset = loader.handleRequest(request.path)
186+
except FileNotFoundError:
187+
response = HttpResponse(404)
188+
else:
189+
response = HttpResponse(200, headers)
190+
response.addBody(asset)
191+
192+
sendResponse(stdin, response)
193+
elif request.method == "POST":
194+
payload = request.readChunkedBody(stdout)
195+
proc.terminate()
196+
return payload
197+
else:
198+
raise ValueError("Invalid request")
199+
except Exception as e:
200+
proc.terminate()
201+
_logger.debug(stderr.read().decode('utf-8'))
202+
raise e
203+
204+
205+
def printPM(
    document: bytes | str | Path,
    bin: str,
    loader: Loader = StaticDir(Path.cwd()),
    *args: str,
    **kwargs: str,
) -> bytes:
    """Run the renderer's ``print`` command on *document* and return its output.

    Args:
        document: Either a path handed to the binary as-is, or the document
            content itself (``str`` content is encoded before being written).
        bin: Executable to run.
        loader: Loader answering the requests made by the subprocess.
        *args: Extra command-line arguments appended verbatim.
        **kwargs: Extra options, each appended as ``--key value``.

    Returns:
        The bytes returned by ``_run``.

    Raises:
        TypeError: If *document* is given as content and *loader* exposes no
            ``_path`` directory in which to store it.
    """
    extraArgs = list(args)
    for key, value in kwargs.items():
        extraArgs += [f"--{key}", str(value)]

    if isinstance(document, Path):
        return _run(
            [str(bin), "print", str(document)] + extraArgs,
            loader,
        )

    # In-memory documents are written to a temporary file inside the
    # loader's directory; that requires a loader exposing `_path`.
    served_dir = getattr(loader, "_path", None)
    if served_dir is None:
        raise TypeError(
            "In-memory documents require a loader with a `_path` directory "
            "(e.g. StaticDir)"
        )

    content = document.encode() if isinstance(document, str) else document

    with tempfile.NamedTemporaryFile(dir=served_dir) as f:
        f.write(content)
        # Flush so the subprocess sees the full content while the file is
        # still open here.
        f.flush()

        # Only the base name is passed -- presumably the renderer resolves
        # it through the loader; confirm against the binary's behavior.
        return _run(
            [str(bin), "print", os.path.basename(f.name)] + extraArgs,
            loader,
        )
138-
return b""
139234

140235

141-
__all__ = ["Loader", "StaticDir", "print"]
236+
__all__ = ["Loader", "StaticDir", "printPM"]

meta/bindings/python/sample.py

+47-7
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,52 @@
11
import papermuncher
2+
from pathlib import Path
23

3-
with open("out.pdf", "wb") as f:
4-
document = """
5-
<p>Hello, world!</p>
6-
"""
7-
f.write(
8-
papermuncher.print(
4+
def sample(document, output_filename):
    """Render *document* with paper-muncher and write the result to a file.

    On failure the error is printed and nothing is written.
    """
    try:
        output = papermuncher.printPM(
            document,
            "paper-muncher",
            papermuncher.StaticDir(Path.cwd()),
            "--httpipe",
            paper="a4",
        )
    except Exception as e:
        import traceback

        print(
            f'Error while running paper-muncher:\n{"".join(traceback.format_tb(e.__traceback__))}\n{e}\n'
        )
        # No output was produced; returning here avoids a NameError on the
        # unbound `output` below.
        return

    with open(output_filename, "wb") as f:
        f.write(output)
21+
22+
23+
sample(
24+
"""
25+
<html>
26+
<head>
27+
<link type="text/css" rel="stylesheet" href="./404.css"/>
28+
</head>
29+
<p>Hello, world!</p>
30+
</html>
31+
""",
32+
"css_not_found.pdf"
33+
)
34+
35+
sample(
36+
"""
37+
<p>Hello, world!</p>
38+
""",
39+
"no_css.pdf"
40+
)
41+
42+
sample(
43+
"""
44+
<html>
45+
<head>
46+
<link type="text/css" rel="stylesheet" href="./test.css"/>
47+
</head>
48+
<p>Hello, world!</p>
49+
</html>
50+
""",
51+
"with_css.pdf"
52+
)

meta/bindings/python/test.css

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
p {
2+
border: black solid 5px;
3+
}

0 commit comments

Comments
 (0)