Pylons · luhn · Sep 17, 2024 · Sep 17, 2024 · Sep 17, 2024 · Sep 17, 2024
diff --git a/setup.py b/setup.py
@@ -53,6 +53,9 @@
     packages=find_packages("src", exclude=["tests"]),
     package_dir={"": "src"},
     python_requires=">=3.8",
+    install_requires=[
+        "multipart>=0.2,<0.3",
+    ],
     zip_safe=True,
     extras_require={"testing": testing_extras, "docs": docs_extras},
 )
diff --git a/src/webob/compat.py b/src/webob/compat.py
diff --git a/src/webob/multidict.py b/src/webob/multidict.py
@@ -6,9 +6,11 @@
 """
 import binascii
 from collections.abc import MutableMapping
-from urllib.parse import urlencode as url_encode
+from urllib.parse import parse_qsl, urlencode as url_encode
 import warnings
 
+from multipart import parse_options_header
+
 __all__ = ["MultiDict", "NestedMultiDict", "NoVars", "GetDict"]
 
 
@@ -57,6 +59,9 @@ def view_list(cls, lst):
     def from_fieldstorage(cls, fs):
         """
         Create a multidict from a cgi.FieldStorage instance
+
+        Legacy.
+
         """
         obj = cls()
         # fs.list can be None when there's nothing to parse
@@ -96,6 +101,25 @@ def decode(b):
 
         return obj
 
+    @classmethod
+    def from_multipart(cls, mp):
+        """
+        Create a multidict from an iterable of parsed multipart parts.
+
+        ``mp`` is iterated once and every part is added under ``part.name``.
+        A part that carries a filename, or whose ``is_buffered()`` is false,
+        is wrapped in a :class:`MultiDictFile`; any other part contributes
+        its ``part.value`` directly.
+
+        """
+        result = cls()
+
+        for part in mp:
+            if not part.filename and part.is_buffered():
+                # Plain form field: store the part's value itself.
+                result.add(part.name, part.value)
+                continue
+            # Named upload or non-buffered part: keep a file-like wrapper
+            # instead of pulling the content into the multidict.
+            upload = MultiDictFile.from_multipart_part(part)
+            result.add(part.name, upload)
+        return result
+
+    @classmethod
+    def from_qs(cls, data, charset="utf-8"):
+        """
+        Create a multidict from a URL-encoded (query-string style) payload.
+
+        ``data`` is the raw ``bytes`` payload, e.g. the body of an
+        ``application/x-www-form-urlencoded`` request.  Blank values are
+        kept.  Keys and values are percent-decoded and then decoded with
+        ``charset``; bytes that are invalid in ``charset`` raise
+        ``UnicodeDecodeError``.  Empty or ``None`` input yields an empty
+        multidict.
+
+        """
+        if not data:
+            return cls()
+
+        # Parse a text view of the payload rather than the bytes themselves:
+        # older Python versions round-trip bytes input to ``parse_qsl``
+        # through ASCII, so raw or percent-encoded non-ASCII data fails
+        # before ``charset`` is ever applied.  latin-1 maps each byte to the
+        # code point of the same value, which makes the round trip lossless.
+        text = data.decode("latin-1")
+        pairs = parse_qsl(text, keep_blank_values=True, encoding="latin-1")
+
+        def decode(s):
+            # Recover the percent-decoded bytes, then apply the real charset.
+            return s.encode("latin-1").decode(charset)
+
+        return cls((decode(key), decode(value)) for (key, value) in pairs)
+
     def __getitem__(self, key):
         for k, v in reversed(self._items):
             if k == key:
@@ -286,6 +310,59 @@ def values(self):
 _dummy = object()
 
 
+class MultiDictFile:
+    """
+    An object representing a file upload in a ``multipart/form-data`` request.
+
+    This object has the same shape as Python's deprecated ``cgi.FieldStorage``
+    object, which was previously used by webob to represent file uploads.
+
+    Instances are plain attribute holders; use :meth:`from_multipart_part`
+    to build one from a parsed multipart part.
+
+    """
+
+    def __init__(
+        self,
+        name,
+        filename,
+        file,
+        type,
+        type_options,
+        disposition,
+        disposition_options,
+        headers,
+    ):
+        """
+        Store the given values as same-named attributes, unmodified.
+
+        ``file`` must support ``tell``, ``seek`` and ``read`` for
+        :attr:`value` to work.
+
+        """
+        # ``type`` shadows the builtin, but the name is part of this class's
+        # keyword interface (see ``from_multipart_part``), so it is kept.
+        self.name = name
+        self.filename = filename
+        self.file = file
+        self.type = type
+        self.type_options = type_options
+        self.disposition = disposition
+        self.disposition_options = disposition_options
+        self.headers = headers
+
+    @classmethod
+    def from_multipart_part(cls, part):
+        """
+        Build an instance from a parsed multipart part.
+
+        The part's ``Content-Type`` header (empty string when absent) is
+        split with ``parse_options_header`` into :attr:`type` and
+        :attr:`type_options`; the remaining attributes are copied from the
+        part as-is.
+
+        """
+        raw_content_type = part.headers.get("Content-Type", "")
+        content_type, options = parse_options_header(raw_content_type)
+        return cls(
+            name=part.name,
+            filename=part.filename,
+            file=part.file,
+            type=content_type,
+            type_options=options,
+            disposition=part.disposition,
+            disposition_options=part.options,
+            headers=part.headers,
+        )
+
+    @property
+    def value(self):
+        """
+        The entire content of :attr:`file`, read from the start.
+
+        The file's current position is saved before reading and restored
+        afterwards, including when ``read()`` raises.
+
+        """
+        pos = self.file.tell()
+        self.file.seek(0)
+        try:
+            return self.file.read()
+        finally:
+            # Put the position back even if reading fails, so an error
+            # here does not leave the file somewhere the caller never
+            # asked for.
+            self.file.seek(pos)
+
+
 class GetDict(MultiDict):
     #     def __init__(self, data, tracker, encoding, errors):
     #         d = lambda b: b.decode(encoding, errors)

diff --git a/src/webob/request.py b/src/webob/request.py
@@ -9,14 +9,15 @@
 from urllib.parse import quote as url_quote, quote_plus, urlencode as url_encode
 import warnings
 
+from multipart import MultipartParser
+
 from webob.acceptparse import (
     accept_charset_property,
     accept_encoding_property,
     accept_language_property,
     accept_property,
 )
 from webob.cachecontrol import CacheControl, serialize_cache_control
-from webob.compat import cgi_FieldStorage
 from webob.cookies import RequestCookies
 from webob.descriptors import (
     CHARSET_RE,
@@ -168,18 +169,7 @@ def decode(self, charset=None, errors="strict"):
         elif content_type != "multipart/form-data":
             return r
 
-        fs_environ = self.environ.copy()
-        fs_environ.setdefault("CONTENT_LENGTH", "0")
-        fs_environ["QUERY_STRING"] = ""
-        fs = cgi_FieldStorage(
-            fp=self.body_file,
-            environ=fs_environ,
-            keep_blank_values=True,
-            encoding=charset,
-            errors=errors,
-        )
-
-        fout = t.transcode_fs(fs, r._content_type_raw)
+        fout = t.transcode_multipart(self.body_file, r._content_type_raw)
 
         # this order is important, because setting body_file
         # resets content_length
@@ -796,27 +786,22 @@ def POST(self):
             return NoVars(
                 "Not an HTML form submission (Content-Type: %s)" % content_type
             )
-        self._check_charset()
-
-        self.make_body_seekable()
-        self.body_file_raw.seek(0)
 
-        fs_environ = env.copy()
-        # FieldStorage assumes a missing CONTENT_LENGTH, but a
-        # default of 0 is better:
-        fs_environ.setdefault("CONTENT_LENGTH", "0")
-        fs_environ["QUERY_STRING"] = ""
-        fs = cgi_FieldStorage(
-            fp=self.body_file,
-            environ=fs_environ,
-            keep_blank_values=True,
-            encoding="utf8",
-        )
-
-        self.body_file_raw.seek(0)
-        vars = MultiDict.from_fieldstorage(fs)
+        self._check_charset()
+        if content_type == "multipart/form-data":
+            self.make_body_seekable()
+            self.body_file_raw.seek(0)
+            boundary = _get_multipart_boundary(self._content_type_raw)
+            parser = MultipartParser(
+                self.body_file,
+                boundary,
+                charset="utf8",
+            )
+            vars = MultiDict.from_multipart(parser)
+            self.body_file_raw.seek(0)
+        else:
+            vars = MultiDict.from_qs(self.body)
         env["webob._parsed_post_vars"] = (vars, self.body_file_raw)
-
         return vars
 
     @property
@@ -1752,23 +1737,14 @@ def transcode_query(self, q):
 
         return url_encode(q)
 
-    def transcode_fs(self, fs, content_type):
-        # transcode FieldStorage
-        def decode(b):
-            return b
-
-        data = []
-
-        for field in fs.list or ():
-            field.name = decode(field.name)
-
-            if field.filename:
-                field.filename = decode(field.filename)
-                data.append((field.name, field))
-            else:
-                data.append((field.name, decode(field.value)))
-
-        # TODO: transcode big requests to temp file
-        content_type, fout = _encode_multipart(data, content_type, fout=io.BytesIO())
-
+    def transcode_multipart(self, body, content_type):
+        """
+        Re-encode a ``multipart/form-data`` body.
+
+        ``body`` is parsed with ``MultipartParser`` using the boundary taken
+        from ``content_type`` and this object's ``charset``.  The parsed
+        fields are then passed to ``_encode_multipart`` together with a
+        fresh ``io.BytesIO``.  Returns the second element of that call's
+        result (presumably the same buffer, now holding the re-encoded
+        body -- confirm against ``_encode_multipart``).
+
+        """
+        parser = MultipartParser(
+            body,
+            _get_multipart_boundary(content_type),
+            charset=self.charset,
+        )
+        fields = MultiDict.from_multipart(parser)
+        # Only the output buffer is needed; the content type returned
+        # alongside it is discarded.
+        _content_type, encoded = _encode_multipart(
+            fields.items(), content_type, fout=io.BytesIO()
+        )
+        return encoded
diff --git a/src/webob/util.py b/src/webob/util.py
@@ -1,6 +1,6 @@
+from html import escape
 import warnings
 
-from webob.compat import escape
 from webob.headers import _trans_key