Skip to content

Commit 7e3a7f0

Browse files
Add corrections for embedded msg missing unicode
1 parent 1765fa5 commit 7e3a7f0

File tree

1 file changed

+12
-2
lines changed

1 file changed

+12
-2
lines changed

extract_msg/ole_writer.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -809,7 +809,7 @@ def fromMsg(self, msg: MSGFile, allowBadEmbed: bool = False) -> None:
809809
Copies the streams and stream information necessary from the MSG file.
810810
811811
:param allowBadEmbed: If true, attempts to skip steps that will fail if
812-
the embedded msg file violates standards.
812+
the embedded msg file violates standards. It also attempts to repair the data so that the resulting file can be opened in Outlook.
813813
"""
814814
# Get the root OLE entry's CLSID.
815815
self.__rootEntry.clsid = _unClsid(msg._getOleEntry('/').clsid)
@@ -828,7 +828,17 @@ def fromMsg(self, msg: MSGFile, allowBadEmbed: bool = False) -> None:
828828
# specific place. So let's check if we are doing the properties
829829
# stream and then if we are embedded.
830830
if x[0] == '__properties_version1.0' and msg.prefixLen > 0:
831-
data = data[:24] + b'\x00\x00\x00\x00\x00\x00\x00\x00' + data[24:]
831+
if len(data) % 16 != 0:
832+
data = data[:24] + b'\x00\x00\x00\x00\x00\x00\x00\x00' + data[24:]
833+
elif not allowBadEmbed:
834+
# If we are not allowing bad data, raise an error.
835+
raise StandardViolationError('Embedded msg file attempted to be extracted that contains a top level properties stream.')
836+
if allowBadEmbed:
837+
# See if we need to fix the properties stream at all.
838+
if msg.getPropertyVal('340D0003') is None:
839+
if msg.areStringsUnicode:
840+
# We need to add a property to allow this file to open:
841+
data += b'\x03\x00\x0D\x34\x02\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00'
832842
self.addOleEntry(x, entry, data)
833843

834844
# Now check if it is an embedded file. If so, we need to copy the named

0 commit comments

Comments
 (0)