Skip to content

Commit aeea79b

Browse files
Merge pull request #428 from TeamMsgExtractor/next-release
Version 0.49.0
2 parents 1e3bf80 + 30a28d8 commit aeea79b

File tree

10 files changed

+264
-59
lines changed

10 files changed

+264
-59
lines changed

CHANGELOG.md

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
**v0.49.0**
2+
* [[TeamMsgExtractor #427](https://github.com/TeamMsgExtractor/msg-extractor/issues/427)] Adjusted code for converting time stamps to create null dates for any time stamp beyond a certain point. The point was determined to be close to the existing null dates.
3+
* [[TeamMsgExtractor #425](https://github.com/TeamMsgExtractor/msg-extractor/issues/425)] Added basic support for custom attachments that are Windows Metafiles.
4+
* Changed tolerance of bitmap custom attachment handler to allow for attachments with only a `CONTENTS` stream. This change was made after seeing an example of a file that only had a `CONTENTS` stream and no other streams for the custom data. The code now also tries to create default values for things previously determined from those other streams.
5+
* Fixed an issue in `tryGetMimetype` where the code didn't properly check if the data type was bytes (it only checked if it had a type).
6+
* Corrected some exports.
7+
* Added new `ErrorBehavior` value `CUSTOM_ATTACH_TOLERANT` to allow skipping checks for unused data that is normally validated.
8+
19
**v0.48.7**
210
* [[TeamMsgExtractor #420](https://github.com/TeamMsgExtractor/msg-extractor/issues/420)] Fixed typo introduced in last version.
311

README.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -260,8 +260,8 @@ your access to the newest major version of extract-msg.
260260
.. |License: GPL v3| image:: https://img.shields.io/badge/License-GPLv3-blue.svg
261261
:target: LICENSE.txt
262262

263-
.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.48.7-blue.svg
264-
:target: https://pypi.org/project/extract-msg/0.48.7/
263+
.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.49.0-blue.svg
264+
:target: https://pypi.org/project/extract-msg/0.49.0/
265265

266266
.. |PyPI2| image:: https://img.shields.io/badge/python-3.8+-brightgreen.svg
267267
:target: https://www.python.org/downloads/release/python-3810/

extract_msg/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@
2727
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2828

2929
__author__ = 'Destiny Peterson & Matthew Walker'
30-
__date__ = '2024-07-07'
31-
__version__ = '0.48.7'
30+
__date__ = '2024-08-21'
31+
__version__ = '0.49.0'
3232

3333
__all__ = [
3434
# Modules:

extract_msg/attachments/custom_att_handler/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
'CustomAttachmentHandler',
2222
'LinkedObjectAttachment',
2323
'OutlookImageDIB',
24+
'OutlookImageMetafile',
2425

2526
# Functions.
2627
'getHandler',
@@ -55,6 +56,7 @@ def registerHandler(handler: Type[CustomAttachmentHandler]) -> None:
5556
# Import built-in handler modules. They will all automatically register their
5657
# respective handler(s).
5758
from .outlook_image_dib import OutlookImageDIB
59+
from .outlook_image_meta import OutlookImageMetafile
5860
from .lnk_obj_att import LinkedObjectAttachment
5961

6062

extract_msg/attachments/custom_att_handler/outlook_image_dib.py

+55-38
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33

44
__all__ = [
5-
'OutlookImage',
5+
'OutlookImageDIB',
66
]
77

88

@@ -12,7 +12,7 @@
1212

1313
from . import registerHandler
1414
from .custom_handler import CustomAttachmentHandler
15-
from ...enums import DVAspect, InsecureFeatures
15+
from ...enums import DVAspect, ErrorBehavior, InsecureFeatures
1616
from ...exceptions import DependencyError, SecurityError
1717

1818

@@ -31,45 +31,60 @@ class OutlookImageDIB(CustomAttachmentHandler):
3131

3232
def __init__(self, attachment: AttachmentBase):
3333
super().__init__(attachment)
34-
# First we need to get the mailstream.
35-
stream = self.getStream('\x03MailStream')
36-
if not stream:
37-
raise ValueError('MailStream could not be found.')
38-
if len(stream) != 12:
39-
raise ValueError('MailStream is the wrong length.')
40-
# Next get the bitmap data.
34+
# First, get the mandatory bitmap data.
4135
self.__data = self.getStream('CONTENTS')
4236
if not self.__data:
4337
raise ValueError('Bitmap data could not be read for Outlook signature.')
44-
# Get the OLE data.
45-
oleStream = self.getStream('\x01Ole')
46-
if not oleStream:
47-
raise ValueError('OLE stream could not be found.')
48-
49-
# While I have only seen this stream be one length, it could in theory
50-
# be more than one length. So long as it is *at least* 20 bytes, we
51-
# call it valid.
52-
if len(oleStream) < 20:
53-
raise ValueError('OLE stream is too short.')
54-
55-
# Unpack and verify the OLE stream.
56-
vals = _ST_OLE.unpack(oleStream[:20])
57-
# Check the version magic.
58-
if vals[0] != 0x2000001:
59-
raise ValueError('OLE stream has wrong version magic.')
60-
# Check the reserved bytes.
61-
if vals[3] != 0:
62-
raise ValueError('OLE stream has non-zero reserved int.')
63-
64-
# Unpack the mailstream and create the HTML tag.
65-
vals = _ST_MAILSTREAM.unpack(stream)
66-
self.__dvaspect = DVAspect(vals[0])
67-
self.__x = vals[1]
68-
self.__y = vals[2]
38+
39+
# Next we need to get the mailstream.
40+
stream = self.getStream('\x03MailStream')
41+
if stream:
42+
if len(stream) != 12:
43+
raise ValueError('MailStream is the wrong length.')
44+
45+
# Unpack the mailstream.
46+
vals = _ST_MAILSTREAM.unpack(stream)
47+
self.__dvaspect = DVAspect(vals[0])
48+
self.__x = vals[1]
49+
self.__y = vals[2]
50+
else:
51+
#raise ValueError('MailStream could not be found.')
52+
# Create default values.
53+
self.__dvaspect = DVAspect.CONTENT
54+
# TODO figure out what the default values for these should actually
55+
# be.
56+
self.__x = 0
57+
self.__y = 0
58+
59+
# This is done regardless of default values or not.
6960
# Convert to twips for RTF.
7061
self.__xtwips = int(round(self.__x / 1.7639))
7162
self.__ytwips = int(round(self.__y / 1.7639))
7263

64+
# Check the error behavior to see if we should even do this check.
65+
if ErrorBehavior.CUSTOM_ATTACH_TOLERANT not in attachment.msg.errorBehavior:
66+
# Get the OLE data.
67+
oleStream = self.getStream('\x01Ole')
68+
if oleStream:
69+
# While I have only seen this stream be one length, it could in
70+
# theory be more than one length. So long as it is *at least* 20
71+
# bytes, we call it valid.
72+
if len(oleStream) < 20:
73+
raise ValueError('OLE stream is too short.')
74+
# Unpack and verify the OLE stream.
75+
vals = _ST_OLE.unpack(oleStream[:20])
76+
# Check the version magic.
77+
if vals[0] != 0x2000001:
78+
raise ValueError('OLE stream has wrong version magic.')
79+
# Check the reserved bytes.
80+
if vals[3] != 0:
81+
raise ValueError('OLE stream has non-zero reserved int.')
82+
else:
83+
#raise ValueError('OLE stream could not be found.')
84+
# If the stream is there we validate it, so here we just leave
85+
# it alone since nothing is actually stored.
86+
pass
87+
7388
@classmethod
7489
def isCorrectHandler(cls, attachment: AttachmentBase) -> bool:
7590
if attachment.clsid != '00000316-0000-0000-C000-000000000046':
@@ -78,10 +93,12 @@ def isCorrectHandler(cls, attachment: AttachmentBase) -> bool:
7893
# Check for the required streams.
7994
if not attachment.exists('__substg1.0_3701000D/CONTENTS'):
8095
return False
81-
if not attachment.exists('__substg1.0_3701000D/\x01Ole'):
82-
return False
83-
if not attachment.exists('__substg1.0_3701000D/\x03MailStream'):
84-
return False
96+
# These streams were previously considered mandatory, but are now
97+
# tentatively optional.
98+
#if not attachment.exists('__substg1.0_3701000D/\x01Ole'):
99+
# return False
100+
#if not attachment.exists('__substg1.0_3701000D/\x03MailStream'):
101+
# return False
85102

86103
return True
87104

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
from __future__ import annotations
2+
3+
4+
__all__ = [
5+
'OutlookImageMetafile',
6+
]
7+
8+
9+
import struct
10+
11+
from typing import Optional, TYPE_CHECKING
12+
13+
from . import registerHandler
14+
from .custom_handler import CustomAttachmentHandler
15+
from ...enums import DVAspect, ErrorBehavior, InsecureFeatures
16+
from ...exceptions import DependencyError, SecurityError
17+
18+
19+
if TYPE_CHECKING:
20+
from ..attachment_base import AttachmentBase
21+
22+
_ST_OLE = struct.Struct('<IIIII')
23+
_ST_MAILSTREAM = struct.Struct('<III')
24+
25+
26+
class OutlookImageMetafile(CustomAttachmentHandler):
27+
"""
28+
Custom handler for a special attachment type, a Windows Metafile
29+
stored in a way special to Outlook.
30+
"""
31+
32+
def __init__(self, attachment: AttachmentBase):
33+
super().__init__(attachment)
34+
# First, get the mandatory metafile data.
35+
self.__data = self.getStream('CONTENTS')
36+
if not self.__data:
37+
raise ValueError('Bitmap data could not be read for Outlook signature.')
38+
39+
# Next we need to get the mailstream.
40+
stream = self.getStream('\x03MailStream')
41+
if stream:
42+
if len(stream) != 12:
43+
raise ValueError('MailStream is the wrong length.')
44+
45+
# Unpack the mailstream.
46+
vals = _ST_MAILSTREAM.unpack(stream)
47+
self.__dvaspect = DVAspect(vals[0])
48+
self.__x = vals[1]
49+
self.__y = vals[2]
50+
else:
51+
#raise ValueError('MailStream could not be found.')
52+
# Create default values.
53+
self.__dvaspect = DVAspect.CONTENT
54+
# TODO figure out what the default values for these should actually
55+
# be.
56+
self.__x = 0
57+
self.__y = 0
58+
59+
# This is done regardless of default values or not.
60+
# Convert to twips for RTF.
61+
self.__xtwips = int(round(self.__x / 1.7639))
62+
self.__ytwips = int(round(self.__y / 1.7639))
63+
64+
# Check the error behavior to see if we should even do this check.
65+
if ErrorBehavior.CUSTOM_ATTACH_TOLERANT not in attachment.msg.errorBehavior:
66+
# Get the OLE data.
67+
oleStream = self.getStream('\x01Ole')
68+
if oleStream:
69+
# While I have only seen this stream be one length, it could in
70+
# theory be more than one length. So long as it is *at least* 20
71+
# bytes, we call it valid.
72+
if len(oleStream) < 20:
73+
raise ValueError('OLE stream is too short.')
74+
# Unpack and verify the OLE stream.
75+
vals = _ST_OLE.unpack(oleStream[:20])
76+
# Check the version magic.
77+
if vals[0] != 0x2000001:
78+
raise ValueError('OLE stream has wrong version magic.')
79+
# Check the reserved bytes.
80+
if vals[3] != 0:
81+
raise ValueError('OLE stream has non-zero reserved int.')
82+
else:
83+
#raise ValueError('OLE stream could not be found.')
84+
# If the stream is there we validate it, so here we just leave
85+
# it alone since nothing is actually stored.
86+
pass
87+
88+
@classmethod
89+
def isCorrectHandler(cls, attachment: AttachmentBase) -> bool:
90+
if attachment.clsid != '00000315-0000-0000-C000-000000000046':
91+
return False
92+
93+
# Check for the required streams.
94+
if not attachment.exists('__substg1.0_3701000D/CONTENTS'):
95+
return False
96+
# These streams were previously considered mandatory, but are now
97+
# tentatively optional.
98+
#if not attachment.exists('__substg1.0_3701000D/\x01Ole'):
99+
# return False
100+
#if not attachment.exists('__substg1.0_3701000D/\x03MailStream'):
101+
# return False
102+
103+
return True
104+
105+
def generateRtf(self) -> Optional[bytes]:
106+
"""
107+
Generates the RTF to inject in place of the \\objattph tag.
108+
109+
If this function should do nothing, returns ``None``.
110+
111+
:raises DependencyError: PIL or Pillow could not be found.
112+
"""
113+
if InsecureFeatures.PIL_IMAGE_PARSING not in self.attachment.msg.insecureFeatures:
114+
raise SecurityError('Generating the RTF for a custom attachment requires the insecure feature PIL_IMAGE_PARSING.')
115+
116+
try:
117+
import PIL.Image
118+
except ImportError:
119+
raise DependencyError('PIL or Pillow is required for inserting an Outlook Image into the body.')
120+
121+
# First, convert the metafile into a PNG so we can insert it into the
122+
# body.
123+
import io
124+
125+
# Note, use self.data instead of self.__data to allow support for
126+
# extensions.
127+
with PIL.Image.open(io.BytesIO(self.data)) as img:
128+
out = io.BytesIO()
129+
img.save(out, 'PNG')
130+
131+
hexData = out.getvalue().hex()
132+
133+
inject = '{\\*\\shppict\n{\\pict\\picscalex100\\picscaley100'
134+
inject += f'\\picw{img.width}\\pich{img.height}'
135+
inject += f'\\picwgoal{self.__xtwips}\\pichgoal{self.__ytwips}\n'
136+
inject += '\\pngblip ' + hexData + '}}'
137+
138+
return inject.encode()
139+
140+
@property
141+
def data(self) -> bytes:
142+
return self.__data
143+
144+
@property
145+
def name(self) -> str:
146+
# Try to get the name from the attachment. If that fails, name it based
147+
# on the number.
148+
if not (name := self.attachment.name):
149+
name = f'attachment {int(self.attachment.dir[-8:], 16)}'
150+
return name + '.wmf'
151+
152+
@property
153+
def obj(self) -> bytes:
154+
return self.data
155+
156+
157+
158+
registerHandler(OutlookImageMetafile)

extract_msg/enums.py

+17-14
Original file line numberDiff line numberDiff line change
@@ -666,8 +666,10 @@ class ErrorBehavior(enum.IntFlag):
666666
* THROW: Throw the exception regardless of type.
667667
* ATTACH_NOT_IMPLEMENTED: Silence the exception for NotImplementedError.
668668
* ATTACH_BROKEN: Silence the exception for broken attachments.
669-
* ATTACH_SUPPRESS_ALL: Silence the exception for NotImplementedError and for
670-
broken attachments.
669+
* CUSTOM_ATTACH_TOLERANT: Makes custom attachments more tolerant for
670+
data that is validated but not used.
671+
* ATTACH_SUPPRESS_ALL: Silence the exception for NotImplementedError, for
672+
broken attachments, and for custom attachment issues.
671673
* RTFDE_MALFORMED: Silences errors about malformed RTF data.
672674
* RTFDE_UNKNOWN_ERROR: Silences errors from RTFDE that are not normal.
673675
* RTFDE: Silences all errors from RTFDE.
@@ -679,22 +681,23 @@ class ErrorBehavior(enum.IntFlag):
679681
simply be dropped.
680682
* SUPPRESS_ALL: Silences all of the above.
681683
"""
682-
THROW = 0b000000
684+
THROW = 0b00000000
683685
# Attachments.
684-
ATTACH_NOT_IMPLEMENTED = 0b000001
685-
ATTACH_BROKEN = 0b000010
686-
ATTACH_SUPPRESS_ALL = 0b000011
686+
ATTACH_NOT_IMPLEMENTED = 0b00000001
687+
ATTACH_BROKEN = 0b00000010
688+
CUSTOM_ATTACH_TOLERANT = 0b00000100
689+
ATTACH_SUPPRESS_ALL = 0b00000111
687690
# RTFDE.
688-
RTFDE_MALFORMED = 0b000100
689-
RTFDE_UNKNOWN_ERROR = 0b001000
690-
RTFDE = 0b001100
691+
RTFDE_MALFORMED = 0b00001000
692+
RTFDE_UNKNOWN_ERROR = 0b00010000
693+
RTFDE = 0b00011000
691694
# General.
692-
STANDARDS_VIOLATION = 0b010000
693-
OLE_DEFECT_INCORRECT = 0b100000
694-
# Named Properties
695-
NAMED_NAME_STREAM = 0b1000000
695+
STANDARDS_VIOLATION = 0b00100000
696+
OLE_DEFECT_INCORRECT = 0b01000000
697+
# Named Properties.
698+
NAMED_NAME_STREAM = 0b10000000
696699

697-
SUPPRESS_ALL = 0b1111111
700+
SUPPRESS_ALL = 0b111111111111
698701

699702

700703

0 commit comments

Comments
 (0)