Catch unicode decode errors

Spam emails might contain non-ASCII characters that cannot be decoded as UTF-8; don't let the script explode.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
This commit is contained in:
Mario Limonciello 2023-10-23 10:10:24 -05:00
parent d983107a2d
commit 7bfa5f4d10
1 changed file with 9 additions and 5 deletions

View File

@@ -44,11 +44,15 @@ def classify_content(content):
for part in msg.walk(): for part in msg.walk():
if part.get_content_type() == "text/plain": if part.get_content_type() == "text/plain":
body = part.get_payload(decode=True).decode("utf-8") try:
for key in content_types.keys(): body = part.get_payload(decode=True).decode("utf-8")
if key in body: for key in content_types.keys():
return content_types[key] if key in body:
break return content_types[key]
break
except UnicodeDecodeError as e:
logging.warning("Failed to decode email: %s, treating as SPAM" % e)
break
return ContentType.SPAM return ContentType.SPAM