mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git
synced 2025-02-17 11:57:00 +00:00
Try both utf-8 and windows-1252 for decoding email
Recent submissions from Cirrus were classified as spam by the lore analysis robot script. This is because Cirrus used windows-1252 for the encoding, which failed to decode as utf-8. Try both encodings when decoding email.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
This commit is contained in:
parent
58ec43257c
commit
8228c2222f
@ -34,6 +34,8 @@ content_types = {
|
|||||||
def classify_content(content):
|
def classify_content(content):
|
||||||
# load content into the email library
|
# load content into the email library
|
||||||
msg = email.message_from_string(content)
|
msg = email.message_from_string(content)
|
||||||
|
decoded = None
|
||||||
|
body = None
|
||||||
|
|
||||||
# check the subject
|
# check the subject
|
||||||
subject = msg["Subject"]
|
subject = msg["Subject"]
|
||||||
@ -42,17 +44,28 @@ def classify_content(content):
|
|||||||
if "PATCH" in subject:
|
if "PATCH" in subject:
|
||||||
return ContentType.PATCH
|
return ContentType.PATCH
|
||||||
|
|
||||||
|
if msg.is_multipart():
|
||||||
for part in msg.walk():
|
for part in msg.walk():
|
||||||
if part.get_content_type() == "text/plain":
|
if part.get_content_type() == "text/plain":
|
||||||
|
body = part.get_payload(decode=True)
|
||||||
|
else:
|
||||||
|
body = msg.get_payload(decode=True)
|
||||||
|
|
||||||
|
if body:
|
||||||
|
for encoding in ["utf-8", "windows-1252"]:
|
||||||
try:
|
try:
|
||||||
body = part.get_payload(decode=True).decode("utf-8")
|
decoded = body.decode(encoding)
|
||||||
|
break
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if decoded:
|
||||||
for key in content_types.keys():
|
for key in content_types.keys():
|
||||||
if key in body:
|
if key in decoded:
|
||||||
return content_types[key]
|
return content_types[key]
|
||||||
break
|
else:
|
||||||
except UnicodeDecodeError as e:
|
logging.warning("Failed to decode email: %s, treating as SPAM", body)
|
||||||
logging.warning("Failed to decode email: %s, treating as SPAM" % e)
|
|
||||||
break
|
|
||||||
return ContentType.SPAM
|
return ContentType.SPAM
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user