diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-03-30 18:07:47 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-03-30 18:07:47 -0400 |
commit | 7e035d16a7c6903b6f4ffa6cd41834367647ece6 (patch) | |
tree | 195d8905b609fefc700dd68f73ac7aefdf644161 | |
parent | e123952efd144401a198ab1f8337eb2529e26f95 (diff) | |
download | b4-7e035d16a7c6903b6f4ffa6cd41834367647ece6.tar.gz |
Decode headers into utf-8 from QP
Since we aren't planning on sending any of this as actual emails,
convert EVERYTHING into utf-8 before writing messages, including
QP-escaped 7-bit headers.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | b4/__init__.py | 13 | ||||
-rw-r--r-- | b4/pr.py | 5 |
2 files changed, 15 insertions, 3 deletions
```diff
diff --git a/b4/__init__.py b/b4/__init__.py
index 41f8dbc..acb3125 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -693,7 +693,18 @@ class LoreMessage:
 
     @staticmethod
     def clean_header(hdrval):
-        new_hdrval = re.sub(r'\n?\s+', ' ', str(hdrval))
+        decoded = ''
+        for hstr, hcs in email.header.decode_header(hdrval):
+            if hcs is None:
+                hcs = 'utf-8'
+            try:
+                decoded += hstr.decode(hcs)
+            except LookupError:
+                # Try as utf-u
+                decoded += hstr.decode('utf-8', errors='replace')
+            except (UnicodeDecodeError, AttributeError):
+                decoded += hstr
+        new_hdrval = re.sub(r'\n?\s+', ' ', decoded)
         return new_hdrval.strip()
 
     @staticmethod
diff --git a/b4/pr.py b/b4/pr.py
--- a/b4/pr.py
+++ b/b4/pr.py
@@ -13,7 +13,7 @@ import mailbox
 
 from datetime import timedelta
 from tempfile import mkstemp
-from email import utils, charset
+from email import utils, charset, header
 
 charset.add_charset('utf-8', None)
 
@@ -241,7 +241,8 @@ def explode(gitdir, lmsg, savefile):
         # Move the original From and Date into the body
         prepend = list()
         if msg['from'] != lmsg.msg['from']:
-            prepend.append('From: %s' % msg['from'])
+            cleanfrom = b4.LoreMessage.clean_header(msg['from'])
+            prepend.append('From: %s' % ''.join(cleanfrom))
         prepend.append('Date: %s' % msg['date'])
         body = '%s\n\n%s' % ('\n'.join(prepend), msg.get_payload(decode=True).decode('utf-8'))
         msg.set_payload(body)
```