From 7e035d16a7c6903b6f4ffa6cd41834367647ece6 Mon Sep 17 00:00:00 2001 From: Konstantin Ryabitsev Date: Mon, 30 Mar 2020 18:07:47 -0400 Subject: Decode headers into utf-8 from QP Since we aren't planning on sending any of this as actual emails, convert EVERYTHING into utf-8 before writing messages, including QP-escaped 7-bit headers. Signed-off-by: Konstantin Ryabitsev --- b4/__init__.py | 13 ++++++++++++- b4/pr.py | 5 +++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/b4/__init__.py b/b4/__init__.py index 41f8dbc..acb3125 100644 --- a/b4/__init__.py +++ b/b4/__init__.py @@ -693,7 +693,18 @@ class LoreMessage: @staticmethod def clean_header(hdrval): - new_hdrval = re.sub(r'\n?\s+', ' ', str(hdrval)) + decoded = '' + for hstr, hcs in email.header.decode_header(hdrval): + if hcs is None: + hcs = 'utf-8' + try: + decoded += hstr.decode(hcs) + except LookupError: + # Try as utf-8 + decoded += hstr.decode('utf-8', errors='replace') + except (UnicodeDecodeError, AttributeError): + decoded += hstr + new_hdrval = re.sub(r'\n?\s+', ' ', decoded) return new_hdrval.strip() @staticmethod diff --git a/b4/pr.py b/b4/pr.py index 0ed9277..e97dbce 100644 --- a/b4/pr.py +++ b/b4/pr.py @@ -13,7 +13,7 @@ import mailbox from datetime import timedelta from tempfile import mkstemp -from email import utils, charset +from email import utils, charset, header charset.add_charset('utf-8', None) @@ -241,7 +241,8 @@ def explode(gitdir, lmsg, savefile): # Move the original From and Date into the body prepend = list() if msg['from'] != lmsg.msg['from']: - prepend.append('From: %s' % msg['from']) + cleanfrom = b4.LoreMessage.clean_header(msg['from']) + prepend.append('From: %s' % ''.join(cleanfrom)) prepend.append('Date: %s' % msg['date']) body = '%s\n\n%s' % ('\n'.join(prepend), msg.get_payload(decode=True).decode('utf-8')) msg.set_payload(body) -- cgit v1.2.3