Save to/cc headers as-is for tracking

If we clean the to/cc headers to get rid of all unicode escaping, we run into a Python bug where addresses are not parsed properly, e.g.:

    In [5]: from email import utils
    In [6]: utils.getaddresses(['foo <foo@bar.com>'])
    Out[6]: [('foo', 'foo@bar.com')]
    In [7]: utils.getaddresses(['Shuming [范書銘] <shumingf@realtek.com>'])
    Out[7]: [('', 'Shuming'), ('', ''), ('', '范書銘'), ('', ''), ('', 'shumingf@realtek.com')]

If we store the headers as-is from the original message, we are less likely to run into this bug, as all non-ascii sequences should be RFC 2047-encoded (qp- or base64-escaped) in the original headers:

    =?big5?B?U2h1bWluZyBbrVOu0bvKXQ==?= <shumingf@realtek.com>

This doesn't fix the underlying bug in Python, but works around it.

Reported-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
author: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-12-28 13:04:02 -0500
committer: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-12-28 13:13:37 -0500
commit: f93bbd3e50b1fb4507aa537f4004da545af9d890 (patch)
tree: f35124bf089c1683a01f0ee81945a2214140bce2
parent: ab9c6a69a6bc0e1e4a5de232fee29acdaa69d2a5 (diff)
download: b4-f93bbd3e50b1fb4507aa537f4004da545af9d890.tar.gz
3 files changed, 12 insertions, 7 deletions
diff --git a/b4/__init__.py b/b4/__init__.py
index 4d27aea..17fae0e 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -2336,11 +2336,16 @@ def git_branch_contains(gitdir, commit_id):
     return lines
 
 
-def format_addrs(pairs):
+def format_addrs(pairs, clean=True):
     addrs = set()
     for pair in pairs:
-        # Remove any quoted-printable header junk from the name
-        addrs.add(email.utils.formataddr((LoreMessage.clean_header(pair[0]), LoreMessage.clean_header(pair[1]))))
+        pair = list(pair)
+        if pair[0] == pair[1]:
+            pair[0] = ''
+        if clean:
+            # Remove any quoted-printable header junk from the name
+            pair[0] = LoreMessage.clean_header(pair[0])
+        addrs.add(email.utils.formataddr(pair))  # noqa
     return ', '.join(addrs)
 
 
diff --git a/b4/mbox.py b/b4/mbox.py
index 38b2f0e..d998380 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -299,8 +299,8 @@ def thanks_record_am(lser, cherrypick=None):
         'subject': lmsg.full_subject,
         'fromname': lmsg.fromname,
         'fromemail': lmsg.fromemail,
-        'to': b4.format_addrs(allto),
-        'cc': b4.format_addrs(allcc),
+        'to': b4.format_addrs(allto, clean=False),
+        'cc': b4.format_addrs(allcc, clean=False),
         'references': b4.LoreMessage.clean_header(lmsg.msg['References']),
         'sentdate': b4.LoreMessage.clean_header(lmsg.msg['Date']),
         'quote': b4.make_quote(lmsg.body, maxlines=5),
diff --git a/b4/pr.py b/b4/pr.py
index b7ed9e1..d8833a9 100644
--- a/b4/pr.py
+++ b/b4/pr.py
@@ -213,8 +213,8 @@ def thanks_record_pr(lmsg):
         'subject': lmsg.full_subject,
         'fromname': lmsg.fromname,
         'fromemail': lmsg.fromemail,
-        'to': b4.format_addrs(allto),
-        'cc': b4.format_addrs(allcc),
+        'to': b4.format_addrs(allto, clean=False),
+        'cc': b4.format_addrs(allcc, clean=False),
         'references': b4.LoreMessage.clean_header(lmsg.msg['References']),
         'remote': lmsg.pr_repo,
         'ref': lmsg.pr_ref,
author	Konstantin Ryabitsev <konstantin@linuxfoundation.org>	2020-12-28 13:04:02 -0500
committer	Konstantin Ryabitsev <konstantin@linuxfoundation.org>	2020-12-28 13:13:37 -0500
commit	f93bbd3e50b1fb4507aa537f4004da545af9d890 (patch)
tree	f35124bf089c1683a01f0ee81945a2214140bce2
parent	ab9c6a69a6bc0e1e4a5de232fee29acdaa69d2a5 (diff)
download	b4-f93bbd3e50b1fb4507aa537f4004da545af9d890.tar.gz