From 0a1776fc9fdf8e03757f2ccc08f5267489472432 Mon Sep 17 00:00:00 2001
From: Konstantin Ryabitsev
Date: Fri, 11 Jun 2021 09:59:21 -0400
Subject: Save mbox files with proper unixfrom

In order to avoid some of the more obscure charset encoding problems, we
switched to using as_string() for generating messages before saving them
in an mbox file. However, this uncovered a bug where the unixfrom was
not actually generated and saved, despite as_bytes() and as_string()
supposedly behaving identically.

See:
https://docs.python.org/3/library/email.message.html#email.message.EmailMessage.as_string

This commit fixes the problem by properly setting the unixfrom and using
the recommended (and hopefully less buggy) email.generator interface
when saving mailboxes.

Reported-by: Geert Uytterhoeven
Signed-off-by: Konstantin Ryabitsev
---
 b4/__init__.py | 16 +++++++---------
 b4/mbox.py     |  8 ++++----
 b4/pr.py       |  2 +-
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/b4/__init__.py b/b4/__init__.py
index 2572017..a12468b 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -11,6 +11,7 @@ import fnmatch
 import email.utils
 import email.policy
 import email.header
+import email.generator
 import tempfile
 import pathlib
 
@@ -25,7 +26,7 @@ import mailbox
 import pwd
 
 from contextlib import contextmanager
-from typing import Optional, Tuple, Set, List
+from typing import Optional, Tuple, Set, List, TextIO
 
 from email import charset
 charset.add_charset('utf-8', None)
@@ -2325,20 +2326,17 @@ def get_gpg_uids(keyid: str) -> list:
     return uids
 
 
-def save_git_am_mbox(msgs: list, dest):
+def save_git_am_mbox(msgs: list, dest: TextIO):
     # Git-am has its own understanding of what "mbox" format is that differs from Python's
     # mboxo implementation. Specifically, it never escapes the ">From " lines found in bodies
     # unless invoked with --patch-format=mboxrd (this is wrong, because ">From " escapes are also
     # required in the original mbox "mboxo" format).
     # So, save in the format that git-am expects
-    # "dest" should be a file handler in writable+binary mode
+    gen = email.generator.Generator(dest, policy=emlpolicy)
     for msg in msgs:
-        bmsg = msg.as_string(unixfrom=True, policy=emlpolicy)
-        # public-inbox unixfrom says "mboxrd", so replace it with something else
-        # so there is no confusion as it's NOT mboxrd
-        bmsg = re.sub('^From mboxrd@z ', 'From git@z ', bmsg)
-        bmsg = bmsg.rstrip('\r\n') + '\n\n'
-        dest.write(bmsg.encode())
+        msg.set_unixfrom('From git@z Thu Jan 1 00:00:00 1970')
+        gen.flatten(msg, unixfrom=True)
+        gen.write('\n')
 
 
 def save_maildir(msgs: list, dest):
diff --git a/b4/mbox.py b/b4/mbox.py
index eeccaf2..e722d05 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -125,12 +125,12 @@ def make_am(msgs, cmdargs, msgid):
         if save_maildir:
             b4.save_maildir(am_msgs, am_filename)
         else:
-            with open(am_filename, 'wb') as fh:
+            with open(am_filename, 'w') as fh:
                 b4.save_git_am_mbox(am_msgs, fh)
     else:
         am_filename = None
         am_cover = None
-        b4.save_git_am_mbox(am_msgs, sys.stdout.buffer)
+        b4.save_git_am_mbox(am_msgs, sys.stdout)
 
     logger.info('---')
 
@@ -603,7 +603,7 @@ def main(cmdargs):
     logger.info('%s messages in the thread', len(msgs))
     if cmdargs.outdir == '-':
         logger.info('---')
-        b4.save_git_am_mbox(msgs, sys.stdout.buffer)
+        b4.save_git_am_mbox(msgs, sys.stdout)
         return
 
     # Check if outdir is a maildir
@@ -646,7 +646,7 @@ def main(cmdargs):
         logger.info('Saved maildir %s', savename)
         return
 
-    with open(savename, 'wb') as fh:
+    with open(savename, 'w') as fh:
         b4.save_git_am_mbox(msgs, fh)
 
     logger.info('Saved %s', savename)
diff --git a/b4/pr.py b/b4/pr.py
index f0d990e..c5c5fc7 100644
--- a/b4/pr.py
+++ b/b4/pr.py
@@ -586,7 +586,7 @@ def main(cmdargs):
         if save_maildir:
             b4.save_maildir(msgs, savefile)
         else:
-            with open(savefile, 'wb') as fh:
+            with open(savefile, 'w') as fh:
                 b4.save_git_am_mbox(msgs, fh)
         logger.info('---')
         logger.info('Saved %s', savefile)
-- 
cgit v1.2.3