diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2021-06-11 09:59:21 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2021-06-11 09:59:21 -0400 |
commit | 0a1776fc9fdf8e03757f2ccc08f5267489472432 (patch) | |
tree | f7f4dd87d6781dcb99265b5a1dd4d9bc700679e0 | |
parent | 723f4d79a6181b60f03f9573a394a85895f5cf03 (diff) | |
download | b4-0a1776fc9fdf8e03757f2ccc08f5267489472432.tar.gz |
Save mbox files with proper unixfrom
In order to avoid some of the more obscure charset encoding problems, we
switched to using as_string() for generating messages before saving them
in an mbox file. However, this uncovered a bug where the unixfrom was
not actually generated and saved, despite as_bytes() and as_string()
supposedly behaving identically.
See:
https://docs.python.org/3/library/email.message.html#email.message.EmailMessage.as_string
This commit fixes the problem by properly setting the unixfrom and using
the recommended (and hopefully less buggy) email.generator interface
when saving mailboxes.
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | b4/__init__.py | 16 | ||||
-rw-r--r-- | b4/mbox.py | 8 | ||||
-rw-r--r-- | b4/pr.py | 2 |
3 files changed, 12 insertions, 14 deletions
diff --git a/b4/__init__.py b/b4/__init__.py
index 2572017..a12468b 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -11,6 +11,7 @@ import fnmatch
 import email.utils
 import email.policy
 import email.header
+import email.generator
 import tempfile
 import pathlib
 
@@ -25,7 +26,7 @@ import mailbox
 import pwd
 
 from contextlib import contextmanager
-from typing import Optional, Tuple, Set, List
+from typing import Optional, Tuple, Set, List, TextIO
 
 from email import charset
 charset.add_charset('utf-8', None)
@@ -2325,20 +2326,17 @@ def get_gpg_uids(keyid: str) -> list:
     return uids
 
 
-def save_git_am_mbox(msgs: list, dest):
+def save_git_am_mbox(msgs: list, dest: TextIO):
     # Git-am has its own understanding of what "mbox" format is that differs from Python's
     # mboxo implementation. Specifically, it never escapes the ">From " lines found in bodies
     # unless invoked with --patch-format=mboxrd (this is wrong, because ">From " escapes are also
     # required in the original mbox "mboxo" format).
     # So, save in the format that git-am expects
-    # "dest" should be a file handler in writable+binary mode
+    gen = email.generator.Generator(dest, policy=emlpolicy)
     for msg in msgs:
-        bmsg = msg.as_string(unixfrom=True, policy=emlpolicy)
-        # public-inbox unixfrom says "mboxrd", so replace it with something else
-        # so there is no confusion as it's NOT mboxrd
-        bmsg = re.sub('^From mboxrd@z ', 'From git@z ', bmsg)
-        bmsg = bmsg.rstrip('\r\n') + '\n\n'
-        dest.write(bmsg.encode())
+        msg.set_unixfrom('From git@z Thu Jan 1 00:00:00 1970')
+        gen.flatten(msg, unixfrom=True)
+        gen.write('\n')
 
 
 def save_maildir(msgs: list, dest):
diff --git a/b4/mbox.py b/b4/mbox.py
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -125,12 +125,12 @@ def make_am(msgs, cmdargs, msgid):
         if save_maildir:
             b4.save_maildir(am_msgs, am_filename)
         else:
-            with open(am_filename, 'wb') as fh:
+            with open(am_filename, 'w') as fh:
                 b4.save_git_am_mbox(am_msgs, fh)
     else:
         am_filename = None
         am_cover = None
-        b4.save_git_am_mbox(am_msgs, sys.stdout.buffer)
+        b4.save_git_am_mbox(am_msgs, sys.stdout)
 
     logger.info('---')
 
@@ -603,7 +603,7 @@ def main(cmdargs):
     logger.info('%s messages in the thread', len(msgs))
     if cmdargs.outdir == '-':
         logger.info('---')
-        b4.save_git_am_mbox(msgs, sys.stdout.buffer)
+        b4.save_git_am_mbox(msgs, sys.stdout)
         return
 
     # Check if outdir is a maildir
@@ -646,7 +646,7 @@ def main(cmdargs):
         logger.info('Saved maildir %s', savename)
         return
 
-    with open(savename, 'wb') as fh:
+    with open(savename, 'w') as fh:
         b4.save_git_am_mbox(msgs, fh)
 
     logger.info('Saved %s', savename)
diff --git a/b4/pr.py b/b4/pr.py
--- a/b4/pr.py
+++ b/b4/pr.py
@@ -586,7 +586,7 @@ def main(cmdargs):
         if save_maildir:
             b4.save_maildir(msgs, savefile)
         else:
-            with open(savefile, 'wb') as fh:
+            with open(savefile, 'w') as fh:
                 b4.save_git_am_mbox(msgs, fh)
         logger.info('---')
         logger.info('Saved %s', savefile)