From 4950093c0c3ee71e7045b545626d2b232271cbc8 Mon Sep 17 00:00:00 2001 From: Konstantin Ryabitsev Date: Tue, 18 May 2021 16:33:29 -0400 Subject: Don't use mboxo for anything While trying to figure out some odd DKIM failures, I've discovered that there is an important incompatibility between git's idea of what "mbox" format is, and Python's mboxo implementation -- at least when it comes to treating "\nFrom " escapes. According to the "original mbox" standard, when a message body contains a "\nFrom " sequence, it should be converted to "\n>From " in order not to confuse the parser. When reading messages in that format, clients are supposed to back-convert "\n>From " into their original form. This is the so-called "mboxo" format, which is what Python's mailbox.mbox supports: https://docs.python.org/3/library/mailbox.html#mailbox.mbox The "mboxrd" format was created to avoid a corruption problem whereas a body that legitimately contains "\n>From " would be wrongly converted into "\nFrom " upon parsing the mailbox, so mboxrd standard requires that, when saving a mailbox, "\n>From " sequences are additionally escaped as "\n>>From ". This is the format public-inbox supports, so when we grab mailboxes from remote, they are in mboxrd format. Git will try to guess the format of the mbox file, but it will ONLY back-convert "\n>From " sequences when you specifically tell it that it's "mboxrd" format, even when it's in fact "mboxo": git am --patch-format=mboxrd If you don't force the mboxrd format, git-am will preserve all escaped "\n>From " lines as-is. We've been previously operating on the assumption that git-am's mbox support properly implements "mboxo", but this was wrong, resulting in some commits like the following: https://git.kernel.org/torvalds/c/137733d08f4a This large-ish change ditches all internal use of Python's mboxo. When asked to save mbox files, we will save them without any escaping, the way git-am (i.e. git-mailsplit) espects them. The same goes when we're outputting to stdout. There is also a way now to pass -M to both "b4 am" and "b4 mbox" that will save things as maildirs -- git-am supports this natively and thus avoids any possible parsing ambiguities. You can set a config option b4.save-maildirs=yes to make this the default behaviour. The fallout of this is fairly benign, if annoying. There is no situation in which a patch would have "\nFrom " as part of its body, so the problem only affected commit messages. We will have a handful of these sprinkled around the trees, and will hopefully not introduce any new ones once everyone switches to the b4 version that outputs things in the format git-am expects. Signed-off-by: Konstantin Ryabitsev --- b4/__init__.py | 158 ++++++++++++++++++----------- b4/command.py | 6 +- b4/diff.py | 48 +++++---- b4/mbox.py | 307 +++++++++++++++++++++++++++------------------------------ b4/pr.py | 27 ++--- 5 files changed, 283 insertions(+), 263 deletions(-) diff --git a/b4/__init__.py b/b4/__init__.py index 43fd078..bc669fe 100644 --- a/b4/__init__.py +++ b/b4/__init__.py @@ -11,6 +11,9 @@ import fnmatch import email.utils import email.policy import email.header +import tempfile +import pathlib + import requests import urllib.parse import datetime @@ -21,8 +24,6 @@ import mailbox # noinspection PyCompatibility import pwd -from pathlib import Path -from tempfile import mkstemp, TemporaryDirectory from contextlib import contextmanager from typing import Optional, Tuple, Set, List @@ -94,6 +95,7 @@ DEFAULT_CONFIG = { 'midmask': LOREADDR + '/r/%s', 'linkmask': LOREADDR + '/r/%s', 'trailer-order': DEFAULT_TRAILER_ORDER, + 'save-maildirs': 'no', # off: do not bother checking attestation # check: print an attaboy when attestation is found # softfail: print a warning when no attestation found @@ -216,14 +218,10 @@ class LoreMailbox: if reused: continue # Try to backfill from that project - tmp_mbox = mkstemp('b4-backfill-mbox')[1] - get_pi_thread_by_msgid(patch.msgid, tmp_mbox, useproject=projmap[entry[1]]) - mbx = mailbox.mbox(tmp_mbox) + backfills = get_pi_thread_by_msgid(patch.msgid, useproject=projmap[entry[1]]) was = len(self.msgid_map) - for msg in mbx: + for msg in backfills: self.add_message(msg) - mbx.close() - os.unlink(tmp_mbox) if len(self.msgid_map) > was: logger.info('Loaded %s messages from %s', len(self.msgid_map)-was, projurl) if self.series[revision].complete: @@ -541,8 +539,8 @@ class LoreSeries: return slug[:100] - def save_am_mbox(self, mbx, noaddtrailers=False, covertrailers=False, trailer_order=None, addmysob=False, - addlink=False, linkmask=None, cherrypick=None, copyccs=False): + def get_am_ready(self, noaddtrailers=False, covertrailers=False, trailer_order=None, addmysob=False, + addlink=False, linkmask=None, cherrypick=None, copyccs=False) -> list: usercfg = get_user_config() config = get_main_config() @@ -584,6 +582,7 @@ class LoreSeries: break at = 1 + msgs = list() for lmsg in self.patches[1:]: if cherrypick is not None: if at not in cherrypick: @@ -625,14 +624,14 @@ class LoreSeries: if noaddtrailers: add_trailers = False msg = lmsg.get_am_message(add_trailers=add_trailers, trailer_order=trailer_order, copyccs=copyccs) - # Pass a policy that avoids most legacy encoding horrors - mbx.add(msg.as_bytes(policy=emlpolicy)) + slug = '%04d_%s' % (lmsg.counter, re.sub(r'\W+', '_', lmsg.subject).strip('_').lower()) + msgs.append((slug, msg)) else: logger.error(' ERROR: missing [%s/%s]!', at, self.expected) at += 1 if attpolicy == 'off': - return mbx + return msgs if attsame and attref: logger.info(' ---') @@ -646,7 +645,7 @@ class LoreSeries: if not can_patatt: logger.info(' NOTE: install patatt for end-to-end signature verification') - return mbx + return msgs def check_applies_clean(self, gitdir, when=None): # Go through indexes and see if this series should apply cleanly @@ -1053,7 +1052,7 @@ class LoreMessage: signtime = self.date self.msg._headers.append((hn, hval)) # noqa - res = dkim.verify(self.msg.as_bytes().replace(b'\n>From ', b'\nFrom ')) + res = dkim.verify(self.msg.as_bytes()) attestor = LoreAttestorDKIM(res, identity, signtime, errors) logger.debug('DKIM verify results: %s=%s', identity, res) @@ -1484,9 +1483,9 @@ class LoreMessage: def get_am_message(self, add_trailers=True, trailer_order=None, copyccs=False): if add_trailers: self.fix_trailers(trailer_order=trailer_order, copyccs=copyccs) - am_body = self.body + am_body = self.body.rstrip('\r\n') am_msg = email.message.EmailMessage() - am_msg.set_payload(am_body.encode('utf-8')) + am_msg.set_payload(am_body.encode() + b'\n') # Clean up headers for hdrname, hdrval in self.msg.items(): lhdrname = hdrname.lower() @@ -1572,6 +1571,10 @@ class LoreSubject: subject = re.sub(r'^\s*\[[^]]*]\s*', '', subject) self.subject = subject + def get_slug(self): + unsafe = '%04d_%s' % (self.counter, self.subject) + return re.sub(r'\W+', '_', unsafe).strip('_').lower() + def __repr__(self): out = list() out.append(' full_subject: %s' % self.full_subject) @@ -1772,7 +1775,7 @@ def git_temp_worktree(gitdir=None, commitish=None): worktree is deleted when the contex manager is closed. Taken from gj_tools.""" dfn = None try: - with TemporaryDirectory() as dfn: + with tempfile.TemporaryDirectory() as dfn: gitargs = ['worktree', 'add', '--detach', '--no-checkout', dfn] if commitish: gitargs.append(commitish) @@ -1796,7 +1799,7 @@ def git_temp_clone(gitdir=None): logger.critical('Current directory is not a git checkout. Try using -g.') return None - with TemporaryDirectory() as dfn: + with tempfile.TemporaryDirectory() as dfn: gitargs = ['clone', '--mirror', '--shared', gitdir, dfn] git_run_command(None, gitargs) yield dfn @@ -1862,9 +1865,9 @@ def get_data_dir(appname: str = 'b4') -> str: if 'XDG_DATA_HOME' in os.environ: datahome = os.environ['XDG_DATA_HOME'] else: - datahome = os.path.join(str(Path.home()), '.local', 'share') + datahome = os.path.join(str(pathlib.Path.home()), '.local', 'share') datadir = os.path.join(datahome, appname) - Path(datadir).mkdir(parents=True, exist_ok=True) + pathlib.Path(datadir).mkdir(parents=True, exist_ok=True) return datadir @@ -1873,9 +1876,9 @@ def get_cache_dir(appname: str = 'b4') -> str: if 'XDG_CACHE_HOME' in os.environ: cachehome = os.environ['XDG_CACHE_HOME'] else: - cachehome = os.path.join(str(Path.home()), '.cache') + cachehome = os.path.join(str(pathlib.Path.home()), '.cache') cachedir = os.path.join(cachehome, appname) - Path(cachedir).mkdir(parents=True, exist_ok=True) + pathlib.Path(cachedir).mkdir(parents=True, exist_ok=True) if _CACHE_CLEANED: return cachedir @@ -1888,12 +1891,16 @@ def get_cache_dir(appname: str = 'b4') -> str: expmin = 600 expage = time.time() - expmin for entry in os.listdir(cachedir): - if entry.find('.mbx') <= 0 and entry.find('.lookup') <= 0: + if entry.find('.mbx') <= 0 and entry.find('.lookup') <= 0 and entry.find('.msgs'): continue - st = os.stat(os.path.join(cachedir, entry)) + fullpath = os.path.join(cachedir, entry) + st = os.stat(fullpath) if st.st_mtime < expage: logger.debug('Cleaning up cache: %s', entry) - os.unlink(os.path.join(cachedir, entry)) + if os.path.isdir(fullpath): + shutil.rmtree(fullpath) + else: + os.unlink(os.path.join(cachedir, entry)) _CACHE_CLEANED = True return cachedir @@ -1985,13 +1992,14 @@ def get_msgid(cmdargs) -> Optional[str]: return msgid -def save_strict_thread(in_mbx, out_mbx, msgid): +def get_strict_thread(msgs, msgid): want = {msgid} got = set() seen = set() maybe = dict() + strict = list() while True: - for msg in in_mbx: + for msg in msgs: c_msgid = LoreMessage.get_clean_msgid(msg) seen.add(c_msgid) if c_msgid in got: @@ -2016,7 +2024,7 @@ def save_strict_thread(in_mbx, out_mbx, msgid): maybe[ref].add(c_msgid) if c_msgid in want: - out_mbx.add(msg) + strict.append(msg) got.add(c_msgid) want.update(refs) want.discard(c_msgid) @@ -2038,19 +2046,41 @@ def save_strict_thread(in_mbx, out_mbx, msgid): if not len(want): break - if not len(out_mbx): + if not len(strict): return None - if len(in_mbx) > len(out_mbx): - logger.debug('Reduced mbox to strict matches only (%s->%s)', len(in_mbx), len(out_mbx)) + if len(msgs) > len(strict): + logger.debug('Reduced mbox to strict matches only (%s->%s)', len(msgs), len(strict)) + return strict -def get_pi_thread_by_url(t_mbx_url, savefile, nocache=False): - cachefile = get_cache_file(t_mbx_url, 'pi.mbx') - if os.path.exists(cachefile) and not nocache: - logger.debug('Using cached copy: %s', cachefile) - shutil.copyfile(cachefile, savefile) - return savefile + +def mailsplit_bytes(bmbox: bytes, outdir: str) -> list: + logger.debug('Mailsplitting the mbox into %s', outdir) + args = ['mailsplit', '--mboxrd', '-o%s' % outdir] + ecode, out = git_run_command(None, args, stdin=bmbox) + msgs = list() + if ecode > 0: + logger.critical('Unable to parse mbox received from the server') + return msgs + # Read in the files + for msg in os.listdir(outdir): + with open(os.path.join(outdir, msg), 'rb') as fh: + msgs.append(email.message_from_binary_file(fh)) + return msgs + + +def get_pi_thread_by_url(t_mbx_url, nocache=False): + msgs = list() + cachedir = get_cache_file(t_mbx_url, 'pi.msgs') + if os.path.exists(cachedir) and not nocache: + logger.info('Using cached copy: %s', cachedir) + for msg in os.listdir(cachedir): + with open(os.path.join(cachedir, msg), 'rb') as fh: + msgs.append(email.message_from_binary_file(fh)) + return msgs + + logger.critical('Grabbing thread from %s', t_mbx_url.split('://')[1]) session = get_requests_session() resp = session.get(t_mbx_url) if resp.status_code != 200: @@ -2061,16 +2091,16 @@ def get_pi_thread_by_url(t_mbx_url, savefile, nocache=False): if not len(t_mbox): logger.critical('No messages found for that query') return None - # Convert mboxrd to mboxo that python understands - t_mbox = t_mbox.replace(b'\n>>From ', b'\n>From ') - with open(savefile, 'wb') as fh: - logger.debug('Saving %s', savefile) - fh.write(t_mbox) - shutil.copyfile(savefile, cachefile) - return savefile + # Convert into individual files using git-mailsplit + with tempfile.TemporaryDirectory(suffix='-mailsplit') as tfd: + msgs = mailsplit_bytes(t_mbox, tfd) + if os.path.exists(cachedir): + shutil.rmtree(cachedir) + shutil.copytree(tfd, cachedir) + return msgs -def get_pi_thread_by_msgid(msgid, savefile, useproject=None, nocache=False): +def get_pi_thread_by_msgid(msgid, useproject=None, nocache=False): qmsgid = urllib.parse.quote_plus(msgid) config = get_main_config() # Grab the head from lore, to see where we are redirected @@ -2092,25 +2122,17 @@ def get_pi_thread_by_msgid(msgid, savefile, useproject=None, nocache=False): t_mbx_url = '%s/%s/t.mbox.gz' % (projurl, qmsgid) logger.debug('t_mbx_url=%s', t_mbx_url) - logger.critical('Grabbing thread from %s', projurl.split('://')[1]) - - tmp_mbox = mkstemp('b4-lookup-mbox')[1] - in_mbxf = get_pi_thread_by_url(t_mbx_url, tmp_mbox, nocache=nocache) - if not in_mbxf: - os.unlink(tmp_mbox) + msgs = get_pi_thread_by_url(t_mbx_url, nocache=nocache) + if not len(msgs): return None - in_mbx = mailbox.mbox(in_mbxf) - out_mbx = mailbox.mbox(savefile) - save_strict_thread(in_mbx, out_mbx, msgid) - in_mbx.close() - out_mbx.close() - os.unlink(in_mbxf) - return savefile + + strict = get_strict_thread(msgs, msgid) + return strict @contextmanager def git_format_patches(gitdir, start, end, prefixes=None, extraopts=None): - with TemporaryDirectory() as tmpd: + with tempfile.TemporaryDirectory() as tmpd: gitargs = ['format-patch', '--cover-letter', '-o', tmpd, '--signature', f'b4 {__VERSION__}'] if prefixes is not None and len(prefixes): gitargs += ['--subject-prefix', ' '.join(prefixes)] @@ -2238,3 +2260,19 @@ def get_gpg_uids(keyid: str) -> list: uids.append(chunks[9]) return uids + + +def save_git_am_mbox(msgs: list, dest): + # Git-am has its own understanding of what "mbox" format is that differs from Python's + # mboxo implementation. Specifically, it never escapes the ">From " lines found in bodies + # unless invoked with --patch-format=mboxrd (this is wrong, because ">From " escapes are also + # required in the original mbox "mboxo" format). + # So, save in the format that git-am expects + # "dest" should be a file handler in writable+binary mode + for msg in msgs: + bmsg = msg.as_bytes(unixfrom=True, policy=emlpolicy) + # public-inbox unixfrom says "mboxrd", so replace it with something else + # so there is no confusion as it's NOT mboxrd + bmsg = bmsg.replace(b'From mboxrd@z ', b'From git@z ') + bmsg = bmsg.rstrip(b'\r\n') + b'\n\n' + dest.write(bmsg) diff --git a/b4/command.py b/b4/command.py index 4c0a788..d3b0c95 100644 --- a/b4/command.py +++ b/b4/command.py @@ -23,9 +23,11 @@ def cmd_mbox_common_opts(sp): sp.add_argument('-c', '--check-newer-revisions', dest='checknewer', action='store_true', default=False, help='Check if newer patch revisions exist') sp.add_argument('-n', '--mbox-name', dest='wantname', default=None, - help='Filename to name the mbox file') + help='Filename to name the mbox destination') sp.add_argument('-m', '--use-local-mbox', dest='localmbox', default=None, help='Instead of grabbing a thread from lore, process this mbox file (or - for stdin)') + sp.add_argument('-M', '--save-as-maildir', dest='maildir', action='store_true', default=False, + help='Save as maildir (avoids mbox format ambiguities)') sp.add_argument('-C', '--no-cache', dest='nocache', action='store_true', default=False, help='Do not use local cache') @@ -104,7 +106,7 @@ def cmd(): sp_am.add_argument('-l', '--add-link', dest='addlink', action='store_true', default=False, help='Add a lore.kernel.org/r/ link to every patch') sp_am.add_argument('-Q', '--quilt-ready', dest='quiltready', action='store_true', default=False, - help='Save mbox patches in a quilt-ready folder') + help='Save patches in a quilt-ready folder') sp_am.add_argument('-P', '--cherry-pick', dest='cherrypick', default=None, help='Cherry-pick a subset of patches (e.g. "-P 1-2,4,6-", ' '"-P _" to use just the msgid specified, or ' diff --git a/b4/diff.py b/b4/diff.py index 38d2a9a..abd5fec 100644 --- a/b4/diff.py +++ b/b4/diff.py @@ -10,10 +10,9 @@ import sys import b4 import b4.mbox import mailbox +import email import shutil - -from tempfile import mkstemp - +import pathlib logger = b4.logger @@ -26,7 +25,6 @@ def diff_same_thread_series(cmdargs): sys.exit(1) # start by grabbing the mbox provided - savefile = mkstemp('b4-diff-to')[1] # Do we have a cache of this lookup? identifier = msgid if wantvers: @@ -34,30 +32,35 @@ def diff_same_thread_series(cmdargs): if cmdargs.useproject: identifier += '-' + cmdargs.useproject - cachefile = b4.get_cache_file(identifier, suffix='diff.mbx') - if os.path.exists(cachefile) and not cmdargs.nocache: + cachedir = b4.get_cache_file(identifier, suffix='diff.msgs') + if os.path.exists(cachedir) and not cmdargs.nocache: logger.info('Using cached copy of the lookup') - shutil.copyfile(cachefile, savefile) - mboxfile = savefile + msgs = list() + for msg in os.listdir(cachedir): + with open(os.path.join(cachedir, msg), 'rb') as fh: + msgs.append(email.message_from_binary_file(fh)) else: - mboxfile = b4.get_pi_thread_by_msgid(msgid, savefile, useproject=cmdargs.useproject, nocache=cmdargs.nocache) - if mboxfile is None: + msgs = b4.get_pi_thread_by_msgid(msgid, useproject=cmdargs.useproject, nocache=cmdargs.nocache) + if not msgs: logger.critical('Unable to retrieve thread: %s', msgid) return - b4.mbox.get_extra_series(mboxfile, direction=-1, wantvers=wantvers) - shutil.copyfile(mboxfile, cachefile) - - mbx = mailbox.mbox(mboxfile) - count = len(mbx) + msgs = b4.mbox.get_extra_series(msgs, direction=-1, wantvers=wantvers) + if os.path.exists(cachedir): + shutil.rmtree(cachedir) + pathlib.Path(cachedir).mkdir(parents=True) + at = 0 + for msg in msgs: + with open(os.path.join(cachedir, '%04d' % at), 'wb') as fh: + fh.write(msg.as_bytes(policy=b4.emlpolicy)) + at += 1 + + count = len(msgs) logger.info('---') logger.info('Analyzing %s messages in the thread', count) lmbx = b4.LoreMailbox() - for key, msg in mbx.items(): + for msg in msgs: lmbx.add_message(msg) - mbx.close() - os.unlink(savefile) - if wantvers and len(wantvers) == 1: upper = max(lmbx.series.keys()) lower = wantvers[0] @@ -66,7 +69,7 @@ def diff_same_thread_series(cmdargs): lower = min(wantvers) else: upper = max(lmbx.series.keys()) - lower = upper - 1 + lower = min(lmbx.series.keys()) if upper == lower: logger.critical('ERROR: Could not auto-find previous revision') @@ -101,7 +104,10 @@ def diff_mboxes(cmdargs): logger.critical('Cannot open %s', mboxfile) return None, None - mbx = mailbox.mbox(mboxfile) + if os.path.isdir(mboxfile): + mbx = mailbox.Maildir(mboxfile) + else: + mbx = mailbox.mbox(mboxfile) count = len(mbx) logger.info('Loading %s messages from %s', count, mboxfile) lmbx = b4.LoreMailbox() diff --git a/b4/mbox.py b/b4/mbox.py index c9d5715..9c8de3f 100644 --- a/b4/mbox.py +++ b/b4/mbox.py @@ -17,18 +17,19 @@ import json import fnmatch import shutil import pathlib +import tempfile import urllib.parse import xml.etree.ElementTree import b4 -from tempfile import mkstemp +from typing import Optional logger = b4.logger -def mbox_to_am(mboxfile, cmdargs, msgid): +def make_am(msgs, cmdargs, msgid): config = b4.get_main_config() outdir = cmdargs.outdir if outdir == '-': @@ -36,15 +37,11 @@ def mbox_to_am(mboxfile, cmdargs, msgid): wantver = cmdargs.wantver wantname = cmdargs.wantname covertrailers = cmdargs.covertrailers - if os.path.isdir(mboxfile): - mbx = mailbox.Maildir(mboxfile) - else: - mbx = mailbox.mbox(mboxfile) - count = len(mbx) + count = len(msgs) logger.info('Analyzing %s messages in the thread', count) lmbx = b4.LoreMailbox() # Go through the mbox once to populate base series - for key, msg in mbx.items(): + for msg in msgs: lmbx.add_message(msg) reroll = True @@ -61,26 +58,6 @@ def mbox_to_am(mboxfile, cmdargs, msgid): logger.info('Will use the latest revision: v%s', lser.revision) logger.info('You can pick other revisions using the -vN flag') - if wantname: - slug = wantname - if wantname.find('.') > -1: - slug = '.'.join(wantname.split('.')[:-1]) - gitbranch = slug - else: - slug = lser.get_slug(extended=True) - gitbranch = lser.get_slug(extended=False) - - if outdir != '-': - am_filename = os.path.join(outdir, '%s.mbx' % slug) - am_cover = os.path.join(outdir, '%s.cover' % slug) - - if os.path.exists(am_filename): - os.unlink(am_filename) - else: - # Create a temporary file that we will remove later - am_filename = mkstemp('b4-am-stdout')[1] - am_cover = None - logger.info('---') if cmdargs.cherrypick: cherrypick = list() @@ -111,23 +88,62 @@ def mbox_to_am(mboxfile, cmdargs, msgid): else: cherrypick = None - logger.critical('Writing %s', am_filename) - mbx = mailbox.mbox(am_filename) try: - am_mbx = lser.save_am_mbox(mbx, noaddtrailers=cmdargs.noaddtrailers, - covertrailers=covertrailers, trailer_order=config['trailer-order'], - addmysob=cmdargs.addmysob, addlink=cmdargs.addlink, - linkmask=config['linkmask'], cherrypick=cherrypick, - copyccs=cmdargs.copyccs) + am_msgs = lser.get_am_ready(noaddtrailers=cmdargs.noaddtrailers, + covertrailers=covertrailers, trailer_order=config['trailer-order'], + addmysob=cmdargs.addmysob, addlink=cmdargs.addlink, + linkmask=config['linkmask'], cherrypick=cherrypick, + copyccs=cmdargs.copyccs) except KeyError: sys.exit(1) + if cmdargs.maildir or config.get('save-maildirs', 'no') == 'yes': + save_maildir = True + dftext = 'maildir' + else: + save_maildir = False + dftext = 'mbx' + + if wantname: + slug = wantname + if wantname.find('.') > -1: + slug = '.'.join(wantname.split('.')[:-1]) + gitbranch = slug + else: + slug = lser.get_slug(extended=True) + gitbranch = lser.get_slug(extended=False) + + if outdir != '-': + am_filename = os.path.join(outdir, f'{slug}.{dftext}') + am_cover = os.path.join(outdir, f'{slug}.cover') + + if os.path.exists(am_filename): + if os.path.isdir(am_filename): + shutil.rmtree(am_filename) + else: + os.unlink(am_filename) + if save_maildir: + d_new = os.path.join(am_filename, 'new') + pathlib.Path(d_new).mkdir(parents=True) + d_cur = os.path.join(am_filename, 'cur') + pathlib.Path(d_cur).mkdir(parents=True) + for m_slug, msg in am_msgs: + with open(os.path.join(d_new, m_slug), 'wb') as mfh: + mfh.write(msg.as_bytes(policy=b4.emlpolicy)) + else: + with open(am_filename, 'wb') as fh: + b4.save_git_am_mbox([x[1] for x in am_msgs], fh) + else: + am_filename = None + am_cover = None + b4.save_git_am_mbox([x[1] for x in am_msgs], sys.stdout.buffer) + logger.info('---') if cherrypick is None: - logger.critical('Total patches: %s', len(am_mbx)) + logger.critical('Total patches: %s', len(am_msgs)) else: - logger.info('Total patches: %s (cherrypicked: %s)', len(am_mbx), cmdargs.cherrypick) + logger.info('Total patches: %s (cherrypicked: %s)', len(am_msgs), cmdargs.cherrypick) if lser.has_cover and lser.patches[0].followup_trailers and not covertrailers: # Warn that some trailers were sent to the cover letter logger.critical('---') @@ -183,8 +199,8 @@ def mbox_to_am(mboxfile, cmdargs, msgid): linkurl = config['linkmask'] % top_msgid if cmdargs.quiltready: - q_dirname = os.path.join(outdir, '%s.patches' % slug) - am_mbox_to_quilt(am_mbx, q_dirname) + q_dirname = os.path.join(outdir, f'{slug}.patches') + save_as_quilt(am_msgs, q_dirname) logger.critical('Quilt: %s', q_dirname) logger.critical(' Link: %s', linkurl) @@ -242,13 +258,6 @@ def mbox_to_am(mboxfile, cmdargs, msgid): if cmdargs.outdir != '-': logger.critical(' git am %s', am_filename) - am_mbx.close() - if cmdargs.outdir == '-': - logger.info('---') - with open(am_filename, 'rb') as fh: - shutil.copyfileobj(fh, sys.stdout.buffer) - os.unlink(am_filename) - thanks_record_am(lser, cherrypick=cherrypick) @@ -315,68 +324,57 @@ def thanks_record_am(lser, cherrypick=None): logger.debug('Wrote %s for thanks tracking', filename) -def am_mbox_to_quilt(am_mbx, q_dirname): +def save_as_quilt(am_msgs, q_dirname): if os.path.exists(q_dirname): logger.critical('ERROR: Directory %s exists, not saving quilt patches', q_dirname) return - os.mkdir(q_dirname, 0o755) + pathlib.Path(q_dirname).mkdir(parents=True) patch_filenames = list() - for key, msg in am_mbx.items(): - # Run each message through git mailinfo - msg_out = mkstemp(suffix=None, prefix=None, dir=q_dirname) - patch_out = mkstemp(suffix=None, prefix=None, dir=q_dirname) - cmdargs = ['mailinfo', '--encoding=UTF-8', msg_out[1], patch_out[1]] - ecode, info = b4.git_run_command(None, cmdargs, msg.as_bytes(policy=b4.emlpolicy)) - if not len(info.strip()): - logger.critical('ERROR: Could not get mailinfo from patch %s', msg['Subject']) - continue - patchinfo = dict() - for line in info.split('\n'): - line = line.strip() - if not line: + with tempfile.TemporaryDirectory() as tfd: + m_out = os.path.join(tfd, 'm') + p_out = os.path.join(tfd, 'p') + for slug, msg in am_msgs: + # Run each message through git mailinfo + cmdargs = ['mailinfo', '--encoding=UTF-8', '--scissors', m_out, p_out] + ecode, info = b4.git_run_command(None, cmdargs, msg.as_bytes(policy=b4.emlpolicy)) + if not len(info.strip()): + logger.critical('ERROR: Could not get mailinfo from patch %s', msg.get('Subject', '(no subject)')) continue - chunks = line.split(':', 1) - patchinfo[chunks[0]] = chunks[1] - - slug = re.sub(r'\W+', '_', patchinfo['Subject']).strip('_').lower() - patch_filename = '%04d_%s.patch' % (key+1, slug) - patch_filenames.append(patch_filename) - quilt_out = os.path.join(q_dirname, patch_filename) - with open(quilt_out, 'wb') as fh: - line = 'From: %s <%s>\n' % (patchinfo['Author'].strip(), patchinfo['Email'].strip()) - fh.write(line.encode('utf-8')) - line = 'Subject: %s\n' % patchinfo['Subject'].strip() - fh.write(line.encode('utf-8')) - line = 'Date: %s\n' % patchinfo['Date'].strip() - fh.write(line.encode('utf-8')) - fh.write('\n'.encode('utf-8')) - with open(msg_out[1], 'r') as mfh: - fh.write(mfh.read().encode('utf-8')) - with open(patch_out[1], 'r') as pfh: - fh.write(pfh.read().encode('utf-8')) - logger.debug(' Wrote: %s', patch_filename) - os.unlink(msg_out[1]) - os.unlink(patch_out[1]) + patchinfo = dict() + for line in info.split('\n'): + line = line.strip() + if not line: + continue + chunks = line.split(':', 1) + patchinfo[chunks[0]] = chunks[1].strip().encode() + + patch_filename = f'{slug}.patch' + patch_filenames.append(patch_filename) + quilt_out = os.path.join(q_dirname, patch_filename) + with open(quilt_out, 'wb') as fh: + fh.write(b'From: %s <%s>\n' % (patchinfo['Author'], patchinfo['Email'])) + fh.write(b'Subject: %s\n' % patchinfo['Subject']) + fh.write(b'Date: %s\n' % patchinfo['Date']) + fh.write(b'\n') + with open(m_out, 'rb') as mfh: + shutil.copyfileobj(mfh, fh) + with open(p_out, 'rb') as pfh: + shutil.copyfileobj(pfh, fh) + logger.debug(' Wrote: %s', patch_filename) # Write the series file with open(os.path.join(q_dirname, 'series'), 'w') as sfh: for patch_filename in patch_filenames: sfh.write('%s\n' % patch_filename) -def get_extra_series(mboxfile, direction=1, wantvers=None, nocache=False): - # Open the mbox and find the latest series mentioned in it - if os.path.isdir(mboxfile): - mbx = mailbox.Maildir(mboxfile) - else: - mbx = mailbox.mbox(mboxfile) - +def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[int] = None, nocache: bool = False) -> list: base_msg = None latest_revision = None - seen_msgids = list() - seen_covers = list() - for key, msg in mbx.items(): + seen_msgids = set() + seen_covers = set() + for msg in msgs: msgid = b4.LoreMessage.get_clean_msgid(msg) - seen_msgids.append(msgid) + seen_msgids.add(msgid) lsub = b4.LoreSubject(msg['Subject']) # Ignore replies or counters above 1 if lsub.reply or lsub.counter > 1: @@ -389,25 +387,22 @@ def get_extra_series(mboxfile, direction=1, wantvers=None, nocache=False): if lsub.counter == 0 and not lsub.counters_inferred: # And a cover letter, nice. This is the easy case base_msg = msg - seen_covers.append(latest_revision) + seen_covers.add(latest_revision) elif lsub.counter == 1 and latest_revision not in seen_covers: # A patch/series without a cover letter base_msg = msg if base_msg is None: logger.debug('Could not find cover of 1st patch in mbox') - mbx.close() - return + return msgs # Get subject info from base_msg again lsub = b4.LoreSubject(base_msg['Subject']) if not len(lsub.prefixes): logger.debug('Not checking for new revisions: no prefixes on the cover letter.') - mbx.close() - return + return msgs if direction < 0 and latest_revision <= 1: logger.debug('This is the latest version of the series') - mbx.close() - return + return msgs if direction < 0 and wantvers is None: wantvers = [latest_revision - 1] @@ -434,8 +429,7 @@ def get_extra_series(mboxfile, direction=1, wantvers=None, nocache=False): except xml.etree.ElementTree.ParseError as ex: logger.debug('Unable to parse results, ignoring: %s', ex) resp.close() - mbx.close() - return + return msgs resp.close() ns = {'atom': 'http://www.w3.org/2005/Atom'} entries = tree.findall('atom:entry', ns) @@ -475,12 +469,10 @@ def get_extra_series(mboxfile, direction=1, wantvers=None, nocache=False): logger.debug('No idea what this is: %s', title) continue t_mbx_url = '%st.mbox.gz' % link - savefile = mkstemp('b4-get')[1] - nt_mboxfile = b4.get_pi_thread_by_url(t_mbx_url, savefile, nocache=nocache) - nt_mbx = mailbox.mbox(nt_mboxfile) + nt_msgs = b4.get_pi_thread_by_url(t_mbx_url, nocache=nocache) # Append all of these to the existing mailbox new_adds = 0 - for nt_msg in nt_mbx: + for nt_msg in nt_msgs: nt_msgid = b4.LoreMessage.get_clean_msgid(nt_msg) if nt_msgid in seen_msgids: logger.debug('Duplicate message, skipping') @@ -488,16 +480,12 @@ def get_extra_series(mboxfile, direction=1, wantvers=None, nocache=False): nt_subject = re.sub(r'\s+', ' ', nt_msg['Subject']) logger.debug('Adding: %s', nt_subject) new_adds += 1 - mbx.add(nt_msg) - seen_msgids.append(nt_msgid) - nt_mbx.close() + msgs.append(nt_msg) + seen_msgids.add(nt_msgid) if new_adds: logger.info('Added %s messages from thread: %s', new_adds, title) - logger.debug('Removing temporary %s', nt_mboxfile) - os.unlink(nt_mboxfile) - # We close the mbox, since we'll be reopening it later - mbx.close() + return msgs def main(cmdargs): @@ -505,30 +493,25 @@ def main(cmdargs): # Force nocache mode cmdargs.nocache = True - savefile = mkstemp('b4-mbox')[1] - msgid = None - + msgs = list() if not cmdargs.localmbox: msgid = b4.get_msgid(cmdargs) if not msgid: logger.error('Error: pipe a message or pass msgid as parameter') - os.unlink(savefile) sys.exit(1) - threadfile = b4.get_pi_thread_by_msgid(msgid, savefile, useproject=cmdargs.useproject, nocache=cmdargs.nocache) - if threadfile is None: - os.unlink(savefile) + msgs = b4.get_pi_thread_by_msgid(msgid, useproject=cmdargs.useproject, nocache=cmdargs.nocache) + if not len(msgs): return else: if cmdargs.localmbox == '-': - # The entire mbox is passed via stdin, so save it into a temporary file - # and use the first message for our msgid - with open(savefile, 'wb') as fh: - fh.write(sys.stdin.buffer.read()) - in_mbx = mailbox.mbox(savefile) - msg = in_mbx.get(0) - if msg: - msgid = msg.get('Message-ID', None).strip('<>') + # The entire mbox is passed via stdin, so mailsplit it and use the first message for our msgid + with tempfile.TemporaryDirectory() as tfd: + msgs = b4.mailsplit_bytes(sys.stdin.buffer.read(), tfd) + if not len(msgs): + logger.critical('Stdin did not contain any messages') + sys.exit(1) + msgid = msgs[0].get('Message-ID', None).strip('<>') elif os.path.exists(cmdargs.localmbox): msgid = b4.get_msgid(cmdargs) @@ -536,41 +519,33 @@ def main(cmdargs): in_mbx = mailbox.Maildir(cmdargs.localmbox) else: in_mbx = mailbox.mbox(cmdargs.localmbox) + if msgid: - out_mbx = mailbox.mbox(savefile) - b4.save_strict_thread(in_mbx, out_mbx, msgid) - if not len(out_mbx): + msgs = b4.get_strict_thread(in_mbx, msgid) + if not len(msgs): logger.critical('Could not find %s in %s', msgid, cmdargs.localmbox) - os.unlink(savefile) sys.exit(1) else: logger.critical('Mailbox %s does not exist', cmdargs.localmbox) - os.unlink(savefile) sys.exit(1) - threadfile = savefile - - if threadfile and cmdargs.checknewer: - get_extra_series(threadfile, direction=1) + if len(msgs) and cmdargs.checknewer: + msgs = get_extra_series(msgs, direction=1) if cmdargs.subcmd == 'am': - mbox_to_am(threadfile, cmdargs, msgid) - os.unlink(threadfile) + make_am(msgs, cmdargs, msgid) return - mbx = mailbox.mbox(threadfile) - if cmdargs.showkeys: logger.info('---') try: import patatt except ModuleNotFoundError: logger.info('--show-keys requires the patatt library') - os.unlink(threadfile) sys.exit(1) keydata = set() - for msg in mbx: + for msg in msgs: xdk = msg.get('x-developer-key') xds = msg.get('x-developer-signature') if not xdk or not xds: @@ -589,11 +564,9 @@ def main(cmdargs): logger.debug('Unknown key type: %s', algo) continue keydata.add((identity, algo, selector, keyinfo)) - mbx.close() if not keydata: logger.info('No keys found in the thread.') - os.unlink(threadfile) sys.exit(0) krpath = os.path.join(b4.get_data_dir(), 'keyring') pgp = False @@ -634,16 +607,12 @@ def main(cmdargs): logger.info('For ed25519 keys:') logger.info(' echo [pubkey] > [fullpath]') - os.unlink(threadfile) sys.exit(0) - logger.info('%s messages in the thread', len(mbx)) + logger.info('%s messages in the thread', len(msgs)) if cmdargs.outdir == '-': - mbx.close() logger.info('---') - with open(threadfile, 'rb') as fh: - shutil.copyfileobj(fh, sys.stdout.buffer) - os.unlink(threadfile) + b4.save_git_am_mbox(msgs, sys.stdout.buffer) return # Check if outdir is a maildir @@ -656,21 +625,37 @@ def main(cmdargs): if cmdargs.filterdupes: for emsg in mdr: have_msgids.add(b4.LoreMessage.get_clean_msgid(emsg)) - for msg in mbx: + for msg in msgs: if b4.LoreMessage.get_clean_msgid(msg) not in have_msgids: added += 1 mdr.add(msg) logger.info('Added to %s messages to maildir %s', added, cmdargs.outdir) - mbx.close() - os.unlink(threadfile) return + config = b4.get_main_config() + if cmdargs.maildir or config.get('save-maildirs', 'no') == 'yes': + save_maildir = True + dftext = 'thread' + else: + save_maildir = False + dftext = 'mbx' + if cmdargs.wantname: - savefile = os.path.join(cmdargs.outdir, cmdargs.wantname) + savename = os.path.join(cmdargs.outdir, cmdargs.wantname) else: - savefile = os.path.join(cmdargs.outdir, '%s.mbx' % msgid) + savename = os.path.join(cmdargs.outdir, f'{msgid}.{dftext}') + + if save_maildir: + if os.path.isdir(savename): + shutil.rmtree(savename) + md = mailbox.Maildir(savename, create=True) + for msg in msgs: + md.add(msg) + md.close() + logger.info('Saved maildir %s', savename) + return + + with open(savename, 'wb') as fh: + b4.save_git_am_mbox(msgs, fh) - mbx.close() - shutil.copy(threadfile, savefile) - logger.info('Saved %s', savefile) - os.unlink(threadfile) + logger.info('Saved %s', savename) diff --git a/b4/pr.py b/b4/pr.py index 374d8ed..8e8fdd7 100644 --- a/b4/pr.py +++ b/b4/pr.py @@ -393,19 +393,15 @@ def explode(gitdir, lmsg, mailfrom=None, retrieve_links=True, fpopts=None): # Did we already retrieve it as part of a previous tread? if msgid in seen_msgids: continue - savefile = mkstemp()[1] - mboxfile = b4.get_pi_thread_by_msgid(msgid, savefile) - if mboxfile is not None: - # Open it and append any messages we don't yet have - ambx = mailbox.mbox(mboxfile) - for amsg in ambx: + msgs = b4.get_pi_thread_by_msgid(msgid) + if msgs: + # Append any messages we don't yet have + for amsg in msgs: amsgid = b4.LoreMessage.get_clean_msgid(amsg) if amsgid not in seen_msgids: seen_msgids.add(amsgid) logger.debug('Added linked: %s', amsg.get('Subject')) tmbx.add(amsg.as_string(policy=b4.emlpolicy).encode()) - ambx.close() - os.unlink(savefile) if len(tmbx): tmbx.close() @@ -441,21 +437,14 @@ def main(cmdargs): logger.debug('Getting PR message from public-inbox') msgid = b4.get_msgid(cmdargs) - savefile = mkstemp()[1] - mboxfile = b4.get_pi_thread_by_msgid(msgid, savefile) - if mboxfile is None: - os.unlink(savefile) + msgs = b4.get_pi_thread_by_msgid(msgid) + if not msgs: return - # Find the message with the msgid we were asked about - mbx = mailbox.mbox(mboxfile) - for msg in mbx: + for msg in msgs: mmsgid = b4.LoreMessage.get_clean_msgid(msg) if mmsgid == msgid: lmsg = parse_pr_data(msg) - - # Got all we need from it - mbx.close() - os.unlink(savefile) + break if lmsg is None or lmsg.pr_remote_tip_commit is None: logger.critical('ERROR: Could not find pull request info in %s', msgid) -- cgit v1.2.3