Diffstat (limited to 'b4/__init__.py')
-rw-r--r--  b4/__init__.py  158
1 file changed, 98 insertions, 60 deletions
diff --git a/b4/__init__.py b/b4/__init__.py
index 43fd078..bc669fe 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -11,6 +11,9 @@ import fnmatch
import email.utils
import email.policy
import email.header
+import tempfile
+import pathlib
+
import requests
import urllib.parse
import datetime
@@ -21,8 +24,6 @@ import mailbox
# noinspection PyCompatibility
import pwd
-from pathlib import Path
-from tempfile import mkstemp, TemporaryDirectory
from contextlib import contextmanager
from typing import Optional, Tuple, Set, List
@@ -94,6 +95,7 @@ DEFAULT_CONFIG = {
'midmask': LOREADDR + '/r/%s',
'linkmask': LOREADDR + '/r/%s',
'trailer-order': DEFAULT_TRAILER_ORDER,
+ 'save-maildirs': 'no',
# off: do not bother checking attestation
# check: print an attaboy when attestation is found
# softfail: print a warning when no attestation found
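
The new 'save-maildirs' entry follows the same merged-config pattern as the other defaults above; a minimal sketch of how a caller might consult it, assuming the usual get_main_config() lookup (the maildir-writing branch itself is illustrative only):

    # Hedged sketch: check the new knob through the merged b4 config.
    config = get_main_config()
    if config.get('save-maildirs', 'no') == 'yes':
        pass  # caller would write one file per message instead of a single mbox
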
@@ -216,14 +218,10 @@ class LoreMailbox:
if reused:
continue
# Try to backfill from that project
- tmp_mbox = mkstemp('b4-backfill-mbox')[1]
- get_pi_thread_by_msgid(patch.msgid, tmp_mbox, useproject=projmap[entry[1]])
- mbx = mailbox.mbox(tmp_mbox)
+ backfills = get_pi_thread_by_msgid(patch.msgid, useproject=projmap[entry[1]])
was = len(self.msgid_map)
- for msg in mbx:
+ for msg in backfills:
self.add_message(msg)
- mbx.close()
- os.unlink(tmp_mbox)
if len(self.msgid_map) > was:
logger.info('Loaded %s messages from %s', len(self.msgid_map)-was, projurl)
if self.series[revision].complete:
@@ -541,8 +539,8 @@ class LoreSeries:
return slug[:100]
- def save_am_mbox(self, mbx, noaddtrailers=False, covertrailers=False, trailer_order=None, addmysob=False,
- addlink=False, linkmask=None, cherrypick=None, copyccs=False):
+ def get_am_ready(self, noaddtrailers=False, covertrailers=False, trailer_order=None, addmysob=False,
+ addlink=False, linkmask=None, cherrypick=None, copyccs=False) -> list:
usercfg = get_user_config()
config = get_main_config()
@@ -584,6 +582,7 @@ class LoreSeries:
break
at = 1
+ msgs = list()
for lmsg in self.patches[1:]:
if cherrypick is not None:
if at not in cherrypick:
@@ -625,14 +624,14 @@ class LoreSeries:
if noaddtrailers:
add_trailers = False
msg = lmsg.get_am_message(add_trailers=add_trailers, trailer_order=trailer_order, copyccs=copyccs)
- # Pass a policy that avoids most legacy encoding horrors
- mbx.add(msg.as_bytes(policy=emlpolicy))
+ slug = '%04d_%s' % (lmsg.counter, re.sub(r'\W+', '_', lmsg.subject).strip('_').lower())
+ msgs.append((slug, msg))
else:
logger.error(' ERROR: missing [%s/%s]!', at, self.expected)
at += 1
if attpolicy == 'off':
- return mbx
+ return msgs
if attsame and attref:
logger.info(' ---')
@@ -646,7 +645,7 @@ class LoreSeries:
if not can_patatt:
logger.info(' NOTE: install patatt for end-to-end signature verification')
- return mbx
+ return msgs
def check_applies_clean(self, gitdir, when=None):
# Go through indexes and see if this series should apply cleanly
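
Since get_am_ready() now returns (slug, message) pairs instead of filling an mbox handed in by the caller, persisting the series becomes the caller's job; a minimal sketch, assuming a LoreSeries instance named lseries and a hypothetical output directory:

    # Hedged sketch: consume the (slug, msg) pairs returned by get_am_ready().
    import os

    msgs = lseries.get_am_ready(addmysob=True)
    outdir = '/tmp/am-ready'  # hypothetical destination
    os.makedirs(outdir, exist_ok=True)
    for slug, msg in msgs:
        with open(os.path.join(outdir, '%s.eml' % slug), 'wb') as fh:
            fh.write(msg.as_bytes(policy=emlpolicy))
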
@@ -1053,7 +1052,7 @@ class LoreMessage:
signtime = self.date
self.msg._headers.append((hn, hval)) # noqa
- res = dkim.verify(self.msg.as_bytes().replace(b'\n>From ', b'\nFrom '))
+ res = dkim.verify(self.msg.as_bytes())
attestor = LoreAttestorDKIM(res, identity, signtime, errors)
logger.debug('DKIM verify results: %s=%s', identity, res)
@@ -1484,9 +1483,9 @@ class LoreMessage:
def get_am_message(self, add_trailers=True, trailer_order=None, copyccs=False):
if add_trailers:
self.fix_trailers(trailer_order=trailer_order, copyccs=copyccs)
- am_body = self.body
+ am_body = self.body.rstrip('\r\n')
am_msg = email.message.EmailMessage()
- am_msg.set_payload(am_body.encode('utf-8'))
+ am_msg.set_payload(am_body.encode() + b'\n')
# Clean up headers
for hdrname, hdrval in self.msg.items():
lhdrname = hdrname.lower()
@@ -1572,6 +1571,10 @@ class LoreSubject:
subject = re.sub(r'^\s*\[[^]]*]\s*', '', subject)
self.subject = subject
+ def get_slug(self):
+ unsafe = '%04d_%s' % (self.counter, self.subject)
+ return re.sub(r'\W+', '_', unsafe).strip('_').lower()
+
def __repr__(self):
out = list()
out.append(' full_subject: %s' % self.full_subject)
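
get_slug() squashes runs of non-word characters into underscores behind a zero-padded counter and lowercases the result; a quick illustration with invented values:

    # Hedged illustration of the slug format; counter and subject are invented.
    import re

    counter, subject = 3, 'mm/page_alloc: fix the thing'
    slug = re.sub(r'\W+', '_', '%04d_%s' % (counter, subject)).strip('_').lower()
    # -> '0003_mm_page_alloc_fix_the_thing'
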
@@ -1772,7 +1775,7 @@ def git_temp_worktree(gitdir=None, commitish=None):
worktree is deleted when the context manager is closed. Taken from gj_tools."""
dfn = None
try:
- with TemporaryDirectory() as dfn:
+ with tempfile.TemporaryDirectory() as dfn:
gitargs = ['worktree', 'add', '--detach', '--no-checkout', dfn]
if commitish:
gitargs.append(commitish)
@@ -1796,7 +1799,7 @@ def git_temp_clone(gitdir=None):
logger.critical('Current directory is not a git checkout. Try using -g.')
return None
- with TemporaryDirectory() as dfn:
+ with tempfile.TemporaryDirectory() as dfn:
gitargs = ['clone', '--mirror', '--shared', gitdir, dfn]
git_run_command(None, gitargs)
yield dfn
@@ -1862,9 +1865,9 @@ def get_data_dir(appname: str = 'b4') -> str:
if 'XDG_DATA_HOME' in os.environ:
datahome = os.environ['XDG_DATA_HOME']
else:
- datahome = os.path.join(str(Path.home()), '.local', 'share')
+ datahome = os.path.join(str(pathlib.Path.home()), '.local', 'share')
datadir = os.path.join(datahome, appname)
- Path(datadir).mkdir(parents=True, exist_ok=True)
+ pathlib.Path(datadir).mkdir(parents=True, exist_ok=True)
return datadir
@@ -1873,9 +1876,9 @@ def get_cache_dir(appname: str = 'b4') -> str:
if 'XDG_CACHE_HOME' in os.environ:
cachehome = os.environ['XDG_CACHE_HOME']
else:
- cachehome = os.path.join(str(Path.home()), '.cache')
+ cachehome = os.path.join(str(pathlib.Path.home()), '.cache')
cachedir = os.path.join(cachehome, appname)
- Path(cachedir).mkdir(parents=True, exist_ok=True)
+ pathlib.Path(cachedir).mkdir(parents=True, exist_ok=True)
if _CACHE_CLEANED:
return cachedir
@@ -1888,12 +1891,16 @@ def get_cache_dir(appname: str = 'b4') -> str:
expmin = 600
expage = time.time() - expmin
for entry in os.listdir(cachedir):
- if entry.find('.mbx') <= 0 and entry.find('.lookup') <= 0:
+ if entry.find('.mbx') <= 0 and entry.find('.lookup') <= 0 and entry.find('.msgs'):
continue
- st = os.stat(os.path.join(cachedir, entry))
+ fullpath = os.path.join(cachedir, entry)
+ st = os.stat(fullpath)
if st.st_mtime < expage:
logger.debug('Cleaning up cache: %s', entry)
- os.unlink(os.path.join(cachedir, entry))
+ if os.path.isdir(fullpath):
+ shutil.rmtree(fullpath)
+ else:
+ os.unlink(os.path.join(cachedir, entry))
_CACHE_CLEANED = True
return cachedir
@@ -1985,13 +1992,14 @@ def get_msgid(cmdargs) -> Optional[str]:
return msgid
-def save_strict_thread(in_mbx, out_mbx, msgid):
+def get_strict_thread(msgs, msgid):
want = {msgid}
got = set()
seen = set()
maybe = dict()
+ strict = list()
while True:
- for msg in in_mbx:
+ for msg in msgs:
c_msgid = LoreMessage.get_clean_msgid(msg)
seen.add(c_msgid)
if c_msgid in got:
@@ -2016,7 +2024,7 @@ def save_strict_thread(in_mbx, out_mbx, msgid):
maybe[ref].add(c_msgid)
if c_msgid in want:
- out_mbx.add(msg)
+ strict.append(msg)
got.add(c_msgid)
want.update(refs)
want.discard(c_msgid)
@@ -2038,19 +2046,41 @@ def save_strict_thread(in_mbx, out_mbx, msgid):
if not len(want):
break
- if not len(out_mbx):
+ if not len(strict):
return None
- if len(in_mbx) > len(out_mbx):
- logger.debug('Reduced mbox to strict matches only (%s->%s)', len(in_mbx), len(out_mbx))
+ if len(msgs) > len(strict):
+ logger.debug('Reduced mbox to strict matches only (%s->%s)', len(msgs), len(strict))
+ return strict
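
get_strict_thread() now filters an in-memory list of messages rather than copying between two on-disk mboxes; a minimal usage sketch (msgs and the Message-ID are hypothetical):

    # Hedged sketch: keep only messages reachable from the given Message-ID.
    strict = get_strict_thread(msgs, 'some-msgid@example.org')
    if strict:
        logger.debug('Kept %s of %s messages', len(strict), len(msgs))
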
-def get_pi_thread_by_url(t_mbx_url, savefile, nocache=False):
- cachefile = get_cache_file(t_mbx_url, 'pi.mbx')
- if os.path.exists(cachefile) and not nocache:
- logger.debug('Using cached copy: %s', cachefile)
- shutil.copyfile(cachefile, savefile)
- return savefile
+
+def mailsplit_bytes(bmbox: bytes, outdir: str) -> list:
+ logger.debug('Mailsplitting the mbox into %s', outdir)
+ args = ['mailsplit', '--mboxrd', '-o%s' % outdir]
+ ecode, out = git_run_command(None, args, stdin=bmbox)
+ msgs = list()
+ if ecode > 0:
+ logger.critical('Unable to parse mbox received from the server')
+ return msgs
+ # Read in the files
+ for msg in os.listdir(outdir):
+ with open(os.path.join(outdir, msg), 'rb') as fh:
+ msgs.append(email.message_from_binary_file(fh))
+ return msgs
+
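
mailsplit_bytes() delegates the mboxrd parsing to git mailsplit and then loads each split file back as an email message; a hedged usage sketch (the raw mbox bytes are hypothetical):

    # Hedged sketch: split a raw mboxrd blob into individual messages.
    with tempfile.TemporaryDirectory(suffix='-mailsplit') as tfd:
        msgs = mailsplit_bytes(raw_mbox, tfd)  # raw_mbox: bytes, hypothetical
    logger.debug('Parsed %s messages out of the mbox', len(msgs))
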
+
+def get_pi_thread_by_url(t_mbx_url, nocache=False):
+ msgs = list()
+ cachedir = get_cache_file(t_mbx_url, 'pi.msgs')
+ if os.path.exists(cachedir) and not nocache:
+ logger.info('Using cached copy: %s', cachedir)
+ for msg in os.listdir(cachedir):
+ with open(os.path.join(cachedir, msg), 'rb') as fh:
+ msgs.append(email.message_from_binary_file(fh))
+ return msgs
+
+ logger.critical('Grabbing thread from %s', t_mbx_url.split('://')[1])
session = get_requests_session()
resp = session.get(t_mbx_url)
if resp.status_code != 200:
@@ -2061,16 +2091,16 @@ def get_pi_thread_by_url(t_mbx_url, savefile, nocache=False):
if not len(t_mbox):
logger.critical('No messages found for that query')
return None
- # Convert mboxrd to mboxo that python understands
- t_mbox = t_mbox.replace(b'\n>>From ', b'\n>From ')
- with open(savefile, 'wb') as fh:
- logger.debug('Saving %s', savefile)
- fh.write(t_mbox)
- shutil.copyfile(savefile, cachefile)
- return savefile
+ # Convert into individual files using git-mailsplit
+ with tempfile.TemporaryDirectory(suffix='-mailsplit') as tfd:
+ msgs = mailsplit_bytes(t_mbox, tfd)
+ if os.path.exists(cachedir):
+ shutil.rmtree(cachedir)
+ shutil.copytree(tfd, cachedir)
+ return msgs
-def get_pi_thread_by_msgid(msgid, savefile, useproject=None, nocache=False):
+def get_pi_thread_by_msgid(msgid, useproject=None, nocache=False):
qmsgid = urllib.parse.quote_plus(msgid)
config = get_main_config()
# Grab the head from lore, to see where we are redirected
@@ -2092,25 +2122,17 @@ def get_pi_thread_by_msgid(msgid, savefile, useproject=None, nocache=False):
t_mbx_url = '%s/%s/t.mbox.gz' % (projurl, qmsgid)
logger.debug('t_mbx_url=%s', t_mbx_url)
- logger.critical('Grabbing thread from %s', projurl.split('://')[1])
-
- tmp_mbox = mkstemp('b4-lookup-mbox')[1]
- in_mbxf = get_pi_thread_by_url(t_mbx_url, tmp_mbox, nocache=nocache)
- if not in_mbxf:
- os.unlink(tmp_mbox)
+ msgs = get_pi_thread_by_url(t_mbx_url, nocache=nocache)
+ if not len(msgs):
return None
- in_mbx = mailbox.mbox(in_mbxf)
- out_mbx = mailbox.mbox(savefile)
- save_strict_thread(in_mbx, out_mbx, msgid)
- in_mbx.close()
- out_mbx.close()
- os.unlink(in_mbxf)
- return savefile
+
+ strict = get_strict_thread(msgs, msgid)
+ return strict
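
With the savefile parameter gone, get_pi_thread_by_msgid() is a pure fetch-and-filter helper: it resolves the project, downloads t.mbox.gz, and returns only the strict thread as a list; a minimal call sketch (the Message-ID is invented):

    # Hedged sketch: fetch a whole thread as a list of messages, no temp files.
    thread = get_pi_thread_by_msgid('20210101120000.1234-1-dev@example.org')
    if thread:
        logger.info('Thread contains %s messages', len(thread))
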
@contextmanager
def git_format_patches(gitdir, start, end, prefixes=None, extraopts=None):
- with TemporaryDirectory() as tmpd:
+ with tempfile.TemporaryDirectory() as tmpd:
gitargs = ['format-patch', '--cover-letter', '-o', tmpd, '--signature', f'b4 {__VERSION__}']
if prefixes is not None and len(prefixes):
gitargs += ['--subject-prefix', ' '.join(prefixes)]
@@ -2238,3 +2260,19 @@ def get_gpg_uids(keyid: str) -> list:
uids.append(chunks[9])
return uids
+
+
+def save_git_am_mbox(msgs: list, dest):
+ # Git-am has its own understanding of what "mbox" format is that differs from Python's
+ # mboxo implementation. Specifically, it never escapes the ">From " lines found in bodies
+ # unless invoked with --patch-format=mboxrd (this is wrong, because ">From " escapes are also
+ # required in the original mbox "mboxo" format).
+ # So, save in the format that git-am expects
+ # "dest" should be a file handler in writable+binary mode
+ for msg in msgs:
+ bmsg = msg.as_bytes(unixfrom=True, policy=emlpolicy)
+ # public-inbox unixfrom says "mboxrd", so replace it with something else
+ # so there is no confusion as it's NOT mboxrd
+ bmsg = bmsg.replace(b'From mboxrd@z ', b'From git@z ')
+ bmsg = bmsg.rstrip(b'\r\n') + b'\n\n'
+ dest.write(bmsg)
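
save_git_am_mbox() expects an already-open binary file object, which lets it compose with the (slug, msg) pairs from get_am_ready(); a hedged end-to-end sketch (the series object and destination path are hypothetical):

    # Hedged sketch: write am-ready messages into a git-am friendly mbox.
    am_msgs = [msg for _slug, msg in lseries.get_am_ready()]  # lseries is hypothetical
    with open('/tmp/series.mbx', 'wb') as fh:                 # hypothetical destination
        save_git_am_mbox(am_msgs, fh)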