author     Konstantin Ryabitsev <konstantin@linuxfoundation.org>  2020-03-14 21:54:40 -0400
committer  Konstantin Ryabitsev <konstantin@linuxfoundation.org>  2020-03-14 21:54:40 -0400
commit     ae57d6ea0b7abb7f945cac6010f5c9b28b041dde (patch)
tree       e21b608b991c7d1be5c4d94c1e51feecefbcc628 /b4/mbox.py
download   b4-ae57d6ea0b7abb7f945cac6010f5c9b28b041dde.tar.gz
Initial commit after porting from korg-helpers
This is the beginning of a new tool that inherits from get-lore-mbox and attest-patches.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
Diffstat (limited to 'b4/mbox.py')
-rw-r--r--  b4/mbox.py  397
1 file changed, 397 insertions, 0 deletions
diff --git a/b4/mbox.py b/b4/mbox.py
new file mode 100644
index 0000000..3c06bd2
--- /dev/null
+++ b/b4/mbox.py
@@ -0,0 +1,397 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2020 by the Linux Foundation
+#
+__author__ = 'Konstantin Ryabitsev <konstantin@linuxfoundation.org>'
+
+import os
+import sys
+import mailbox
+import email
+import email.message
+import email.utils
+import re
+import time
+
+import urllib.parse
+import xml.etree.ElementTree
+import gzip
+
+import b4
+
+from tempfile import mkstemp
+
+logger = b4.logger
+
+
+def get_msgid_from_stdin():
+ if not sys.stdin.isatty():
+ message = email.message_from_string(sys.stdin.read())
+ return message.get('Message-ID', None)
+ logger.error('Error: pipe a message or pass msgid as parameter')
+ sys.exit(1)
+
+
+def get_pi_thread_by_url(t_mbx_url, savefile):
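+    # Fetch the gzipped thread mbox from the given URL, decompress it and
+    # write it to savefile; returns the path, or None on error or empty result.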
+ session = b4.get_requests_session()
+ resp = session.get(t_mbx_url)
+ if resp.status_code != 200:
+ logger.critical('Server returned an error: %s', resp.status_code)
+ return None
+ t_mbox = gzip.decompress(resp.content)
+ resp.close()
+ if not len(t_mbox):
+ logger.critical('No messages found for that query')
+ return None
+ with open(savefile, 'wb') as fh:
+ logger.debug('Saving %s', savefile)
+ fh.write(t_mbox)
+ return savefile
+
+
+def get_pi_thread_by_msgid(msgid, config, cmdargs):
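+    # Resolve the message-id through the configured midmask URL, follow the
+    # redirect to the canonical thread location, download its t.mbox.gz and
+    # keep only the messages that strictly belong to this thread.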
+ wantname = cmdargs.wantname
+ outdir = cmdargs.outdir
+ # Grab the head from lore, to see where we are redirected
+ midmask = config['midmask'] % msgid
+ logger.info('Looking up %s', midmask)
+ session = b4.get_requests_session()
+ resp = session.head(midmask)
+    if resp.status_code < 300 or resp.status_code >= 400:
+ logger.critical('That message-id is not known.')
+ return None
+ canonical = resp.headers['Location'].rstrip('/')
+ resp.close()
+ t_mbx_url = '%s/t.mbox.gz' % canonical
+ if wantname:
+ savefile = os.path.join(outdir, wantname)
+ else:
+        # Save it into msgid.t.mbx
+ savefile = '%s.t.mbx' % msgid
+ savefile = os.path.join(outdir, savefile)
+
+ loc = urllib.parse.urlparse(t_mbx_url)
+ if cmdargs.useproject:
+ logger.debug('Modifying query to use %s', cmdargs.useproject)
+ t_mbx_url = '%s://%s/%s/%s/t.mbox.gz' % (
+ loc.scheme, loc.netloc, cmdargs.useproject, msgid)
+ logger.debug('Will query: %s', t_mbx_url)
+ logger.critical('Grabbing thread from %s', loc.netloc)
+ in_mbxf = get_pi_thread_by_url(t_mbx_url, '%s-loose' % savefile)
+ if not in_mbxf:
+ return None
+ in_mbx = mailbox.mbox(in_mbxf)
+ out_mbx = mailbox.mbox(savefile)
+ b4.save_strict_thread(in_mbx, out_mbx, msgid)
+ in_mbx.close()
+ out_mbx.close()
+ os.unlink(in_mbxf)
+ return savefile
+
+
+def mbox_to_am(mboxfile, config, cmdargs):
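+    # Convert the thread mbox into a git-am-ready mailbox: pick the requested
+    # (or latest) series revision, collect follow-up trailers, and optionally
+    # save the cover letter and a quilt-ready patches directory.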
+ outdir = cmdargs.outdir
+ wantver = cmdargs.wantver
+ wantname = cmdargs.wantname
+ covertrailers = cmdargs.covertrailers
+ mbx = mailbox.mbox(mboxfile)
+ count = len(mbx)
+ logger.info('Analyzing %s messages in the thread', count)
+ lmbx = b4.LoreMailbox()
+ # Go through the mbox once to populate base series
+ for key, msg in mbx.items():
+ lmbx.add_message(msg)
+
+ lser = lmbx.get_series(revision=wantver)
+ if lser is None and wantver is None:
+ logger.critical('No patches found.')
+ return
+ if lser is None:
+ logger.critical('Unable to find revision %s', wantver)
+ return
+ if len(lmbx.series) > 1 and not wantver:
+ logger.info('Will use the latest revision: v%s', lser.revision)
+ logger.info('You can pick other revisions using the -vN flag')
+
+ if wantname:
+ slug = wantname
+ if wantname.find('.') > -1:
+ slug = '.'.join(wantname.split('.')[:-1])
+ else:
+ slug = lser.get_slug()
+
+ am_filename = os.path.join(outdir, '%s.mbx' % slug)
+ am_cover = os.path.join(outdir, '%s.cover' % slug)
+
+ if os.path.exists(am_filename):
+ os.unlink(am_filename)
+
+ logger.info('---')
+ logger.critical('Writing %s', am_filename)
+ mbx = mailbox.mbox(am_filename)
+ am_mbx = lser.save_am_mbox(mbx, cmdargs.noaddtrailers, covertrailers,
+ trailer_order=config['trailer-order'],
+ addmysob=cmdargs.addmysob, addlink=cmdargs.addlink,
+ linkmask=config['linkmask'])
+ logger.info('---')
+
+ logger.critical('Total patches: %s', len(am_mbx))
+ if lser.has_cover and lser.patches[0].followup_trailers and not covertrailers:
+ # Warn that some trailers were sent to the cover letter
+ logger.critical('---')
+ logger.critical('NOTE: Some trailers were sent to the cover letter:')
+ for trailer in lser.patches[0].followup_trailers:
+ logger.critical(' %s: %s', trailer[0], trailer[1])
+ logger.critical('NOTE: Rerun with -t to apply them to all patches')
+
+ logger.critical('---')
+ if not lser.complete:
+ logger.critical('WARNING: Thread incomplete!')
+
+ if lser.has_cover:
+ lser.save_cover(am_cover)
+
+ top_msgid = None
+ first_body = None
+ for lmsg in lser.patches:
+ if lmsg is not None:
+ first_body = lmsg.body
+ top_msgid = lmsg.msgid
+ break
+ if top_msgid is None:
+ logger.critical('Could not find any patches in the series.')
+ return
+
+ linkurl = config['linkmask'] % top_msgid
+ if cmdargs.quiltready:
+ q_dirname = os.path.join(outdir, '%s.patches' % slug)
+ am_mbox_to_quilt(am_mbx, q_dirname)
+ logger.critical('Quilt: %s', q_dirname)
+
+ logger.critical(' Link: %s', linkurl)
+
+ base_commit = None
+ matches = re.search(r'base-commit: .*?([0-9a-f]+)', first_body, re.MULTILINE)
+ if matches:
+ base_commit = matches.groups()[0]
+ else:
+ # Try a more relaxed search
+ matches = re.search(r'based on .*?([0-9a-f]{40})', first_body, re.MULTILINE)
+ if matches:
+ base_commit = matches.groups()[0]
+
+ if base_commit:
+ logger.critical(' Base: %s', base_commit)
+ logger.critical(' git checkout -b %s %s', slug, base_commit)
+ logger.critical(' git am %s', am_filename)
+ else:
+ logger.critical(' Base: not found, sorry')
+ logger.critical(' git checkout -b %s master', slug)
+ logger.critical(' git am %s', am_filename)
+
+ am_mbx.close()
+
+ return am_filename
+
+
+def am_mbox_to_quilt(am_mbx, q_dirname):
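+    # Split the am-ready mailbox into a quilt-style patches directory by
+    # running each message through git mailinfo, then write the series file.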
+ if os.path.exists(q_dirname):
+ logger.critical('ERROR: Directory %s exists, not saving quilt patches', q_dirname)
+ return
+ os.mkdir(q_dirname, 0o755)
+ patch_filenames = list()
+ for key, msg in am_mbx.items():
+ # Run each message through git mailinfo
+ msg_out = mkstemp(suffix=None, prefix=None, dir=q_dirname)
+ patch_out = mkstemp(suffix=None, prefix=None, dir=q_dirname)
+ cmdargs = ['mailinfo', '--encoding=UTF-8', msg_out[1], patch_out[1]]
+ ecode, info = b4.git_run_command(None, cmdargs, msg.as_bytes(policy=b4.emlpolicy))
+ if not len(info.strip()):
+ logger.critical('ERROR: Could not get mailinfo from patch %s', msg['Subject'])
+ continue
+ patchinfo = dict()
+ for line in info.split('\n'):
+ line = line.strip()
+ if not line:
+ continue
+ chunks = line.split(':', 1)
+ patchinfo[chunks[0]] = chunks[1]
+
+ slug = re.sub(r'\W+', '_', patchinfo['Subject']).strip('_').lower()
+ patch_filename = '%04d_%s.patch' % (key+1, slug)
+ patch_filenames.append(patch_filename)
+ quilt_out = os.path.join(q_dirname, patch_filename)
+ with open(quilt_out, 'wb') as fh:
+ line = 'From: %s <%s>\n' % (patchinfo['Author'].strip(), patchinfo['Email'].strip())
+ fh.write(line.encode('utf-8'))
+ line = 'Subject: %s\n' % patchinfo['Subject'].strip()
+ fh.write(line.encode('utf-8'))
+ line = 'Date: %s\n' % patchinfo['Date'].strip()
+ fh.write(line.encode('utf-8'))
+ fh.write('\n'.encode('utf-8'))
+ with open(msg_out[1], 'r') as mfh:
+ fh.write(mfh.read().encode('utf-8'))
+ with open(patch_out[1], 'r') as pfh:
+ fh.write(pfh.read().encode('utf-8'))
+ logger.debug(' Wrote: %s', patch_filename)
+ os.unlink(msg_out[1])
+ os.unlink(patch_out[1])
+ # Write the series file
+ with open(os.path.join(q_dirname, 'series'), 'w') as sfh:
+ for patch_filename in patch_filenames:
+ sfh.write('%s\n' % patch_filename)
+
+
+def get_newest_series(mboxfile):
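+    # Query the list archive's Atom search feed for same-subject, same-author
+    # messages and append any newer revisions of the series to the mbox.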
+ # Open the mbox and find the latest series mentioned in it
+ mbx = mailbox.mbox(mboxfile)
+ base_msg = None
+ latest_revision = None
+ seen_msgids = list()
+ seen_covers = list()
+ for key, msg in mbx.items():
+ msgid = b4.LoreMessage.get_clean_msgid(msg)
+ seen_msgids.append(msgid)
+ lsub = b4.LoreSubject(msg['Subject'])
+ # Ignore replies or counters above 1
+ if lsub.reply or lsub.counter > 1:
+ continue
+ if latest_revision is None or lsub.revision > latest_revision:
+ # New revision
+ latest_revision = lsub.revision
+ if lsub.counter == 0:
+ # And a cover letter, nice. This is the easy case
+ base_msg = msg
+ seen_covers.append(latest_revision)
+ continue
+ if lsub.counter == 1:
+ if latest_revision not in seen_covers:
+ # A patch/series without a cover letter
+ base_msg = msg
+
+ # Get subject info from base_msg again
+ lsub = b4.LoreSubject(base_msg['Subject'])
+ if not len(lsub.prefixes):
+ logger.debug('Not checking for new revisions: no prefixes on the cover letter.')
+ mbx.close()
+ return
+ base_msgid = b4.LoreMessage.get_clean_msgid(base_msg)
+ fromeml = email.utils.getaddresses(base_msg.get_all('from', []))[0][1]
+ msgdate = email.utils.parsedate_tz(str(base_msg['Date']))
+ startdate = time.strftime('%Y%m%d', msgdate[:9])
+ listarc = base_msg.get_all('List-Archive')[-1].strip('<>')
+ q = 's:"%s" AND f:"%s" AND d:%s..' % (lsub.subject.replace('"', ''), fromeml, startdate)
+ queryurl = '%s?%s' % (listarc, urllib.parse.urlencode({'q': q, 'x': 'A', 'o': '-1'}))
+ logger.critical('Checking for newer revisions on %s', listarc)
+ logger.debug('Query URL: %s', queryurl)
+ session = b4.get_requests_session()
+ resp = session.get(queryurl)
+ # try to parse it
+ try:
+ tree = xml.etree.ElementTree.fromstring(resp.content)
+ except xml.etree.ElementTree.ParseError as ex:
+        logger.debug('Unable to parse results, ignoring: %s', ex)
+ resp.close()
+ mbx.close()
+ return
+ resp.close()
+ ns = {'atom': 'http://www.w3.org/2005/Atom'}
+ entries = tree.findall('atom:entry', ns)
+
+ for entry in entries:
+ title = entry.find('atom:title', ns).text
+ lsub = b4.LoreSubject(title)
+ if lsub.reply or lsub.counter > 1:
+ logger.debug('Ignoring result (not interesting): %s', title)
+ continue
+ link = entry.find('atom:link', ns).get('href')
+ if lsub.revision < latest_revision:
+ logger.debug('Ignoring result (not new revision): %s', title)
+ continue
+ if link.find('/%s/' % base_msgid) > 0:
+            logger.debug('Ignoring result (same thread as ours): %s', title)
+ continue
+ if lsub.revision == 1 and lsub.revision == latest_revision:
+ # Someone sent a separate message with an identical title but no new vX in the subject line
+ # It's *probably* a new revision.
+ logger.debug('Likely a new revision: %s', title)
+ elif lsub.revision > latest_revision:
+ logger.debug('Definitely a new revision [v%s]: %s', lsub.revision, title)
+ else:
+ logger.debug('No idea what this is: %s', title)
+ continue
+ t_mbx_url = '%st.mbox.gz' % link
+ savefile = mkstemp('b4-get')[1]
+ nt_mboxfile = get_pi_thread_by_url(t_mbx_url, savefile)
+ nt_mbx = mailbox.mbox(nt_mboxfile)
+ # Append all of these to the existing mailbox
+ new_adds = 0
+ for nt_msg in nt_mbx:
+ nt_msgid = b4.LoreMessage.get_clean_msgid(nt_msg)
+ if nt_msgid in seen_msgids:
+ logger.debug('Duplicate message, skipping')
+ continue
+ nt_subject = re.sub(r'\s+', ' ', nt_msg['Subject'])
+ logger.debug('Adding: %s', nt_subject)
+ new_adds += 1
+ mbx.add(nt_msg)
+ seen_msgids.append(nt_msgid)
+ nt_mbx.close()
+ if new_adds:
+ logger.info('Added %s messages from thread: %s', new_adds, title)
+ logger.debug('Removing temporary %s', nt_mboxfile)
+ os.unlink(nt_mboxfile)
+
+ # We close the mbox, since we'll be reopening it later
+ mbx.close()
+
+
+def main(cmdargs):
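+    # Entry point: determine the message-id (argument or stdin) or use a local
+    # mbox, fetch the thread, optionally look for newer revisions, then either
+    # convert it for git-am or just save the thread mbox.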
+ config = b4.get_main_config()
+
+ if not cmdargs.localmbox:
+ if not cmdargs.msgid:
+ logger.debug('Getting Message-ID from stdin')
+ msgid = get_msgid_from_stdin()
+ if msgid is None:
+ logger.error('Unable to find a valid message-id in stdin.')
+ sys.exit(1)
+ else:
+ msgid = cmdargs.msgid
+
+ msgid = msgid.strip('<>')
+ # Handle the case when someone pastes a full URL to the message
+ matches = re.search(r'^https?://[^/]+/([^/]+)/([^/]+@[^/]+)', msgid, re.IGNORECASE)
+ if matches:
+ chunks = matches.groups()
+ msgid = chunks[1]
+ # Infer the project name from the URL, if possible
+ if chunks[0] != 'r':
+ cmdargs.useproject = chunks[0]
+
+ mboxfile = get_pi_thread_by_msgid(msgid, config, cmdargs)
+ if mboxfile is None:
+ return
+
+ # Move it into -thread
+ threadmbox = '%s-thread' % mboxfile
+ os.rename(mboxfile, threadmbox)
+ else:
+ if os.path.exists(cmdargs.localmbox):
+ threadmbox = cmdargs.localmbox
+ else:
+ logger.critical('Mailbox %s does not exist', cmdargs.localmbox)
+ sys.exit(1)
+
+ if threadmbox and cmdargs.checknewer:
+ get_newest_series(threadmbox)
+
+ if cmdargs.subcmd == 'am':
+ mbox_to_am(threadmbox, config, cmdargs)
+ if not cmdargs.localmbox:
+ os.unlink(threadmbox)
+ else:
+ mbx = mailbox.mbox(threadmbox)
+ logger.critical('Saved %s', threadmbox)
+ logger.critical('%s messages in the thread', len(mbx))