summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-05-15 17:38:36 -0400
committer: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-05-15 17:44:23 -0400
commit: e4e1f6e35d147bb567e3c2d996e628fe1ca6467a (patch)
tree: dd184ff645423dcf2a4c38c0170f75d7618beb96
parent: 49b6717b785b82258f3647c79095673a8a3d8bac (diff)
download: b4-e4e1f6e35d147bb567e3c2d996e628fe1ca6467a.tar.gz
Initial implementation of b4 diff
Based on feedback from Jason Gunthorpe, implement diffing of series by
creating fake git-am commit ranges.

Here's an easy example:

    b4 diff 20200511192156.1618284-1-mic@digikod.net

Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- b4/__init__.py 63
-rw-r--r-- b4/command.py 19
-rw-r--r-- b4/diff.py 134
-rw-r--r-- b4/mbox.py 47
4 files changed, 239 insertions, 24 deletions
diff --git a/b4/__init__.py b/b4/__init__.py
index a3767bf..064f8b8 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -21,7 +21,8 @@ import mailbox
import pwd
from pathlib import Path
-from tempfile import mkstemp
+from tempfile import mkstemp, TemporaryDirectory
+from contextlib import contextmanager
from email import charset
charset.add_charset('utf-8', None)
@@ -401,16 +402,11 @@ class LoreSeries:
self.trailer_mismatches = set()
self.complete = False
self.has_cover = False
+ self.subject = '(untitled)'
def __repr__(self):
out = list()
- if self.has_cover:
- out.append('- Series: [v%s] %s' % (self.revision, self.patches[0].subject))
- elif self.patches[1] is not None:
- out.append('- Series: [v%s] %s' % (self.revision, self.patches[1].subject))
- else:
- out.append('- Series: [v%s] (untitled)' % self.revision)
-
+ out.append('- Series: [v%s] %s' % (self.revision, self.subject))
out.append(' revision: %s' % self.revision)
out.append(' expected: %s' % self.expected)
out.append(' complete: %s' % self.complete)
@@ -442,6 +438,10 @@ class LoreSeries:
else:
self.patches[lmsg.counter] = lmsg
self.complete = not (None in self.patches[1:])
+ if self.patches[0] is not None:
+ self.subject = self.patches[0].subject
+ elif self.patches[1] is not None:
+ self.subject = self.patches[1].subject
def get_slug(self, extended=False):
# Find the first non-None entry
@@ -468,7 +468,7 @@ class LoreSeries:
return slug
- def save_am_mbox(self, mbx, noaddtrailers, covertrailers, trailer_order=None, addmysob=False,
+ def save_am_mbox(self, mbx, noaddtrailers=False, covertrailers=False, trailer_order=None, addmysob=False,
addlink=False, linkmask=None, cherrypick=None):
usercfg = get_user_config()
@@ -600,6 +600,9 @@ class LoreSeries:
# and it's no longer going to match current hash
continue
seenfiles.add(fn)
+ if set(bh) == {'0'}:
+ # New file, will for sure apply clean
+ continue
fullpath = os.path.join(topdir, fn)
if when is None:
if not os.path.exists(fullpath):
@@ -926,10 +929,17 @@ class LoreMessage:
@staticmethod
def get_indexes(diff):
indexes = set()
- for match in re.finditer(r'^diff\s+--git\s+\w/(.*)\s+\w/.*\nindex\s+([0-9a-f]+)\.\.[0-9a-f]+\s+[0-9]+$',
- diff, flags=re.I | re.M):
- fname, bindex = match.groups()
- indexes.add((fname, bindex))
+ curfile = None
+ for line in diff.split('\n'):
+ if line.find('diff ') != 0 and line.find('index ') != 0:
+ continue
+ matches = re.search(r'^diff\s+--git\s+\w/(.*)\s+\w/(.*)$', line)
+ if matches and matches.groups()[0] == matches.groups()[1]:
+ curfile = matches.groups()[0]
+ continue
+ matches = re.search(r'^index\s+([0-9a-f]+)\.\.[0-9a-f]+.*$', line)
+ if matches and curfile is not None:
+ indexes.add((curfile, matches.groups()[0]))
return indexes
@staticmethod
@@ -1627,6 +1637,33 @@ def git_get_command_lines(gitdir, args):
return lines
+@contextmanager
+def git_temp_worktree(gitdir=None):
+ """Context manager that creates a temporary work tree and chdirs into it. The
+ worktree is deleted when the context manager is closed. Taken from gj_tools."""
+ dfn = None
+ try:
+ with TemporaryDirectory() as dfn:
+ git_run_command(gitdir, ['worktree', 'add', '--detach', '--no-checkout', dfn])
+ with in_directory(dfn):
+ yield
+ finally:
+ if dfn is not None:
+ git_run_command(gitdir, ['worktree', 'remove', dfn])
+
+
+@contextmanager
+def in_directory(dirname):
+ """Context manager that chdirs into a directory and restores the original
+ directory when closed. Taken from gj_tools."""
+ cdir = os.getcwd()
+ try:
+ os.chdir(dirname)
+ yield True
+ finally:
+ os.chdir(cdir)
+
+
def get_config_from_git(regexp, defaults=None):
args = ['config', '-z', '--get-regexp', regexp]
ecode, out = git_run_command(None, args)
diff --git a/b4/command.py b/b4/command.py
index 7709649..12900ff 100644
--- a/b4/command.py
+++ b/b4/command.py
@@ -60,6 +60,11 @@ def cmd_ty(cmdargs):
b4.ty.main(cmdargs)
+def cmd_diff(cmdargs):
+ import b4.diff
+ b4.diff.main(cmdargs)
+
+
def cmd():
# noinspection PyTypeChecker
parser = argparse.ArgumentParser(
@@ -164,6 +169,20 @@ def cmd():
help='The --since option to use when auto-matching patches (default=1.week)')
sp_ty.set_defaults(func=cmd_ty)
+ # b4 diff
+ sp_diff = subparsers.add_parser('diff', help='Show a range-diff to previous series revision')
+ sp_diff.add_argument('msgid', nargs='?',
+ help='Message ID to process, or pipe a raw message')
+ sp_diff.add_argument('-g', '--gitdir', default=None,
+ help='Operate on this git tree instead of current dir')
+ sp_diff.add_argument('-p', '--use-project', dest='useproject', default=None,
+ help='Use a specific project instead of guessing (linux-mm, linux-hardening, etc)')
+ sp_diff.add_argument('-C', '--no-cache', dest='nocache', action='store_true', default=False,
+ help='Do not use local cache')
+ sp_diff.add_argument('-v', '--compare-versions', dest='wantvers', type=int, default=None, nargs='+',
+ help='Compare specific versions instead of latest and one before that, e.g. -v 3 5')
+ sp_diff.set_defaults(func=cmd_diff)
+
cmdargs = parser.parse_args()
logger.setLevel(logging.DEBUG)
diff --git a/b4/diff.py b/b4/diff.py
new file mode 100644
index 0000000..2d9ebe0
--- /dev/null
+++ b/b4/diff.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2020 by the Linux Foundation
+#
+__author__ = 'Konstantin Ryabitsev <konstantin@linuxfoundation.org>'
+
+import os
+import sys
+import b4
+import b4.mbox
+import mailbox
+from tempfile import mkstemp
+
+
+logger = b4.logger
+
+
+def make_fake_commit_range(gitdir, lser):
+ logger.info('Preparing fake-am for v%s: %s', lser.revision, lser.subject)
+ with b4.git_temp_worktree(gitdir):
+ # We are in a temporary chdir at this time, so writing to a known file should be safe
+ mbxf = '.__git-am__'
+ mbx = mailbox.mbox(mbxf)
+ # Logic largely borrowed from gj_tools
+ seenfiles = set()
+ for lmsg in lser.patches[1:]:
+ logger.debug('Looking at %s', lmsg.full_subject)
+ lmsg.load_hashes()
+ for fn, fi in lmsg.blob_indexes:
+ if fn in seenfiles:
+ # We already processed this file, so this blob won't match
+ continue
+ seenfiles.add(fn)
+ if set(fi) == {'0'}:
+ # New file creation, nothing to do here
+ logger.debug(' New file: %s', fn)
+ continue
+ # Try to grab full ref_id of this hash
+ ecode, out = b4.git_run_command(gitdir, ['rev-parse', fi])
+ if ecode > 0:
+ logger.critical(' ERROR: Could not find matching blob for %s (%s)', fn, fi)
+ # TODO: better handling
+ return None, None
+ logger.debug(' Found matching blob for: %s', fn)
+ fullref = out.strip()
+ gitargs = ['update-index', '--add', '--cacheinfo', f'0644,{fullref},{fn}']
+ ecode, out = b4.git_run_command(None, gitargs)
+ if ecode > 0:
+ logger.critical(' ERROR: Could not run update-index for %s (%s)', fn, fullref)
+ return None, None
+ mbx.add(lmsg.msg.as_string(policy=b4.emlpolicy).encode('utf-8'))
+
+ mbx.close()
+ ecode, out = b4.git_run_command(None, ['write-tree'])
+ if ecode > 0:
+ logger.critical('ERROR: Could not write fake-am tree')
+ return None, None
+ treeid = out.strip()
+ # At this point we have a worktree with files that should cleanly receive a git am
+ gitargs = ['commit-tree', treeid + '^{tree}', '-F', '-']
+ ecode, out = b4.git_run_command(None, gitargs, stdin='Initial fake commit'.encode('utf-8'))
+ if ecode > 0:
+ logger.critical('ERROR: Could not commit-tree')
+ return None, None
+ start_commit = out.strip()
+ b4.git_run_command(None, ['reset', '--hard', start_commit])
+ ecode, out = b4.git_run_command(None, ['am', mbxf])
+ if ecode > 0:
+ logger.critical('ERROR: Could not fake-am version %s', lser.revision)
+ return None, None
+ ecode, out = b4.git_run_command(None, ['rev-parse', 'HEAD'])
+ end_commit = out.strip()
+ logger.info(' range: %.12s..%.12s', start_commit, end_commit)
+
+ return start_commit, end_commit
+
+
+def main(cmdargs):
+ msgid = b4.get_msgid(cmdargs)
+ if cmdargs.wantvers and len(cmdargs.wantvers) > 2:
+ logger.critical('Can only compare two versions at a time')
+ sys.exit(1)
+
+ # start by grabbing the mbox provided
+ savefile = mkstemp('b4-diff-to')[1]
+ mboxfile = b4.get_pi_thread_by_msgid(msgid, savefile, useproject=cmdargs.useproject, nocache=cmdargs.nocache)
+ if mboxfile is None:
+ logger.critical('Unable to retrieve thread: %s', msgid)
+ return
+ logger.info('Retrieved %s messages in the thread', len(mboxfile))
+ b4.mbox.get_extra_series(mboxfile, direction=-1, wantvers=cmdargs.wantvers)
+ mbx = mailbox.mbox(mboxfile)
+ count = len(mbx)
+ logger.info('---')
+ logger.info('Analyzing %s messages in the thread', count)
+ lmbx = b4.LoreMailbox()
+ for key, msg in mbx.items():
+ lmbx.add_message(msg)
+ if cmdargs.wantvers and len(cmdargs.wantvers) == 1:
+ upper = max(lmbx.series.keys())
+ lower = cmdargs.wantvers[0]
+ elif cmdargs.wantvers and len(cmdargs.wantvers) == 2:
+ upper = max(cmdargs.wantvers)
+ lower = min(cmdargs.wantvers)
+ else:
+ upper = max(lmbx.series.keys())
+ lower = min(lmbx.series.keys())
+
+ if upper not in lmbx.series:
+ logger.critical('Could not find revision %s', upper)
+ sys.exit(1)
+ if lower not in lmbx.series:
+ logger.critical('Could not find revision %s', lower)
+ sys.exit(1)
+
+ # Prepare the lower fake-am range
+ lsc, lec = make_fake_commit_range(cmdargs.gitdir, lmbx.series[lower])
+ if lsc is None or lec is None:
+ logger.critical('---')
+ logger.critical('Could not create fake-am range for lower series v%s', lower)
+ os.unlink(mboxfile)
+ sys.exit(1)
+ # Prepare the upper fake-am range
+ usc, uec = make_fake_commit_range(cmdargs.gitdir, lmbx.series[upper])
+ if usc is None or uec is None:
+ logger.critical('---')
+ logger.critical('Could not create fake-am range for upper series v%s', upper)
+ os.unlink(mboxfile)
+ sys.exit(1)
+ logger.info('---')
+ logger.info('Success, you may now run:')
+ logger.info(' git range-diff %.12s..%.12s %.12s..%.12s', lsc, lec, usc, uec)
+
diff --git a/b4/mbox.py b/b4/mbox.py
index 5b08d1a..9ff88a6 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -98,8 +98,8 @@ def mbox_to_am(mboxfile, cmdargs):
cherrypick = None
logger.critical('Writing %s', am_filename)
mbx = mailbox.mbox(am_filename)
- am_mbx = lser.save_am_mbox(mbx, cmdargs.noaddtrailers, covertrailers,
- trailer_order=config['trailer-order'],
+ am_mbx = lser.save_am_mbox(mbx, noaddtrailers=cmdargs.noaddtrailers,
+ covertrailers=covertrailers, trailer_order=config['trailer-order'],
addmysob=cmdargs.addmysob, addlink=cmdargs.addlink,
linkmask=config['linkmask'], cherrypick=cherrypick)
logger.info('---')
@@ -310,7 +310,7 @@ def am_mbox_to_quilt(am_mbx, q_dirname):
sfh.write('%s\n' % patch_filename)
-def get_newest_series(mboxfile):
+def get_extra_series(mboxfile, direction=1, wantvers=None):
# Open the mbox and find the latest series mentioned in it
mbx = mailbox.mbox(mboxfile)
base_msg = None
@@ -343,14 +343,27 @@ def get_newest_series(mboxfile):
logger.debug('Not checking for new revisions: no prefixes on the cover letter.')
mbx.close()
return
+ if direction < 0 and latest_revision <= 1:
+ logger.debug('This is the latest version of the series')
+ mbx.close()
+ return
+ if direction < 0 and wantvers is None:
+ wantvers = [latest_revision - 1]
+
base_msgid = b4.LoreMessage.get_clean_msgid(base_msg)
fromeml = email.utils.getaddresses(base_msg.get_all('from', []))[0][1]
msgdate = email.utils.parsedate_tz(str(base_msg['Date']))
startdate = time.strftime('%Y%m%d', msgdate[:9])
listarc = base_msg.get_all('List-Archive')[-1].strip('<>')
- q = 's:"%s" AND f:"%s" AND d:%s..' % (lsub.subject.replace('"', ''), fromeml, startdate)
- queryurl = '%s?%s' % (listarc, urllib.parse.urlencode({'q': q, 'x': 'A', 'o': '-1'}))
- logger.critical('Checking for newer revisions on %s', listarc)
+ if direction > 0:
+ q = 's:"%s" AND f:"%s" AND d:%s..' % (lsub.subject.replace('"', ''), fromeml, startdate)
+ queryurl = '%s?%s' % (listarc, urllib.parse.urlencode({'q': q, 'x': 'A', 'o': '-1'}))
+ logger.critical('Checking for newer revisions on %s', listarc)
+ else:
+ q = 's:"%s" AND f:"%s" AND d:..%s' % (lsub.subject.replace('"', ''), fromeml, startdate)
+ queryurl = '%s?%s' % (listarc, urllib.parse.urlencode({'q': q, 'x': 'A', 'o': '1'}))
+ logger.critical('Checking for older revisions on %s', listarc)
+
logger.debug('Query URL: %s', queryurl)
session = b4.get_requests_session()
resp = session.get(queryurl)
@@ -373,18 +386,30 @@ def get_newest_series(mboxfile):
logger.debug('Ignoring result (not interesting): %s', title)
continue
link = entry.find('atom:link', ns).get('href')
- if lsub.revision < latest_revision:
+ if direction > 0 and lsub.revision <= latest_revision:
logger.debug('Ignoring result (not new revision): %s', title)
continue
+ elif direction < 0 and lsub.revision >= latest_revision:
+ logger.debug('Ignoring result (not old revision): %s', title)
+ continue
+ elif direction < 0 and lsub.revision not in wantvers:
+ logger.debug('Ignoring result (not revision we want): %s', title)
+ continue
if link.find('/%s/' % base_msgid) > 0:
logger.debug('Ignoring result (same thread as ours):%s', title)
continue
if lsub.revision == 1 and lsub.revision == latest_revision:
# Someone sent a separate message with an identical title but no new vX in the subject line
- # It's *probably* a new revision.
- logger.debug('Likely a new revision: %s', title)
- elif lsub.revision > latest_revision:
+ if direction > 0:
+ # It's *probably* a new revision.
+ logger.debug('Likely a new revision: %s', title)
+ else:
+ # It's *probably* an older revision.
+ logger.debug('Likely an older revision: %s', title)
+ elif direction > 0 and lsub.revision > latest_revision:
logger.debug('Definitely a new revision [v%s]: %s', lsub.revision, title)
+ elif direction < 0 and lsub.revision < latest_revision:
+ logger.debug('Definitely an older revision [v%s]: %s', lsub.revision, title)
else:
logger.debug('No idea what this is: %s', title)
continue
@@ -445,7 +470,7 @@ def main(cmdargs):
sys.exit(1)
if threadmbox and cmdargs.checknewer:
- get_newest_series(threadmbox)
+ get_extra_series(threadmbox, direction=1)
if cmdargs.subcmd == 'am':
mbox_to_am(threadmbox, cmdargs)