From e4e1f6e35d147bb567e3c2d996e628fe1ca6467a Mon Sep 17 00:00:00 2001 From: Konstantin Ryabitsev Date: Fri, 15 May 2020 17:38:36 -0400 Subject: Initial implementation of b4 diff Based on feedback from Jason Gunthorpe, implement diffing of series by creating fake git-am commit ranges. Here's an easy example: b4 diff 20200511192156.1618284-1-mic@digikod.net Suggested-by: Jason Gunthorpe Signed-off-by: Konstantin Ryabitsev --- b4/__init__.py | 63 +++++++++++++++++++++------ b4/command.py | 19 ++++++++ b4/diff.py | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ b4/mbox.py | 47 +++++++++++++++----- 4 files changed, 239 insertions(+), 24 deletions(-) create mode 100644 b4/diff.py diff --git a/b4/__init__.py b/b4/__init__.py index a3767bf..064f8b8 100644 --- a/b4/__init__.py +++ b/b4/__init__.py @@ -21,7 +21,8 @@ import mailbox import pwd from pathlib import Path -from tempfile import mkstemp +from tempfile import mkstemp, TemporaryDirectory +from contextlib import contextmanager from email import charset charset.add_charset('utf-8', None) @@ -401,16 +402,11 @@ class LoreSeries: self.trailer_mismatches = set() self.complete = False self.has_cover = False + self.subject = '(untitled)' def __repr__(self): out = list() - if self.has_cover: - out.append('- Series: [v%s] %s' % (self.revision, self.patches[0].subject)) - elif self.patches[1] is not None: - out.append('- Series: [v%s] %s' % (self.revision, self.patches[1].subject)) - else: - out.append('- Series: [v%s] (untitled)' % self.revision) - + out.append('- Series: [v%s] %s' % (self.revision, self.subject)) out.append(' revision: %s' % self.revision) out.append(' expected: %s' % self.expected) out.append(' complete: %s' % self.complete) @@ -442,6 +438,10 @@ class LoreSeries: else: self.patches[lmsg.counter] = lmsg self.complete = not (None in self.patches[1:]) + if self.patches[0] is not None: + self.subject = self.patches[0].subject + elif self.patches[1] is not None: + self.subject = 
self.patches[1].subject def get_slug(self, extended=False): # Find the first non-None entry @@ -468,7 +468,7 @@ class LoreSeries: return slug - def save_am_mbox(self, mbx, noaddtrailers, covertrailers, trailer_order=None, addmysob=False, + def save_am_mbox(self, mbx, noaddtrailers=False, covertrailers=False, trailer_order=None, addmysob=False, addlink=False, linkmask=None, cherrypick=None): usercfg = get_user_config() @@ -600,6 +600,9 @@ class LoreSeries: # and it's no longer going to match current hash continue seenfiles.add(fn) + if set(bh) == {'0'}: + # New file, will for sure apply clean + continue fullpath = os.path.join(topdir, fn) if when is None: if not os.path.exists(fullpath): @@ -926,10 +929,17 @@ class LoreMessage: @staticmethod def get_indexes(diff): indexes = set() - for match in re.finditer(r'^diff\s+--git\s+\w/(.*)\s+\w/.*\nindex\s+([0-9a-f]+)\.\.[0-9a-f]+\s+[0-9]+$', - diff, flags=re.I | re.M): - fname, bindex = match.groups() - indexes.add((fname, bindex)) + curfile = None + for line in diff.split('\n'): + if line.find('diff ') != 0 and line.find('index ') != 0: + continue + matches = re.search(r'^diff\s+--git\s+\w/(.*)\s+\w/(.*)$', line) + if matches and matches.groups()[0] == matches.groups()[1]: + curfile = matches.groups()[0] + continue + matches = re.search(r'^index\s+([0-9a-f]+)\.\.[0-9a-f]+.*$', line) + if matches and curfile is not None: + indexes.add((curfile, matches.groups()[0])) return indexes @staticmethod @@ -1627,6 +1637,33 @@ def git_get_command_lines(gitdir, args): return lines +@contextmanager +def git_temp_worktree(gitdir=None): + """Context manager that creates a temporary work tree and chdirs into it. The + worktree is deleted when the context manager is closed. 
Taken from gj_tools.""" + dfn = None + try: + with TemporaryDirectory() as dfn: + git_run_command(gitdir, ['worktree', 'add', '--detach', '--no-checkout', dfn]) + with in_directory(dfn): + yield + finally: + if dfn is not None: + git_run_command(gitdir, ['worktree', 'remove', dfn]) + + +@contextmanager +def in_directory(dirname): + """Context manager that chdirs into a directory and restores the original + directory when closed. Taken from gj_tools.""" + cdir = os.getcwd() + try: + os.chdir(dirname) + yield True + finally: + os.chdir(cdir) + + def get_config_from_git(regexp, defaults=None): args = ['config', '-z', '--get-regexp', regexp] ecode, out = git_run_command(None, args) diff --git a/b4/command.py b/b4/command.py index 7709649..12900ff 100644 --- a/b4/command.py +++ b/b4/command.py @@ -60,6 +60,11 @@ def cmd_ty(cmdargs): b4.ty.main(cmdargs) +def cmd_diff(cmdargs): + import b4.diff + b4.diff.main(cmdargs) + + def cmd(): # noinspection PyTypeChecker parser = argparse.ArgumentParser( @@ -164,6 +169,20 @@ def cmd(): help='The --since option to use when auto-matching patches (default=1.week)') sp_ty.set_defaults(func=cmd_ty) + # b4 diff + sp_diff = subparsers.add_parser('diff', help='Show a range-diff to previous series revision') + sp_diff.add_argument('msgid', nargs='?', + help='Message ID to process, or pipe a raw message') + sp_diff.add_argument('-g', '--gitdir', default=None, + help='Operate on this git tree instead of current dir') + sp_diff.add_argument('-p', '--use-project', dest='useproject', default=None, + help='Use a specific project instead of guessing (linux-mm, linux-hardening, etc)') + sp_diff.add_argument('-C', '--no-cache', dest='nocache', action='store_true', default=False, + help='Do not use local cache') + sp_diff.add_argument('-v', '--compare-versions', dest='wantvers', type=int, default=None, nargs='+', + help='Compare specific versions instead of latest and one before that, e.g. 
-v 3 5') + sp_diff.set_defaults(func=cmd_diff) + cmdargs = parser.parse_args() logger.setLevel(logging.DEBUG) diff --git a/b4/diff.py b/b4/diff.py new file mode 100644 index 0000000..2d9ebe0 --- /dev/null +++ b/b4/diff.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (C) 2020 by the Linux Foundation +# +__author__ = 'Konstantin Ryabitsev ' + +import os +import sys +import b4 +import b4.mbox +import mailbox +from tempfile import mkstemp + + +logger = b4.logger + + +def make_fake_commit_range(gitdir, lser): + logger.info('Preparing fake-am for v%s: %s', lser.revision, lser.subject) + with b4.git_temp_worktree(gitdir): + # We are in a temporary chdir at this time, so writing to a known file should be safe + mbxf = '.__git-am__' + mbx = mailbox.mbox(mbxf) + # Logic largely borrowed from gj_tools + seenfiles = set() + for lmsg in lser.patches[1:]: + logger.debug('Looking at %s', lmsg.full_subject) + lmsg.load_hashes() + for fn, fi in lmsg.blob_indexes: + if fn in seenfiles: + # We already processed this file, so this blob won't match + continue + seenfiles.add(fn) + if set(fi) == {'0'}: + # New file creation, nothing to do here + logger.debug(' New file: %s', fn) + continue + # Try to grab full ref_id of this hash + ecode, out = b4.git_run_command(gitdir, ['rev-parse', fi]) + if ecode > 0: + logger.critical(' ERROR: Could not find matching blob for %s (%s)', fn, fi) + # TODO: better handling + return None, None + logger.debug(' Found matching blob for: %s', fn) + fullref = out.strip() + gitargs = ['update-index', '--add', '--cacheinfo', f'0644,{fullref},{fn}'] + ecode, out = b4.git_run_command(None, gitargs) + if ecode > 0: + logger.critical(' ERROR: Could not run update-index for %s (%s)', fn, fullref) + return None, None + mbx.add(lmsg.msg.as_string(policy=b4.emlpolicy).encode('utf-8')) + + mbx.close() + ecode, out = b4.git_run_command(None, ['write-tree']) + if ecode > 0: + 
logger.critical('ERROR: Could not write fake-am tree') + return None, None + treeid = out.strip() + # At this point we have a worktree with files that should cleanly receive a git am + gitargs = ['commit-tree', treeid + '^{tree}', '-F', '-'] + ecode, out = b4.git_run_command(None, gitargs, stdin='Initial fake commit'.encode('utf-8')) + if ecode > 0: + logger.critical('ERROR: Could not commit-tree') + return None, None + start_commit = out.strip() + b4.git_run_command(None, ['reset', '--hard', start_commit]) + ecode, out = b4.git_run_command(None, ['am', mbxf]) + if ecode > 0: + logger.critical('ERROR: Could not fake-am version %s', lser.revision) + return None, None + ecode, out = b4.git_run_command(None, ['rev-parse', 'HEAD']) + end_commit = out.strip() + logger.info(' range: %.12s..%.12s', start_commit, end_commit) + + return start_commit, end_commit + + +def main(cmdargs): + msgid = b4.get_msgid(cmdargs) + if cmdargs.wantvers and len(cmdargs.wantvers) > 2: + logger.critical('Can only compare two versions at a time') + sys.exit(1) + + # start by grabbing the mbox provided + savefile = mkstemp('b4-diff-to')[1] + mboxfile = b4.get_pi_thread_by_msgid(msgid, savefile, useproject=cmdargs.useproject, nocache=cmdargs.nocache) + if mboxfile is None: + logger.critical('Unable to retrieve thread: %s', msgid) + return + logger.info('Retrieved %s messages in the thread', len(mboxfile)) + b4.mbox.get_extra_series(mboxfile, direction=-1, wantvers=cmdargs.wantvers) + mbx = mailbox.mbox(mboxfile) + count = len(mbx) + logger.info('---') + logger.info('Analyzing %s messages in the thread', count) + lmbx = b4.LoreMailbox() + for key, msg in mbx.items(): + lmbx.add_message(msg) + if cmdargs.wantvers and len(cmdargs.wantvers) == 1: + upper = max(lmbx.series.keys()) + lower = cmdargs.wantvers[0] + elif cmdargs.wantvers and len(cmdargs.wantvers) == 2: + upper = max(cmdargs.wantvers) + lower = min(cmdargs.wantvers) + else: + upper = max(lmbx.series.keys()) + lower = 
min(lmbx.series.keys()) + + if upper not in lmbx.series: + logger.critical('Could not find revision %s', upper) + sys.exit(1) + if lower not in lmbx.series: + logger.critical('Could not find revision %s', lower) + sys.exit(1) + + # Prepare the lower fake-am range + lsc, lec = make_fake_commit_range(cmdargs.gitdir, lmbx.series[lower]) + if lsc is None or lec is None: + logger.critical('---') + logger.critical('Could not create fake-am range for lower series v%s', lower) + os.unlink(mboxfile) + sys.exit(1) + # Prepare the upper fake-am range + usc, uec = make_fake_commit_range(cmdargs.gitdir, lmbx.series[upper]) + if usc is None or uec is None: + logger.critical('---') + logger.critical('Could not create fake-am range for upper series v%s', upper) + os.unlink(mboxfile) + sys.exit(1) + logger.info('---') + logger.info('Success, you may now run:') + logger.info(' git range-diff %.12s..%.12s %.12s..%.12s', lsc, lec, usc, uec) + diff --git a/b4/mbox.py b/b4/mbox.py index 5b08d1a..9ff88a6 100644 --- a/b4/mbox.py +++ b/b4/mbox.py @@ -98,8 +98,8 @@ def mbox_to_am(mboxfile, cmdargs): cherrypick = None logger.critical('Writing %s', am_filename) mbx = mailbox.mbox(am_filename) - am_mbx = lser.save_am_mbox(mbx, cmdargs.noaddtrailers, covertrailers, - trailer_order=config['trailer-order'], + am_mbx = lser.save_am_mbox(mbx, noaddtrailers=cmdargs.noaddtrailers, + covertrailers=covertrailers, trailer_order=config['trailer-order'], addmysob=cmdargs.addmysob, addlink=cmdargs.addlink, linkmask=config['linkmask'], cherrypick=cherrypick) logger.info('---') @@ -310,7 +310,7 @@ def am_mbox_to_quilt(am_mbx, q_dirname): sfh.write('%s\n' % patch_filename) -def get_newest_series(mboxfile): +def get_extra_series(mboxfile, direction=1, wantvers=None): # Open the mbox and find the latest series mentioned in it mbx = mailbox.mbox(mboxfile) base_msg = None @@ -343,14 +343,27 @@ def get_newest_series(mboxfile): logger.debug('Not checking for new revisions: no prefixes on the cover letter.') 
mbx.close() return + if direction < 0 and latest_revision <= 1: + logger.debug('This is the latest version of the series') + mbx.close() + return + if direction < 0 and wantvers is None: + wantvers = [latest_revision - 1] + base_msgid = b4.LoreMessage.get_clean_msgid(base_msg) fromeml = email.utils.getaddresses(base_msg.get_all('from', []))[0][1] msgdate = email.utils.parsedate_tz(str(base_msg['Date'])) startdate = time.strftime('%Y%m%d', msgdate[:9]) listarc = base_msg.get_all('List-Archive')[-1].strip('<>') - q = 's:"%s" AND f:"%s" AND d:%s..' % (lsub.subject.replace('"', ''), fromeml, startdate) - queryurl = '%s?%s' % (listarc, urllib.parse.urlencode({'q': q, 'x': 'A', 'o': '-1'})) - logger.critical('Checking for newer revisions on %s', listarc) + if direction > 0: + q = 's:"%s" AND f:"%s" AND d:%s..' % (lsub.subject.replace('"', ''), fromeml, startdate) + queryurl = '%s?%s' % (listarc, urllib.parse.urlencode({'q': q, 'x': 'A', 'o': '-1'})) + logger.critical('Checking for newer revisions on %s', listarc) + else: + q = 's:"%s" AND f:"%s" AND d:..%s' % (lsub.subject.replace('"', ''), fromeml, startdate) + queryurl = '%s?%s' % (listarc, urllib.parse.urlencode({'q': q, 'x': 'A', 'o': '1'})) + logger.critical('Checking for older revisions on %s', listarc) + logger.debug('Query URL: %s', queryurl) session = b4.get_requests_session() resp = session.get(queryurl) @@ -373,18 +386,30 @@ def get_newest_series(mboxfile): logger.debug('Ignoring result (not interesting): %s', title) continue link = entry.find('atom:link', ns).get('href') - if lsub.revision < latest_revision: + if direction > 0 and lsub.revision <= latest_revision: logger.debug('Ignoring result (not new revision): %s', title) continue + elif direction < 0 and lsub.revision >= latest_revision: + logger.debug('Ignoring result (not old revision): %s', title) + continue + elif direction < 0 and lsub.revision not in wantvers: + logger.debug('Ignoring result (not revision we want): %s', title) + continue if 
link.find('/%s/' % base_msgid) > 0: logger.debug('Ignoring result (same thread as ours):%s', title) continue if lsub.revision == 1 and lsub.revision == latest_revision: # Someone sent a separate message with an identical title but no new vX in the subject line - # It's *probably* a new revision. - logger.debug('Likely a new revision: %s', title) - elif lsub.revision > latest_revision: + if direction > 0: + # It's *probably* a new revision. + logger.debug('Likely a new revision: %s', title) + else: + # It's *probably* an older revision. + logger.debug('Likely an older revision: %s', title) + elif direction > 0 and lsub.revision > latest_revision: logger.debug('Definitely a new revision [v%s]: %s', lsub.revision, title) + elif direction < 0 and lsub.revision < latest_revision: + logger.debug('Definitely an older revision [v%s]: %s', lsub.revision, title) else: logger.debug('No idea what this is: %s', title) continue @@ -445,7 +470,7 @@ def main(cmdargs): sys.exit(1) if threadmbox and cmdargs.checknewer: - get_newest_series(threadmbox) + get_extra_series(threadmbox, direction=1) if cmdargs.subcmd == 'am': mbox_to_am(threadmbox, cmdargs) -- cgit v1.2.3