6 files changed, 238 insertions, 124 deletions
diff --git a/b4/__init__.py b/b4/__init__.py
index b6f25fe..3cdad1c 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -587,7 +587,7 @@ class LoreSeries:
 
         return mbx
 
-    def check_applies_clean(self, topdir, when=None):
+    def check_applies_clean(self, gitdir, when=None):
         # Go through indexes and see if this series should apply cleanly
         mismatches = 0
         seenfiles = set()
@@ -603,7 +603,7 @@ class LoreSeries:
                 if set(bh) == {'0'}:
                     # New file, will for sure apply clean
                     continue
-                fullpath = os.path.join(topdir, fn)
+                fullpath = os.path.join(gitdir, fn)
                 if when is None:
                     if not os.path.exists(fullpath):
                         mismatches += 1
@@ -611,7 +611,9 @@ class LoreSeries:
                     cmdargs = ['hash-object', fullpath]
                     ecode, out = git_run_command(None, cmdargs)
                 else:
-                    gitdir = os.path.join(topdir, '.git')
+                    # This branch runs on every loop pass, so only append '.git' if it is not there yet
+                    if os.path.basename(gitdir) != '.git':
+                        gitdir = os.path.join(gitdir, '.git')
                     logger.debug('Checking hash on %s:%s', when, fn)
                     # XXX: We should probably pipe the two commands instead of reading into memory,
                     #      so something to consider for the future
@@ -632,6 +632,145 @@ class LoreSeries:
 
         return len(seenfiles), mismatches
 
+    def make_fake_am_range(self, gitdir):
+        """Create, or reuse from cache, a fake commit range with this series applied.
+
+        The pre-image blob of every file touched by the series is staged in
+        the index of a temporary worktree and committed as a fake base; the
+        patches are then applied on top of it with "git am". The resulting
+        commit pair is saved in a cache file named after the msgid of the
+        first available patch and reused for as long as both commits exist.
+
+        :param gitdir: git directory handed to git_run_command() and
+                       git_temp_worktree()
+        :return: (start_commit, end_commit) tuple, or (None, None) on failure
+        """
+        start_commit = end_commit = None
+        # Do we have it in cache already?
+        cachedir = get_cache_dir()
+        # Use the msgid of the first non-None patch in the series
+        msgid = None
+        for lmsg in self.patches:
+            if lmsg is not None:
+                msgid = lmsg.msgid
+                break
+        if msgid is None:
+            logger.critical('Cannot operate on an empty series')
+            return None, None
+        cachefile = os.path.join(cachedir, '%s.fakeam' % urllib.parse.quote_plus(msgid))
+        if os.path.exists(cachefile):
+            with open(cachefile, 'r') as fh:
+                chunks = fh.read().strip().split()
+            # Anything other than exactly two commit ids is a damaged cache
+            stalecache = len(chunks) != 2
+            if not stalecache:
+                start_commit, end_commit = chunks
+                # Make sure they are still there
+                for commit in chunks:
+                    ecode, out = git_run_command(gitdir, ['cat-file', '-e', commit])
+                    if ecode > 0:
+                        stalecache = True
+                        break
+            if not stalecache:
+                logger.debug('Using previously generated range')
+                return start_commit, end_commit
+            logger.debug('Stale cache for [v%s] %s', self.revision, self.subject)
+            os.unlink(cachefile)
+
+        logger.info('Preparing fake-am for v%s: %s', self.revision, self.subject)
+        with git_temp_worktree(gitdir):
+            # We are in a temporary chdir at this time, so writing to a known file should be safe
+            mbxf = '.__git-am__'
+            mbx = mailbox.mbox(mbxf)
+            try:
+                prepared = self._prepare_fake_am_index(gitdir, mbx)
+            finally:
+                # Always close: this writes the mbox out for "git am" and also
+                # releases the file when index preparation bails out early
+                mbx.close()
+            if not prepared:
+                return None, None
+            ecode, out = git_run_command(None, ['write-tree'])
+            if ecode > 0:
+                logger.critical('ERROR: Could not write fake-am tree')
+                return None, None
+            treeid = out.strip()
+            # At this point we have a worktree with files that should cleanly receive a git am
+            gitargs = ['commit-tree', treeid + '^{tree}', '-F', '-']
+            ecode, out = git_run_command(None, gitargs, stdin='Initial fake commit'.encode('utf-8'))
+            if ecode > 0:
+                logger.critical('ERROR: Could not commit-tree')
+                return None, None
+            start_commit = out.strip()
+            git_run_command(None, ['reset', '--hard', start_commit])
+            ecode, out = git_run_command(None, ['am', mbxf])
+            if ecode > 0:
+                logger.critical('ERROR: Could not fake-am version %s', self.revision)
+                return None, None
+            ecode, out = git_run_command(None, ['rev-parse', 'HEAD'])
+            if ecode > 0:
+                # Never cache a range whose end commit we could not resolve
+                logger.critical('ERROR: Could not find the end of the fake-am range')
+                return None, None
+            end_commit = out.strip()
+            logger.info('  range: %.12s..%.12s', start_commit, end_commit)
+
+        with open(cachefile, 'w') as fh:
+            logger.debug('Saving into cache: %s', cachefile)
+            logger.debug('    %s..%s', start_commit, end_commit)
+            fh.write(f'{start_commit} {end_commit}\n')
+
+        return start_commit, end_commit
+
+    def _prepare_fake_am_index(self, gitdir, mbx):
+        """Stage pre-image blobs for the series and add its patches to mbx.
+
+        Expected to be called from inside the temporary worktree, because
+        update-index is run without an explicit git directory. Returns True
+        on success and False (after logging the reason) on failure.
+        """
+        # Logic largely borrowed from gj_tools
+        seenfiles = set()
+        for lmsg in self.patches[1:]:
+            if lmsg is None:
+                # self.patches may hold None in place of a patch (see the
+                # msgid lookup in make_fake_am_range), so fail cleanly here
+                logger.critical('ERROR: some patches are missing from the series')
+                logger.critical('       unable to create a fake-am range')
+                return False
+            logger.debug('Looking at %s', lmsg.full_subject)
+            lmsg.load_hashes()
+            if not lmsg.blob_indexes:
+                logger.critical('ERROR: some patches do not have indexes')
+                logger.critical('       unable to create a fake-am range')
+                return False
+            for fn, fi in lmsg.blob_indexes:
+                if fn in seenfiles:
+                    # We already processed this file, so this blob won't match
+                    continue
+                seenfiles.add(fn)
+                if set(fi) == {'0'}:
+                    # New file creation, nothing to do here
+                    logger.debug('  New file: %s', fn)
+                    continue
+                # Try to grab full ref_id of this hash
+                ecode, out = git_run_command(gitdir, ['rev-parse', fi])
+                if ecode > 0:
+                    logger.critical('  ERROR: Could not find matching blob for %s (%s)', fn, fi)
+                    logger.critical('         If you know on which tree this patchset is based,')
+                    logger.critical('         add it as a remote and perform "git remote update"')
+                    logger.critical('         in order to fetch the missing objects.')
+                    return False
+                logger.debug('  Found matching blob for: %s', fn)
+                fullref = out.strip()
+                gitargs = ['update-index', '--add', '--cacheinfo', f'0644,{fullref},{fn}']
+                ecode, out = git_run_command(None, gitargs)
+                if ecode > 0:
+                    logger.critical('  ERROR: Could not run update-index for %s (%s)', fn, fullref)
+                    return False
+            mbx.add(lmsg.msg.as_string(policy=emlpolicy).encode('utf-8'))
+        return True
+
     def save_cover(self, outfile):
         cover_msg = self.patches[0].get_am_message(add_trailers=False, trailer_order=None)
         with open(outfile, 'w') as fh:
diff --git a/b4/command.py b/b4/command.py
index 2964f54..d0d084e 100644
--- a/b4/command.py
+++ b/b4/command.py
@@ -108,6 +108,9 @@ def cmd():
                             '"-P *globbing*" to match on commit subject)')
     sp_am.add_argument('-g', '--guess-base', dest='guessbase', action='store_true', default=False,
                        help='Try to guess the base of the series (if not specified)')
+    sp_am.add_argument('-3', '--prep-3way', dest='threeway', action='store_true', default=False,
+                       help='Prepare for a 3-way merge '
+                            '(tries to ensure that all index blobs exist by making a fake commit range)')
     sp_am.set_defaults(func=cmd_am)
 
     # b4 attest
diff --git a/b4/diff.py b/b4/diff.py
index 7ff4e47..ab23b0c 100644
--- a/b4/diff.py
+++ b/b4/diff.py
@@ -19,114 +19,6 @@ from tempfile import mkstemp
 logger = b4.logger
 
 
-def make_fake_commit_range(gitdir, lser):
-    start_commit = end_commit = None
-    # Do we have it in cache already?
-    cachedir = b4.get_cache_dir()
-    # Use the msgid of the first non-None patch in the series
-    msgid = None
-    for lmsg in lser.patches:
-        if lmsg is not None:
-            msgid = lmsg.msgid
-            break
-    if msgid is None:
-        logger.critical('Cannot operate on an empty series')
-        return None, None
-    cachefile = os.path.join(cachedir, '%s.fakeam' % urllib.parse.quote_plus(msgid))
-    if os.path.exists(cachefile):
-        stalecache = False
-        with open(cachefile, 'r') as fh:
-            cachedata = fh.read()
-            chunks = cachedata.strip().split()
-            if len(chunks) == 2:
-                start_commit, end_commit = chunks
-            else:
-                stalecache = True
-        if start_commit is not None and end_commit is not None:
-            # Make sure they are still there
-            ecode, out = b4.git_run_command(gitdir, ['cat-file', '-e', start_commit])
-            if ecode > 0:
-                stalecache = True
-            else:
-                ecode, out = b4.git_run_command(gitdir, ['cat-file', '-e', end_commit])
-                if ecode > 0:
-                    stalecache = True
-                else:
-                    logger.debug('Using previously generated range')
-                    return start_commit, end_commit
-
-        if stalecache:
-            logger.debug('Stale cache for [v%s] %s', lser.revision, lser.subject)
-            os.unlink(cachefile)
-
-    logger.info('Preparing fake-am for v%s: %s', lser.revision, lser.subject)
-    with b4.git_temp_worktree(gitdir):
-        # We are in a temporary chdir at this time, so writing to a known file should be safe
-        mbxf = '.__git-am__'
-        mbx = mailbox.mbox(mbxf)
-        # Logic largely borrowed from gj_tools
-        seenfiles = set()
-        for lmsg in lser.patches[1:]:
-            logger.debug('Looking at %s', lmsg.full_subject)
-            lmsg.load_hashes()
-            if not len(lmsg.blob_indexes):
-                logger.critical('ERROR: some patches do not have indexes')
-                logger.critical('       automatic range-diff would be misleading')
-                return None, None
-            for fn, fi in lmsg.blob_indexes:
-                if fn in seenfiles:
-                    # We already processed this file, so this blob won't match
-                    continue
-                seenfiles.add(fn)
-                if set(fi) == {'0'}:
-                    # New file creation, nothing to do here
-                    logger.debug('  New file: %s', fn)
-                    continue
-                # Try to grab full ref_id of this hash
-                ecode, out = b4.git_run_command(gitdir, ['rev-parse', fi])
-                if ecode > 0:
-                    logger.critical('  ERROR: Could not find matching blob for %s (%s)', fn, fi)
-                    # TODO: better handling
-                    return None, None
-                logger.debug('  Found matching blob for: %s', fn)
-                fullref = out.strip()
-                gitargs = ['update-index', '--add', '--cacheinfo', f'0644,{fullref},{fn}']
-                ecode, out = b4.git_run_command(None, gitargs)
-                if ecode > 0:
-                    logger.critical('  ERROR: Could not run update-index for %s (%s)', fn, fullref)
-                    return None, None
-            mbx.add(lmsg.msg.as_string(policy=b4.emlpolicy).encode('utf-8'))
-
-        mbx.close()
-        ecode, out = b4.git_run_command(None, ['write-tree'])
-        if ecode > 0:
-            logger.critical('ERROR: Could not write fake-am tree')
-            return None, None
-        treeid = out.strip()
-        # At this point we have a worktree with files that should cleanly receive a git am
-        gitargs = ['commit-tree', treeid + '^{tree}', '-F', '-']
-        ecode, out = b4.git_run_command(None, gitargs, stdin='Initial fake commit'.encode('utf-8'))
-        if ecode > 0:
-            logger.critical('ERROR: Could not commit-tree')
-            return None, None
-        start_commit = out.strip()
-        b4.git_run_command(None, ['reset', '--hard', start_commit])
-        ecode, out = b4.git_run_command(None, ['am', mbxf])
-        if ecode > 0:
-            logger.critical('ERROR: Could not fake-am version %s', lser.revision)
-            return None, None
-        ecode, out = b4.git_run_command(None, ['rev-parse', 'HEAD'])
-        end_commit = out.strip()
-        logger.info('  range: %.12s..%.12s', start_commit, end_commit)
-
-    with open(cachefile, 'w') as fh:
-        logger.debug('Saving into cache: %s', cachefile)
-        logger.debug('    %s..%s', start_commit, end_commit)
-        fh.write(f'{start_commit} {end_commit}\n')
-
-    return start_commit, end_commit
-
-
 def diff_same_thread_series(cmdargs):
     msgid = b4.get_msgid(cmdargs)
     wantvers = cmdargs.wantvers
@@ -221,13 +113,13 @@ def main(cmdargs):
         sys.exit(1)
 
     # Prepare the lower fake-am range
-    lsc, lec = make_fake_commit_range(cmdargs.gitdir, lser)
+    lsc, lec = lser.make_fake_am_range(gitdir=cmdargs.gitdir)
     if lsc is None or lec is None:
         logger.critical('---')
         logger.critical('Could not create fake-am range for lower series v%s', lser.revision)
         sys.exit(1)
     # Prepare the upper fake-am range
-    usc, uec = make_fake_commit_range(cmdargs.gitdir, user)
+    usc, uec = user.make_fake_am_range(gitdir=cmdargs.gitdir)
     if usc is None or uec is None:
         logger.critical('---')
         logger.critical('Could not create fake-am range for upper series v%s', user.revision)
diff --git a/b4/mbox.py b/b4/mbox.py
index abcad74..50e1471 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -123,6 +123,23 @@ def mbox_to_am(mboxfile, cmdargs):
             logger.critical('         From: %s <%s>', fname, femail)
         logger.critical('NOTE: Rerun with -S to apply them anyway')
 
+    topdir = None
+    # Are we in a git tree and if so, what is our toplevel?
+    gitargs = ['rev-parse', '--show-toplevel']
+    lines = b4.git_get_command_lines(None, gitargs)
+    if len(lines) == 1:
+        topdir = lines[0]
+
+    if cmdargs.threeway:
+        if not topdir:
+            logger.critical('WARNING: cannot prepare 3-way (not in a git dir)')
+        elif not lser.complete:
+            logger.critical('WARNING: cannot prepare 3-way (series incomplete)')
+        else:
+            rstart, rend = lser.make_fake_am_range(gitdir=None)
+            if rstart and rend:
+                logger.info('Prepared a fake commit range for 3-way merge (%.12s..%.12s)', rstart, rend)
+
     logger.critical('---')
     if not lser.complete:
         logger.critical('WARNING: Thread incomplete!')
@@ -165,11 +182,7 @@ def mbox_to_am(mboxfile, cmdargs):
         logger.critical('       git am %s', am_filename)
     else:
         cleanmsg = ''
-        # Are we in a git tree and if so, what is our toplevel?
-        gitargs = ['rev-parse', '--show-toplevel']
-        lines = b4.git_get_command_lines(None, gitargs)
-        if len(lines) == 1:
-            topdir = lines[0]
+        if topdir is not None:
             checked, mismatches = lser.check_applies_clean(topdir)
             if mismatches == 0 and checked != mismatches:
                 cleanmsg = ' (applies clean to current tree)'
diff --git a/man/b4.5 b/man/b4.5
index 0e7df0e..9ea05d0 100644
--- a/man/b4.5
+++ b/man/b4.5
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH B4 5 "2020-04-10" "0.4.1" ""
+.TH B4 5 "2020-05-25" "0.5.0" ""
 .SH NAME
 B4 \- Work with code submissions in a public-inbox archive
 .
@@ -32,7 +32,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
 ..
 .SH SYNOPSIS
 .sp
-b4 {mbox,am,attest,pr,ty} [options]
+b4 {mbox,am,attest,pr,ty,diff} [options]
 .SH DESCRIPTION
 .sp
 This is a helper utility to work with patches and pull requests made
@@ -54,6 +54,8 @@ precursor to Lore and Data in the Star Trek universe.
 \fIb4 pr\fP: (EXPERIMENTAL) Work with pull requests
 .IP \(bu 2
 \fIb4 ty\fP: (EXPERIMENTAL) Create templated replies for processed patches and pull requests
+.IP \(bu 2
+\fIb4 diff\fP: (EXPERIMENTAL) Show range\-diff style diffs between patch versions
 .UNINDENT
 .SH OPTIONS
 .INDENT 0.0
@@ -166,6 +168,13 @@ Save mbox patches in a quilt\-ready folder
 Cherry\-pick a subset of patches (e.g. "\-P 1\-2,4,6\-",
 "\-P _" to use just the msgid specified,
 or "\-P *globbing*" to match on commit subject)
+.TP
+.B \-g\fP,\fB  \-\-guess\-base
+Try to guess the base of the series (if not specified)
+.TP
+.B \-3\fP,\fB  \-\-prep\-3way
+Prepare for a 3\-way merge (tries to ensure that all
+index blobs exist by making a fake commit range)
 .UNINDENT
 .UNINDENT
 .sp
@@ -272,6 +281,57 @@ The \-\-since option to use when auto\-matching patches (default=1.week)
 .UNINDENT
 .sp
 \fIExample\fP: b4 ty \-\-auto
+.SS b4 diff
+.sp
+usage: b4 diff [\-h] [\-g GITDIR] [\-p USEPROJECT] [\-C] [\-v WANTVERS [WANTVERS ...]] [\-n] [\-o OUTDIFF] [\-c] [\-m AMBOX AMBOX] [msgid]
+.INDENT 0.0
+.TP
+.B positional arguments:
+msgid                 Message ID to process, pipe a raw message, or use \-m
+.UNINDENT
+.sp
+optional arguments:
+.INDENT 0.0
+.INDENT 3.5
+.INDENT 0.0
+.TP
+.B \-h\fP,\fB  \-\-help
+show this help message and exit
+.TP
+.BI \-g \ GITDIR\fP,\fB \ \-\-gitdir \ GITDIR
+Operate on this git tree instead of current dir
+.TP
+.BI \-p \ USEPROJECT\fP,\fB \ \-\-use\-project \ USEPROJECT
+Use a specific project instead of guessing (linux\-mm, linux\-hardening, etc)
+.TP
+.B \-C\fP,\fB  \-\-no\-cache
+Do not use local cache
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-v WANTVERS [WANTVERS ...], \-\-compare\-versions WANTVERS [WANTVERS ...]
+Compare specific versions instead of latest and one before that, e.g. \-v 3 5
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-n\fP,\fB  \-\-no\-diff
+Do not generate a diff, just show the command to do it
+.TP
+.BI \-o \ OUTDIFF\fP,\fB \ \-\-output\-diff \ OUTDIFF
+Save diff into this file instead of outputting to stdout
+.TP
+.B \-c\fP,\fB  \-\-color
+Force color output even when writing to file
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-m AMBOX AMBOX, \-\-compare\-am\-mboxes AMBOX AMBOX
+Compare two mbx files prepared with "b4 am"
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+\fIExample\fP: b4 diff
 .SH CONFIGURATION
 .sp
 B4 configuration is handled via git\-config(1), so you can store it in
diff --git a/man/b4.5.rst b/man/b4.5.rst
index c95b172..d8e28f6 100644
--- a/man/b4.5.rst
+++ b/man/b4.5.rst
@@ -5,15 +5,15 @@ Work with code submissions in a public-inbox archive
 ----------------------------------------------------
 
 :Author:    mricon@kernel.org
-:Date:      2020-04-10
+:Date:      2020-05-25
 :Copyright: The Linux Foundation and contributors
 :License:   GPLv2+
-:Version:   0.4.1
+:Version:   0.5.0
 :Manual section: 5
 
 SYNOPSIS
 --------
-b4 {mbox,am,attest,pr,ty} [options]
+b4 {mbox,am,attest,pr,ty,diff} [options]
 
 DESCRIPTION
 -----------
@@ -32,6 +32,7 @@ SUBCOMMANDS
 * *b4 attest*: (EXPERIMENTAL) Submit cryptographic attestation for patches
 * *b4 pr*: (EXPERIMENTAL) Work with pull requests
 * *b4 ty*: (EXPERIMENTAL) Create templated replies for processed patches and pull requests
+* *b4 diff*: (EXPERIMENTAL) Show range-diff style diffs between patch versions
 
 OPTIONS
 -------
@@ -105,7 +106,11 @@ optional arguments:
                         Cherry-pick a subset of patches (e.g. "-P 1-2,4,6-",
                         "-P _" to use just the msgid specified,
                         or "-P \*globbing\*" to match on commit subject)
- 
+  -g, --guess-base
+                        Try to guess the base of the series (if not specified)
+  -3, --prep-3way
+                        Prepare for a 3-way merge (tries to ensure that all
+                        index blobs exist by making a fake commit range)
 
 *Example*: b4 am 20200313231252.64999-1-keescook@chromium.org
 
@@ -173,6 +178,38 @@ optional arguments:
 
 *Example*: b4 ty --auto
 
+b4 diff
+~~~~~~~
+usage: b4 diff [-h] [-g GITDIR] [-p USEPROJECT] [-C] [-v WANTVERS [WANTVERS ...]] [-n] [-o OUTDIFF] [-c] [-m AMBOX AMBOX] [msgid]
+
+positional arguments:
+  msgid                 Message ID to process, pipe a raw message, or use -m
+
+optional arguments:
+
+  -h, --help            show this help message and exit
+  -g GITDIR, --gitdir GITDIR
+                        Operate on this git tree instead of current dir
+  -p USEPROJECT, --use-project USEPROJECT
+                        Use a specific project instead of guessing (linux-mm, linux-hardening, etc)
+  -C, --no-cache        Do not use local cache
+
+  -v WANTVERS [WANTVERS ...], --compare-versions WANTVERS [WANTVERS ...]
+                        Compare specific versions instead of latest and one before that, e.g. -v 3 5
+
+  -n, --no-diff
+                        Do not generate a diff, just show the command to do it
+
+  -o OUTDIFF, --output-diff OUTDIFF
+                        Save diff into this file instead of outputting to stdout
+  -c, --color
+                        Force color output even when writing to file
+
+  -m AMBOX AMBOX, --compare-am-mboxes AMBOX AMBOX
+                        Compare two mbx files prepared with "b4 am"
+
+*Example*: b4 diff
+
 CONFIGURATION
 -------------
 B4 configuration is handled via git-config(1), so you can store it in