summary refs log tree commit diff
diff options
context:
space:
mode:
author Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2021-06-21 15:53:39 -0400
committer Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2021-06-21 15:53:39 -0400
commit 9622ab417325c6d60f5e9e95f4e3786bf6f0e2d5 (patch)
tree ba96af39d47aad8e3e64e243a45dfb71c17e5b8e
parent c5cfd019feb50632f6f125e8ce36ceec807e7a27 (diff)
download b4-9622ab417325c6d60f5e9e95f4e3786bf6f0e2d5.tar.gz
Reimplement --guess-base
Based on some feedback, attempt to reimplement --guess-base by looking at the file index hashes and using --find-object to locate when they were last changed. We limit this using --since and --until, so that we aren't trying to look through the entire history of the repo. For the --until date, we take the date of the patch. For the --since date, we take the timedelta using the number of days specified by --guess-lookback (default is 14 days).

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- b4/__init__.py 126
-rw-r--r-- b4/command.py 4
-rw-r--r-- b4/mbox.py 63
3 files changed, 116 insertions, 77 deletions
diff --git a/b4/__init__.py b/b4/__init__.py
index 8d8911d..53eaf27 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -450,6 +450,8 @@ class LoreSeries:
self.has_cover = False
self.partial_reroll = False
self.subject = '(untitled)'
+ # Used for base matching
+ self._indexes = None
def __repr__(self):
out = list()
@@ -628,49 +630,95 @@ class LoreSeries:
return msgs
- def check_applies_clean(self, gitdir, when=None):
- # Go through indexes and see if this series should apply cleanly
- mismatches = 0
- seenfiles = set()
- for lmsg in self.patches[1:]:
- if lmsg is None or lmsg.blob_indexes is None:
- continue
- for fn, bh in lmsg.blob_indexes:
- if fn in seenfiles:
- # if we have seen this file once already, then it's a repeat patch
- # and it's no longer going to match current hash
- continue
- seenfiles.add(fn)
- if set(bh) == {'0'}:
- # New file, will for sure apply clean
+ def check_applies_clean(self, gitdir, at='HEAD'):
+ if self._indexes is None:
+ self._indexes = list()
+ seenfiles = set()
+ for lmsg in self.patches[1:]:
+ if lmsg is None or lmsg.blob_indexes is None:
continue
- fullpath = os.path.join(gitdir, fn)
- if when is None:
- if not os.path.exists(fullpath):
- mismatches += 1
+ for fn, bh in lmsg.blob_indexes:
+ if fn in seenfiles:
+ # if we have seen this file once already, then it's a repeat patch
+ # and it's no longer going to match current hash
continue
- cmdargs = ['hash-object', fullpath]
- ecode, out = git_run_command(None, cmdargs)
- else:
- logger.debug('Checking hash on %s:%s', when, fn)
- # XXX: We should probably pipe the two commands instead of reading into memory,
- # so something to consider for the future
- ecode, out = git_run_command(gitdir, ['show', f'{when}:{fn}'])
- if ecode > 0:
- # Couldn't get this file, continue
- logger.debug('Could not look up %s:%s', when, fn)
- mismatches += 1
+ seenfiles.add(fn)
+ if set(bh) == {'0'}:
+ # New file, will for sure apply clean
continue
- cmdargs = ['hash-object', '--stdin']
- ecode, out = git_run_command(None, cmdargs, stdin=out.encode())
- if ecode == 0:
- if out.find(bh) != 0:
- logger.debug('%s hash: %s (expected: %s)', fn, out.strip(), bh)
- mismatches += 1
- else:
- logger.debug('%s hash: matched', fn)
+ self._indexes.append((fn, bh))
+
+ mismatches = list()
+ for fn, bh in self._indexes:
+ ecode, out = git_run_command(gitdir, ['ls-tree', at, fn])
+ if ecode == 0 and len(out):
+ chunks = out.split()
+ if chunks[2].startswith(bh):
+ logger.debug('%s hash: matched', fn)
+ continue
+ else:
+ logger.debug('%s hash: %s (expected: %s)', fn, chunks[2], bh)
+ else:
+ # Couldn't get this file, continue
+ logger.debug('Could not look up %s:%s', at, fn)
+ mismatches.append((fn, bh))
+
+ return len(self._indexes), mismatches
+
+ def find_base(self, gitdir: str, branches: str = 'HEAD', maxdays: int = 30) -> Tuple[str, len]:
+ # Find the date of the first patch we have
+ pdate = datetime.datetime.now()
+ for lmsg in self.patches:
+ if lmsg is None:
+ continue
+ pdate = lmsg.date
+ break
+
+ # Find latest commit on that date
+ guntil = pdate.strftime('%Y-%m-%d')
+ gitargs = ['log', '--pretty=oneline', '--until', guntil, '--max-count=1', '--branches', branches]
+ lines = git_get_command_lines(gitdir, gitargs)
+ if not lines:
+ raise IndexError
+ commit = lines[0].split()[0]
+ checked, mismatches = self.check_applies_clean(gitdir, commit)
+ fewest = len(mismatches)
+ if fewest > 0:
+ since = pdate - datetime.timedelta(days=maxdays)
+ gsince = since.strftime('%Y-%m-%d')
+ logger.debug('Starting --find-object from %s to %s', gsince, guntil)
+ best = commit
+ for fn, bi in mismatches:
+ logger.debug('Finding tree matching %s=%s in %s', fn, bi, branches)
+ gitargs = ['log', '-m', '--pretty=oneline', '--since', gsince, '--until', guntil,
+ '--max-count=1', '--find-object', bi, '--branches', branches]
+ lines = git_get_command_lines(gitdir, gitargs)
+ if not lines:
+ logger.debug('Could not find object %s in the tree', bi)
+ continue
+ commit = lines[0].split()[0]
+ logger.debug('commit=%s', commit)
+ # We try both that commit and the one preceding it, in case it was a delete
+ # Keep track of the fewest mismatches
+ for tc in [commit, f'{commit}~1']:
+ sc, sm = self.check_applies_clean(gitdir, tc)
+ if len(sm) < fewest and len(sm) != sc:
+ fewest = len(sm)
+ best = tc
+ logger.debug('fewest=%s, best=%s', fewest, best)
+ if fewest == 0:
+ break
+
+ if fewest == 0:
+ break
+ else:
+ best = commit
+
+ lines = git_get_command_lines(gitdir, ['describe', best])
+ if len(lines):
+ return lines[0], fewest
- return len(seenfiles), mismatches
+ raise IndexError
def make_fake_am_range(self, gitdir):
start_commit = end_commit = None
diff --git a/b4/command.py b/b4/command.py
index 2d6994d..ebbb361 100644
--- a/b4/command.py
+++ b/b4/command.py
@@ -120,6 +120,10 @@ def cmd():
'"-P *globbing*" to match on commit subject)')
sp_am.add_argument('-g', '--guess-base', dest='guessbase', action='store_true', default=False,
help='Try to guess the base of the series (if not specified)')
+ sp_am.add_argument('-b', '--guess-branch', dest='guessbranch', default='HEAD',
+ help='When guessing base, use this branch instead of HEAD (use with -g)')
+ sp_am.add_argument('--guess-lookback', dest='guessdays', type=int, default=14,
+ help='When guessing base, go back this many days from the date of the patch')
sp_am.add_argument('-3', '--prep-3way', dest='threeway', action='store_true', default=False,
help='Prepare for a 3-way merge '
'(tries to ensure that all index blobs exist by making a fake commit range)')
diff --git a/b4/mbox.py b/b4/mbox.py
index e722d05..98bd920 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -228,46 +228,33 @@ def make_am(msgs, cmdargs, msgid):
if base_commit:
logger.critical(' Base: %s', base_commit)
- logger.critical(' git checkout -b %s %s', gitbranch, base_commit)
- if cmdargs.outdir != '-':
- logger.critical(' git am %s', am_filename)
else:
- cleanmsg = ''
if topdir is not None:
- checked, mismatches = lser.check_applies_clean(topdir)
- if mismatches == 0 and checked != mismatches:
- cleanmsg = ' (applies clean to current tree)'
- elif cmdargs.guessbase:
- # Look at the last 10 tags and see if it applies cleanly to
- # any of them. I'm not sure how useful this is, but I'm going
- # to put it in for now and maybe remove later if it causes
- # problems or slowness
- if checked != mismatches:
- best_matches = mismatches
- cleanmsg = ' (best guess: current tree)'
- else:
- best_matches = None
- # sort the tags by authordate
- gitargs = ['tag', '-l', '--sort=-taggerdate']
- lines = b4.git_get_command_lines(None, gitargs)
- if lines:
- # Check last 10 tags
- for tag in lines[:10]:
- logger.debug('Checking base-commit possibility for %s', tag)
- checked, mismatches = lser.check_applies_clean(topdir, tag)
- if mismatches == 0 and checked != mismatches:
- cleanmsg = ' (applies clean to: %s)' % tag
- break
- # did they all mismatch?
- if checked == mismatches:
- continue
- if best_matches is None or mismatches < best_matches:
- best_matches = mismatches
- cleanmsg = ' (best guess: %s)' % tag
-
- logger.critical(' Base: not found%s', cleanmsg)
- if cmdargs.outdir != '-':
- logger.critical(' git am %s', am_filename)
+ checked, mismatches = lser.check_applies_clean(topdir, at=cmdargs.guessbranch)
+ if len(mismatches) == 0 and checked != mismatches:
+ logger.critical(' Base: current tree')
+ elif len(mismatches) and cmdargs.guessbase:
+ logger.critical(' attempting to guess base-commit...')
+ try:
+ base_commit, mismatches = lser.find_base(topdir, branches=cmdargs.guessbranch,
+ maxdays=cmdargs.guessdays)
+ if mismatches == 0:
+ logger.critical(' Base: %s (exact match)', base_commit)
+ else:
+ logger.critical(' Base: %s (best guess, %s blobs not matched)', base_commit,
+ mismatches)
+ except IndexError:
+ logger.critical(' Base: not specified')
+ pass
+ else:
+ logger.critical(' Base: not specified')
+ else:
+ logger.critical(' Base: not specified')
+
+ if base_commit is not None:
+ logger.critical(' git checkout -b %s %s', gitbranch, base_commit)
+ if cmdargs.outdir != '-':
+ logger.critical(' git am %s', am_filename)
thanks_record_am(lser, cherrypick=cherrypick)