From 9622ab417325c6d60f5e9e95f4e3786bf6f0e2d5 Mon Sep 17 00:00:00 2001
From: Konstantin Ryabitsev
Date: Mon, 21 Jun 2021 15:53:39 -0400
Subject: Reimplement --guess-base

Based on some feedback, attempt to reimplement --guess-base by looking
at the file index hashes and using --find-object to locate when they
were last changed. We limit this using --since and --until, so that we
aren't trying to look through the entire history of the repo.

For the --until date, we take the date of the patch. For the --since
date, we take the timedelta using the number of days specified by
--guess-lookback (default is 14 days).

Signed-off-by: Konstantin Ryabitsev
---
Review notes (text between "---" and the first "diff --git" line is
commentary and is not part of the commit):
 * find_base(): the return annotation read Tuple[str, len]; len is a
   builtin function, not a type, and the value returned is the integer
   count of unmatched blobs, so it now reads Tuple[str, int].
 * make_am(): "checked != mismatches" compared an int with the list now
   returned by check_applies_clean() and was therefore always true; it
   now compares against len(mismatches), the same form find_base() uses.
 * Both fixes replace one added line with one added line, so the hunk
   line counts and the diffstat below are unchanged. The "index" blob
   ids are kept from the original patch and no longer describe the
   edited post-image exactly.
 * Line breaks and indentation were rebuilt from a whitespace-collapsed
   copy of this patch; confirm them against the upstream commit.

 b4/__init__.py | 126 +++++++++++++++++++++++++++++++++++++++------------------
 b4/command.py  |   4 ++
 b4/mbox.py     |  63 ++++++++++++-----------------
 3 files changed, 116 insertions(+), 77 deletions(-)

diff --git a/b4/__init__.py b/b4/__init__.py
index 8d8911d..53eaf27 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -450,6 +450,8 @@ class LoreSeries:
         self.has_cover = False
         self.partial_reroll = False
         self.subject = '(untitled)'
+        # Used for base matching
+        self._indexes = None
 
     def __repr__(self):
         out = list()
@@ -628,49 +630,95 @@ class LoreSeries:
 
         return msgs
 
-    def check_applies_clean(self, gitdir, when=None):
-        # Go through indexes and see if this series should apply cleanly
-        mismatches = 0
-        seenfiles = set()
-        for lmsg in self.patches[1:]:
-            if lmsg is None or lmsg.blob_indexes is None:
-                continue
-            for fn, bh in lmsg.blob_indexes:
-                if fn in seenfiles:
-                    # if we have seen this file once already, then it's a repeat patch
-                    # and it's no longer going to match current hash
-                    continue
-                seenfiles.add(fn)
-                if set(bh) == {'0'}:
-                    # New file, will for sure apply clean
+    def check_applies_clean(self, gitdir, at='HEAD'):
+        if self._indexes is None:
+            self._indexes = list()
+            seenfiles = set()
+            for lmsg in self.patches[1:]:
+                if lmsg is None or lmsg.blob_indexes is None:
                     continue
-                fullpath = os.path.join(gitdir, fn)
-                if when is None:
-                    if not os.path.exists(fullpath):
-                        mismatches += 1
+                for fn, bh in lmsg.blob_indexes:
+                    if fn in seenfiles:
+                        # if we have seen this file once already, then it's a repeat patch
+                        # and it's no longer going to match current hash
                         continue
-                    cmdargs = ['hash-object', fullpath]
-                    ecode, out = git_run_command(None, cmdargs)
-                else:
-                    logger.debug('Checking hash on %s:%s', when, fn)
-                    # XXX: We should probably pipe the two commands instead of reading into memory,
-                    # so something to consider for the future
-                    ecode, out = git_run_command(gitdir, ['show', f'{when}:{fn}'])
-                    if ecode > 0:
-                        # Couldn't get this file, continue
-                        logger.debug('Could not look up %s:%s', when, fn)
-                        mismatches += 1
+                    seenfiles.add(fn)
+                    if set(bh) == {'0'}:
+                        # New file, will for sure apply clean
                         continue
-                    cmdargs = ['hash-object', '--stdin']
-                    ecode, out = git_run_command(None, cmdargs, stdin=out.encode())
-                if ecode == 0:
-                    if out.find(bh) != 0:
-                        logger.debug('%s hash: %s (expected: %s)', fn, out.strip(), bh)
-                        mismatches += 1
-                    else:
-                        logger.debug('%s hash: matched', fn)
+                    self._indexes.append((fn, bh))
+
+        mismatches = list()
+        for fn, bh in self._indexes:
+            ecode, out = git_run_command(gitdir, ['ls-tree', at, fn])
+            if ecode == 0 and len(out):
+                chunks = out.split()
+                if chunks[2].startswith(bh):
+                    logger.debug('%s hash: matched', fn)
+                    continue
+                else:
+                    logger.debug('%s hash: %s (expected: %s)', fn, chunks[2], bh)
+            else:
+                # Couldn't get this file, continue
+                logger.debug('Could not look up %s:%s', at, fn)
+            mismatches.append((fn, bh))
+
+        return len(self._indexes), mismatches
+
+    def find_base(self, gitdir: str, branches: str = 'HEAD', maxdays: int = 30) -> Tuple[str, int]:
+        # Find the date of the first patch we have
+        pdate = datetime.datetime.now()
+        for lmsg in self.patches:
+            if lmsg is None:
+                continue
+            pdate = lmsg.date
+            break
+
+        # Find latest commit on that date
+        guntil = pdate.strftime('%Y-%m-%d')
+        gitargs = ['log', '--pretty=oneline', '--until', guntil, '--max-count=1', '--branches', branches]
+        lines = git_get_command_lines(gitdir, gitargs)
+        if not lines:
+            raise IndexError
+        commit = lines[0].split()[0]
+        checked, mismatches = self.check_applies_clean(gitdir, commit)
+        fewest = len(mismatches)
+        if fewest > 0:
+            since = pdate - datetime.timedelta(days=maxdays)
+            gsince = since.strftime('%Y-%m-%d')
+            logger.debug('Starting --find-object from %s to %s', gsince, guntil)
+            best = commit
+            for fn, bi in mismatches:
+                logger.debug('Finding tree matching %s=%s in %s', fn, bi, branches)
+                gitargs = ['log', '-m', '--pretty=oneline', '--since', gsince, '--until', guntil,
+                           '--max-count=1', '--find-object', bi, '--branches', branches]
+                lines = git_get_command_lines(gitdir, gitargs)
+                if not lines:
+                    logger.debug('Could not find object %s in the tree', bi)
+                    continue
+                commit = lines[0].split()[0]
+                logger.debug('commit=%s', commit)
+                # We try both that commit and the one preceding it, in case it was a delete
+                # Keep track of the fewest mismatches
+                for tc in [commit, f'{commit}~1']:
+                    sc, sm = self.check_applies_clean(gitdir, tc)
+                    if len(sm) < fewest and len(sm) != sc:
+                        fewest = len(sm)
+                        best = tc
+                        logger.debug('fewest=%s, best=%s', fewest, best)
+                        if fewest == 0:
+                            break
+
+                if fewest == 0:
+                    break
+        else:
+            best = commit
+
+        lines = git_get_command_lines(gitdir, ['describe', best])
+        if len(lines):
+            return lines[0], fewest
 
-        return len(seenfiles), mismatches
+        raise IndexError
 
     def make_fake_am_range(self, gitdir):
         start_commit = end_commit = None
diff --git a/b4/command.py b/b4/command.py
index 2d6994d..ebbb361 100644
--- a/b4/command.py
+++ b/b4/command.py
@@ -120,6 +120,10 @@ def cmd():
                             '"-P *globbing*" to match on commit subject)')
     sp_am.add_argument('-g', '--guess-base', dest='guessbase', action='store_true', default=False,
                        help='Try to guess the base of the series (if not specified)')
+    sp_am.add_argument('-b', '--guess-branch', dest='guessbranch', default='HEAD',
+                       help='When guessing base, use this branch instead of HEAD (use with -g)')
+    sp_am.add_argument('--guess-lookback', dest='guessdays', type=int, default=14,
+                       help='When guessing base, go back this many days from the date of the patch')
     sp_am.add_argument('-3', '--prep-3way', dest='threeway', action='store_true', default=False,
                        help='Prepare for a 3-way merge '
                             '(tries to ensure that all index blobs exist by making a fake commit range)')
diff --git a/b4/mbox.py b/b4/mbox.py
index e722d05..98bd920 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -228,46 +228,33 @@ def make_am(msgs, cmdargs, msgid):
 
     if base_commit:
         logger.critical(' Base: %s', base_commit)
-        logger.critical(' git checkout -b %s %s', gitbranch, base_commit)
-        if cmdargs.outdir != '-':
-            logger.critical(' git am %s', am_filename)
     else:
-        cleanmsg = ''
         if topdir is not None:
-            checked, mismatches = lser.check_applies_clean(topdir)
-            if mismatches == 0 and checked != mismatches:
-                cleanmsg = ' (applies clean to current tree)'
-            elif cmdargs.guessbase:
-                # Look at the last 10 tags and see if it applies cleanly to
-                # any of them. I'm not sure how useful this is, but I'm going
-                # to put it in for now and maybe remove later if it causes
-                # problems or slowness
-                if checked != mismatches:
-                    best_matches = mismatches
-                    cleanmsg = ' (best guess: current tree)'
-                else:
-                    best_matches = None
-                # sort the tags by authordate
-                gitargs = ['tag', '-l', '--sort=-taggerdate']
-                lines = b4.git_get_command_lines(None, gitargs)
-                if lines:
-                    # Check last 10 tags
-                    for tag in lines[:10]:
-                        logger.debug('Checking base-commit possibility for %s', tag)
-                        checked, mismatches = lser.check_applies_clean(topdir, tag)
-                        if mismatches == 0 and checked != mismatches:
-                            cleanmsg = ' (applies clean to: %s)' % tag
-                            break
-                        # did they all mismatch?
-                        if checked == mismatches:
-                            continue
-                        if best_matches is None or mismatches < best_matches:
-                            best_matches = mismatches
-                            cleanmsg = ' (best guess: %s)' % tag
-
-        logger.critical(' Base: not found%s', cleanmsg)
-        if cmdargs.outdir != '-':
-            logger.critical(' git am %s', am_filename)
+            checked, mismatches = lser.check_applies_clean(topdir, at=cmdargs.guessbranch)
+            if len(mismatches) == 0 and checked != len(mismatches):
+                logger.critical(' Base: current tree')
+            elif len(mismatches) and cmdargs.guessbase:
+                logger.critical(' attempting to guess base-commit...')
+                try:
+                    base_commit, mismatches = lser.find_base(topdir, branches=cmdargs.guessbranch,
+                                                             maxdays=cmdargs.guessdays)
+                    if mismatches == 0:
+                        logger.critical(' Base: %s (exact match)', base_commit)
+                    else:
+                        logger.critical(' Base: %s (best guess, %s blobs not matched)', base_commit,
+                                        mismatches)
+                except IndexError:
+                    logger.critical(' Base: not specified')
+                    pass
+            else:
+                logger.critical(' Base: not specified')
+        else:
+            logger.critical(' Base: not specified')
+
+    if base_commit is not None:
+        logger.critical(' git checkout -b %s %s', gitbranch, base_commit)
+    if cmdargs.outdir != '-':
+        logger.critical(' git am %s', am_filename)
 
     thanks_record_am(lser, cherrypick=cherrypick)
 
-- 
cgit v1.2.3