From e38edc300448da8b365a819b7f178cfb0a4f69e7 Mon Sep 17 00:00:00 2001 From: Konstantin Ryabitsev Date: Fri, 1 May 2020 17:35:42 -0400 Subject: Check if mbox applies to current tree Check if all patches in the mbox would apply cleanly to the current tree: - find index hash..hash information in each patch - check if git-hash-object shows exact same hashes for the current tree - if not, try the last 10 tags to see if any of them would be a good base-commit for the patch/series Not sure how useful the latter part is, but it hopefully shouldn't slow down regular operations, so I'm going to leave it in for now. Signed-off-by: Konstantin Ryabitsev --- b4/__init__.py | 73 +++++++++++++++++++++++++++++++++++++++++++++++----------- b4/mbox.py | 40 ++++++++++++++++++++++++++++++-- 2 files changed, 97 insertions(+), 16 deletions(-) diff --git a/b4/__init__.py b/b4/__init__.py index 96c4fd6..1361532 100644 --- a/b4/__init__.py +++ b/b4/__init__.py @@ -580,6 +580,48 @@ class LoreSeries: return mbx + def check_applies_clean(self, topdir, when=None): + # Go through indexes and see if this series should apply cleanly + mismatches = 0 + seenfiles = set() + for lmsg in self.patches[1:]: + if lmsg.blob_indexes is None: + continue + for fn, bh in lmsg.blob_indexes: + if fn in seenfiles: + # if we have seen this file once already, then it's a repeat patch + # and it's no longer going to match current hash + continue + seenfiles.add(fn) + fullpath = os.path.join(topdir, fn) + if when is None: + if not os.path.exists(fullpath): + mismatches += 1 + continue + cmdargs = ['hash-object', fullpath] + ecode, out = git_run_command(None, cmdargs) + else: + gitdir = os.path.join(topdir, '.git') + logger.debug('Checking hash on %s:%s', when, fn) + # XXX: We should probably pipe the two commands instead of reading into memory, + # so something to consider for the future + ecode, out = git_run_command(gitdir, ['show', f'{when}:{fn}']) + if ecode > 0: + # Couldn't get this file, continue + 
logger.debug('Could not look up %s:%s', when, fn) + mismatches += 1 + continue + cmdargs = ['hash-object', '--stdin'] + ecode, out = git_run_command(None, cmdargs, stdin=out.encode()) + if ecode == 0: + if out.find(bh) != 0: + logger.debug('%s hash: %s (expected: %s)', fn, out.strip(), bh) + mismatches += 1 + else: + logger.debug('%s hash: matched', fn) + + return len(seenfiles), mismatches + def save_cover(self, outfile): cover_msg = self.patches[0].get_am_message(add_trailers=False, trailer_order=None) with open(outfile, 'w') as fh: @@ -626,6 +668,8 @@ class LoreMessage: self.attestation = None # Patchwork hash self.pwhash = None + # Blob indexes + self.blob_indexes = None self.msgid = LoreMessage.get_clean_msgid(self.msg) self.lsubject = LoreSubject(msg['Subject']) @@ -865,6 +909,15 @@ class LoreMessage: return hashed.hexdigest() + @staticmethod + def get_indexes(diff): + indexes = set() + for match in re.finditer(r'^diff\s+--git\s+\w/(.*)\s+\w/.*\nindex\s+([0-9a-f]+)\.\.[0-9a-f]+\s+[0-9]+$', + diff, flags=re.I | re.M): + fname, bindex = match.groups() + indexes.add((fname, bindex)) + return indexes + @staticmethod def get_clean_diff(diff): diff = diff.replace('\r', '') @@ -939,19 +992,6 @@ class LoreMessage: buflines.append(line) return difflines - @staticmethod - def get_patch_hash(diff): - # The aim is to represent the patch as if you did the following: - # git diff HEAD~.. 
| dos2unix | sha256sum - # - # This subroutine removes anything at the beginning of diff data, like - # diffstat or any other auxiliary data, and anything trailing at the end - # - diff = LoreMessage.get_clean_diff(diff) - phasher = hashlib.sha256() - phasher.update(diff.encode('utf-8')) - return phasher.hexdigest() - def load_hashes(self): if self.attestation is not None: return @@ -984,8 +1024,13 @@ class LoreMessage: with open(patch_out[1], 'r') as pfh: patch = pfh.read() if len(patch.strip()): - p = LoreMessage.get_patch_hash(patch) + diff = LoreMessage.get_clean_diff(patch) + phasher = hashlib.sha256() + phasher.update(diff.encode('utf-8')) + p = phasher.hexdigest() self.pwhash = LoreMessage.get_patchwork_hash(patch) + # Load the indexes, if we have them + self.blob_indexes = LoreMessage.get_indexes(diff) os.unlink(patch_out[1]) if i and m and p: diff --git a/b4/mbox.py b/b4/mbox.py index f64ed6f..5494cab 100644 --- a/b4/mbox.py +++ b/b4/mbox.py @@ -131,8 +131,44 @@ def mbox_to_am(mboxfile, cmdargs): logger.critical(' git checkout -b %s %s', gitbranch, base_commit) logger.critical(' git am %s', am_filename) else: - logger.critical(' Base: not found, sorry') - logger.critical(' git checkout -b %s master', gitbranch) + cleanmsg = '' + # Are we in a git tree and if so, what is our toplevel? + gitargs = ['rev-parse', '--show-toplevel'] + lines = b4.git_get_command_lines(None, gitargs) + if len(lines) == 1: + topdir = lines[0] + checked, mismatches = lser.check_applies_clean(topdir) + if mismatches == 0 and checked != mismatches: + cleanmsg = ' (applies clean to current tree)' + else: + # Look at the last 10 tags and see if it applies cleanly to + # any of them. 
I'm not sure how useful this is, but I'm going + # to put it in for now and maybe remove later if it causes + # problems or slowness + if checked != mismatches: + best_matches = mismatches + cleanmsg = ' (best guess: current tree)' + else: + best_matches = None + # sort the tags by creatordate, newest first + gitargs = ['tag', '-l', '--sort=-creatordate'] + lines = b4.git_get_command_lines(None, gitargs) + if lines: + # Check last 10 tags + for tag in lines[:10]: + logger.debug('Checking base-commit possibility for %s', tag) + checked, mismatches = lser.check_applies_clean(topdir, tag) + if mismatches == 0 and checked != mismatches: + base_commit = tag + break + # did they all mismatch? + if checked == mismatches: + continue + if best_matches is None or mismatches < best_matches: + best_matches = mismatches + cleanmsg = ' (best guess: %s)' % tag + + logger.critical(' Base: not found%s', cleanmsg) logger.critical(' git am %s', am_filename) am_mbx.close() -- cgit v1.2.3