diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-05-01 17:35:42 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-05-01 17:35:42 -0400 |
commit | e38edc300448da8b365a819b7f178cfb0a4f69e7 (patch) | |
tree | f856e2111f53a7590d131c40768add9d1f64aa02 | |
parent | ba6c790d0ca342b2069d71315ae0c835e5bdc599 (diff) | |
download | b4-e38edc300448da8b365a819b7f178cfb0a4f69e7.tar.gz |
Check if mbox applies to current tree
Check if all patches in the mbox would apply cleanly to the current
tree:
- find index hash..hash information in each patch
- check if git-hash-object shows exact same hashes for the current tree
- if not, try the last 10 tags to see if any of them would be a good
base-commit for the patch/series
Not sure how useful the latter part is, but it hopefully shouldn't slow
down regular operations, so I'm going to leave it in for now.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | b4/__init__.py | 73 | ||||
-rw-r--r-- | b4/mbox.py | 40 |
2 files changed, 97 insertions, 16 deletions
diff --git a/b4/__init__.py b/b4/__init__.py index 96c4fd6..1361532 100644 --- a/b4/__init__.py +++ b/b4/__init__.py @@ -580,6 +580,48 @@ class LoreSeries: return mbx + def check_applies_clean(self, topdir, when=None): + # Go through indexes and see if this series should apply cleanly + mismatches = 0 + seenfiles = set() + for lmsg in self.patches[1:]: + if lmsg.blob_indexes is None: + continue + for fn, bh in lmsg.blob_indexes: + if fn in seenfiles: + # if we have seen this file once already, then it's a repeat patch + # and it's no longer going to match current hash + continue + seenfiles.add(fn) + fullpath = os.path.join(topdir, fn) + if when is None: + if not os.path.exists(fullpath): + mismatches += 1 + continue + cmdargs = ['hash-object', fullpath] + ecode, out = git_run_command(None, cmdargs) + else: + gitdir = os.path.join(topdir, '.git') + logger.debug('Checking hash on %s:%s', when, fn) + # XXX: We should probably pipe the two commands instead of reading into memory, + # so something to consider for the future + ecode, out = git_run_command(gitdir, ['show', f'{when}:{fn}']) + if ecode > 0: + # Couldn't get this file, continue + logger.debug('Could not look up %s:%s', when, fn) + mismatches += 1 + continue + cmdargs = ['hash-object', '--stdin'] + ecode, out = git_run_command(None, cmdargs, stdin=out.encode()) + if ecode == 0: + if out.find(bh) != 0: + logger.debug('%s hash: %s (expected: %s)', fn, out.strip(), bh) + mismatches += 1 + else: + logger.debug('%s hash: matched', fn) + + return len(seenfiles), mismatches + def save_cover(self, outfile): cover_msg = self.patches[0].get_am_message(add_trailers=False, trailer_order=None) with open(outfile, 'w') as fh: @@ -626,6 +668,8 @@ class LoreMessage: self.attestation = None # Patchwork hash self.pwhash = None + # Blob indexes + self.blob_indexes = None self.msgid = LoreMessage.get_clean_msgid(self.msg) self.lsubject = LoreSubject(msg['Subject']) @@ -866,6 +910,15 @@ class LoreMessage: return 
hashed.hexdigest() @staticmethod + def get_indexes(diff): + indexes = set() + for match in re.finditer(r'^diff\s+--git\s+\w/(.*)\s+\w/.*\nindex\s+([0-9a-f]+)\.\.[0-9a-f]+\s+[0-9]+$', + diff, flags=re.I | re.M): + fname, bindex = match.groups() + indexes.add((fname, bindex)) + return indexes + + @staticmethod def get_clean_diff(diff): diff = diff.replace('\r', '') @@ -939,19 +992,6 @@ class LoreMessage: buflines.append(line) return difflines - @staticmethod - def get_patch_hash(diff): - # The aim is to represent the patch as if you did the following: - # git diff HEAD~.. | dos2unix | sha256sum - # - # This subroutine removes anything at the beginning of diff data, like - # diffstat or any other auxiliary data, and anything trailing at the end - # - diff = LoreMessage.get_clean_diff(diff) - phasher = hashlib.sha256() - phasher.update(diff.encode('utf-8')) - return phasher.hexdigest() - def load_hashes(self): if self.attestation is not None: return @@ -984,8 +1024,13 @@ class LoreMessage: with open(patch_out[1], 'r') as pfh: patch = pfh.read() if len(patch.strip()): - p = LoreMessage.get_patch_hash(patch) + diff = LoreMessage.get_clean_diff(patch) + phasher = hashlib.sha256() + phasher.update(diff.encode('utf-8')) + p = phasher.hexdigest() self.pwhash = LoreMessage.get_patchwork_hash(patch) + # Load the indexes, if we have them + self.blob_indexes = LoreMessage.get_indexes(diff) os.unlink(patch_out[1]) if i and m and p: @@ -131,8 +131,44 @@ def mbox_to_am(mboxfile, cmdargs): logger.critical(' git checkout -b %s %s', gitbranch, base_commit) logger.critical(' git am %s', am_filename) else: - logger.critical(' Base: not found, sorry') - logger.critical(' git checkout -b %s master', gitbranch) + cleanmsg = '' + # Are we in a git tree and if so, what is our toplevel? 
+ gitargs = ['rev-parse', '--show-toplevel'] + lines = b4.git_get_command_lines(None, gitargs) + if len(lines) == 1: + topdir = lines[0] + checked, mismatches = lser.check_applies_clean(topdir) + if mismatches == 0 and checked != mismatches: + cleanmsg = ' (applies clean to current tree)' + else: + # Look at the last 10 tags and see if it applies cleanly to + # any of them. I'm not sure how useful this is, but I'm going + # to put it in for now and maybe remove later if it causes + # problems or slowness + if checked != mismatches: + best_matches = mismatches + cleanmsg = ' (best guess: current tree)' + else: + best_matches = None + # sort the tags by authordate + gitargs = ['tag', '-l', '--sort=-creatordate'] + lines = b4.git_get_command_lines(None, gitargs) + if lines: + # Check last 10 tags + for tag in lines[:10]: + logger.debug('Checking base-commit possibility for %s', tag) + checked, mismatches = lser.check_applies_clean(topdir, tag) + if mismatches == 0 and checked != mismatches: + base_commit = tag + break + # did they all mismatch? + if checked == mismatches: + continue + if best_matches is None or mismatches < best_matches: + best_matches = mismatches + cleanmsg = ' (best guess: %s)' % tag + + logger.critical(' Base: not found%s', cleanmsg) logger.critical(' git am %s', am_filename) am_mbx.close() |