summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKonstantin Ryabitsev <konstantin@linuxfoundation.org>2020-05-01 17:35:42 -0400
committerKonstantin Ryabitsev <konstantin@linuxfoundation.org>2020-05-01 17:35:42 -0400
commite38edc300448da8b365a819b7f178cfb0a4f69e7 (patch)
treef856e2111f53a7590d131c40768add9d1f64aa02
parentba6c790d0ca342b2069d71315ae0c835e5bdc599 (diff)
downloadb4-e38edc300448da8b365a819b7f178cfb0a4f69e7.tar.gz
Check if mbox applies to current tree
Check if all patches in the mbox would apply cleanly to the current tree: - find index hash..hash information in each patch - check if git-hash-object shows exact same hashes for the current tree - if not, try the last 10 tags to see if any of them would be a good base-commit for the patch/series Not sure how useful the latter part is, but it hopefully shouldn't slow down regular operations, so I'm going to leave it in for now. Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r--b4/__init__.py73
-rw-r--r--b4/mbox.py40
2 files changed, 97 insertions, 16 deletions
diff --git a/b4/__init__.py b/b4/__init__.py
index 96c4fd6..1361532 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -580,6 +580,48 @@ class LoreSeries:
return mbx
+ def check_applies_clean(self, topdir, when=None):
+ # Go through indexes and see if this series should apply cleanly
+ mismatches = 0
+ seenfiles = set()
+ for lmsg in self.patches[1:]:
+ if lmsg.blob_indexes is None:
+ continue
+ for fn, bh in lmsg.blob_indexes:
+ if fn in seenfiles:
+ # if we have seen this file once already, then it's a repeat patch
+ # and it's no longer going to match current hash
+ continue
+ seenfiles.add(fn)
+ fullpath = os.path.join(topdir, fn)
+ if when is None:
+ if not os.path.exists(fullpath):
+ mismatches += 1
+ continue
+ cmdargs = ['hash-object', fullpath]
+ ecode, out = git_run_command(None, cmdargs)
+ else:
+ gitdir = os.path.join(topdir, '.git')
+ logger.debug('Checking hash on %s:%s', when, fn)
+ # XXX: We should probably pipe the two commands instead of reading into memory,
+ # so something to consider for the future
+ ecode, out = git_run_command(gitdir, ['show', f'{when}:{fn}'])
+ if ecode > 0:
+ # Couldn't get this file, continue
+ logger.debug('Could not look up %s:%s', when, fn)
+ mismatches += 1
+ continue
+ cmdargs = ['hash-object', '--stdin']
+ ecode, out = git_run_command(None, cmdargs, stdin=out.encode())
+ if ecode == 0:
+ if out.find(bh) != 0:
+ logger.debug('%s hash: %s (expected: %s)', fn, out.strip(), bh)
+ mismatches += 1
+ else:
+ logger.debug('%s hash: matched', fn)
+
+ return len(seenfiles), mismatches
+
def save_cover(self, outfile):
cover_msg = self.patches[0].get_am_message(add_trailers=False, trailer_order=None)
with open(outfile, 'w') as fh:
@@ -626,6 +668,8 @@ class LoreMessage:
self.attestation = None
# Patchwork hash
self.pwhash = None
+ # Blob indexes
+ self.blob_indexes = None
self.msgid = LoreMessage.get_clean_msgid(self.msg)
self.lsubject = LoreSubject(msg['Subject'])
@@ -866,6 +910,15 @@ class LoreMessage:
return hashed.hexdigest()
@staticmethod
+ def get_indexes(diff):
+ indexes = set()
+ for match in re.finditer(r'^diff\s+--git\s+\w/(.*)\s+\w/.*\nindex\s+([0-9a-f]+)\.\.[0-9a-f]+\s+[0-9]+$',
+ diff, flags=re.I | re.M):
+ fname, bindex = match.groups()
+ indexes.add((fname, bindex))
+ return indexes
+
+ @staticmethod
def get_clean_diff(diff):
diff = diff.replace('\r', '')
@@ -939,19 +992,6 @@ class LoreMessage:
buflines.append(line)
return difflines
- @staticmethod
- def get_patch_hash(diff):
- # The aim is to represent the patch as if you did the following:
- # git diff HEAD~.. | dos2unix | sha256sum
- #
- # This subroutine removes anything at the beginning of diff data, like
- # diffstat or any other auxiliary data, and anything trailing at the end
- #
- diff = LoreMessage.get_clean_diff(diff)
- phasher = hashlib.sha256()
- phasher.update(diff.encode('utf-8'))
- return phasher.hexdigest()
-
def load_hashes(self):
if self.attestation is not None:
return
@@ -984,8 +1024,13 @@ class LoreMessage:
with open(patch_out[1], 'r') as pfh:
patch = pfh.read()
if len(patch.strip()):
- p = LoreMessage.get_patch_hash(patch)
+ diff = LoreMessage.get_clean_diff(patch)
+ phasher = hashlib.sha256()
+ phasher.update(diff.encode('utf-8'))
+ p = phasher.hexdigest()
self.pwhash = LoreMessage.get_patchwork_hash(patch)
+ # Load the indexes, if we have them
+ self.blob_indexes = LoreMessage.get_indexes(diff)
os.unlink(patch_out[1])
if i and m and p:
diff --git a/b4/mbox.py b/b4/mbox.py
index f64ed6f..5494cab 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -131,8 +131,44 @@ def mbox_to_am(mboxfile, cmdargs):
logger.critical(' git checkout -b %s %s', gitbranch, base_commit)
logger.critical(' git am %s', am_filename)
else:
- logger.critical(' Base: not found, sorry')
- logger.critical(' git checkout -b %s master', gitbranch)
+ cleanmsg = ''
+ # Are we in a git tree and if so, what is our toplevel?
+ gitargs = ['rev-parse', '--show-toplevel']
+ lines = b4.git_get_command_lines(None, gitargs)
+ if len(lines) == 1:
+ topdir = lines[0]
+ checked, mismatches = lser.check_applies_clean(topdir)
+ if mismatches == 0 and checked != mismatches:
+ cleanmsg = ' (applies clean to current tree)'
+ else:
+ # Look at the last 10 tags and see if it applies cleanly to
+ # any of them. I'm not sure how useful this is, but I'm going
+ # to put it in for now and maybe remove later if it causes
+ # problems or slowness
+ if checked != mismatches:
+ best_matches = mismatches
+ cleanmsg = ' (best guess: current tree)'
+ else:
+ best_matches = None
+ # sort the tags by creatordate, newest first
+ gitargs = ['tag', '-l', '--sort=-creatordate']
+ lines = b4.git_get_command_lines(None, gitargs)
+ if lines:
+ # Check last 10 tags
+ for tag in lines[:10]:
+ logger.debug('Checking base-commit possibility for %s', tag)
+ checked, mismatches = lser.check_applies_clean(topdir, tag)
+ if mismatches == 0 and checked != mismatches:
+ base_commit = tag
+ break
+ # did they all mismatch?
+ if checked == mismatches:
+ continue
+ if best_matches is None or mismatches < best_matches:
+ best_matches = mismatches
+ cleanmsg = ' (best guess: %s)' % tag
+
+ logger.critical(' Base: not found%s', cleanmsg)
logger.critical(' git am %s', am_filename)
am_mbx.close()