about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-04-15 12:42:21 -0400
committer Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-04-15 12:42:21 -0400
commit 28b6825da28519b6ca0e8b3bce57700a1120ca9a (patch)
tree 7fb99d73ba8103752259eacd93920e343c2e38eb
parent 4be04f0af9a22087d0052838e013ef2de1ce2ac3 (diff)
download b4-28b6825da28519b6ca0e8b3bce57700a1120ca9a.tar.gz
Switch to using patchwork-compatible hashes
Using strict attestation hashes for auto-thankinator is problematic, because "git am" uses a certain degree of fuzzing, so when we try to find applied patches by running "git diff" on actual commits, line counts may not be bit-for-bit identical.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- b4/__init__.py 79
-rw-r--r-- b4/mbox.py 2
-rw-r--r-- b4/ty.py 41
3 files changed, 91 insertions, 31 deletions
diff --git a/b4/__init__.py b/b4/__init__.py
index d432ab1..626042a 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -620,6 +620,8 @@ class LoreMessage:
self.pr_remote_tip_commit = None
self.attestation = None
+ # Patchwork hash
+ self.pwhash = None
self.msgid = LoreMessage.get_clean_msgid(self.msg)
self.lsubject = LoreSubject(msg['Subject'])
@@ -815,20 +817,57 @@ class LoreMessage:
return msgid
@staticmethod
- def get_patch_hash(diff):
- # The aim is to represent the patch as if you did the following:
- # git diff HEAD~.. | dos2unix | sha256sum
- #
- # This subroutine removes anything at the beginning of diff data, like
- # diffstat or any other auxiliary data, and anything trailing at the end
- # XXX: This currently doesn't work for git binary patches
- #
+ def get_patchwork_hash(diff):
+ # Make sure we just have the diff without any extraneous content.
+ diff = LoreMessage.get_clean_diff(diff)
+ """Generate a hash from a diff. Lifted verbatim from patchwork."""
+
+ prefixes = ['-', '+', ' ']
+ hashed = hashlib.sha1()
+
+ for line in diff.split('\n'):
+ if len(line) <= 0:
+ continue
+
+ hunk_match = HUNK_RE.match(line)
+ filename_match = FILENAME_RE.match(line)
+
+ if filename_match:
+ # normalise -p1 top-directories
+ if filename_match.group(1) == '---':
+ filename = 'a/'
+ else:
+ filename = 'b/'
+ filename += '/'.join(filename_match.group(2).split('/')[1:])
+
+ line = filename_match.group(1) + ' ' + filename
+ elif hunk_match:
+ # remove line numbers, but leave line counts
+ def fn(x):
+ if not x:
+ return 1
+ return int(x)
+
+ line_nos = list(map(fn, hunk_match.groups()))
+ line = '@@ -%d +%d @@' % tuple(line_nos)
+ elif line[0] in prefixes:
+ # if we have a +, - or context line, leave as-is
+ pass
+ else:
+ # other lines are ignored
+ continue
+
+ hashed.update((line + '\n').encode('utf-8'))
+
+ return hashed.hexdigest()
+
+ @staticmethod
+ def get_clean_diff(diff):
diff = diff.replace('\r', '')
# For keeping a buffer of lines preceding @@ ... @@
buflines = list()
-
- phasher = hashlib.sha256()
+ difflines = ''
# Used for counting where we are in the patch
pp = 0
@@ -846,21 +885,34 @@ class LoreMessage:
break
addlines.append(bline)
if addlines:
- phasher.update(('\n'.join(reversed(addlines)) + '\n').encode('utf-8'))
+ difflines += '\n'.join(reversed(addlines)) + '\n'
buflines = list()
# Feed this line to the hasher
- phasher.update((line + '\n').encode('utf-8'))
+ difflines += line + '\n'
continue
if pp > 0:
# Inside the patch
- phasher.update((line + '\n').encode('utf-8'))
+ difflines += line + '\n'
if len(line) and line[0] == '-':
continue
pp -= 1
continue
# Not anything we recognize, so stick into buflines
buflines.append(line)
+ return difflines
+ @staticmethod
+ def get_patch_hash(diff):
+ # The aim is to represent the patch as if you did the following:
+ # git diff HEAD~.. | dos2unix | sha256sum
+ #
+ # This subroutine removes anything at the beginning of diff data, like
+ # diffstat or any other auxiliary data, and anything trailing at the end
+ # XXX: This currently doesn't work for git binary patches
+ #
+ diff = LoreMessage.get_clean_diff(diff)
+ phasher = hashlib.sha256()
+ phasher.update(diff.encode('utf-8'))
return phasher.hexdigest()
def load_hashes(self):
@@ -896,6 +948,7 @@ class LoreMessage:
patch = pfh.read()
if len(patch.strip()):
p = LoreMessage.get_patch_hash(patch)
+ self.pwhash = LoreMessage.get_patchwork_hash(patch)
os.unlink(patch_out[1])
if i and m and p:
diff --git a/b4/mbox.py b/b4/mbox.py
index 8c4ddec..f64ed6f 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -161,7 +161,7 @@ def thanks_record_am(lser):
if pmsg.attestation is None:
logger.debug('Unable to get hashes for all patches, not tracking for thanks')
return
- patches.append((pmsg.subject, pmsg.attestation.p))
+ patches.append((pmsg.subject, pmsg.pwhash))
lmsg = lser.patches[0]
if lmsg is None:
diff --git a/b4/ty.py b/b4/ty.py
index b1861e4..888da9b 100644
--- a/b4/ty.py
+++ b/b4/ty.py
@@ -158,7 +158,8 @@ def get_all_commits(gitdir, branch, since='1.week', committer=None):
for line in lines:
commit_id, subject = line.split(maxsplit=1)
ecode, out = git_get_rev_diff(gitdir, commit_id)
- pwhash = b4.LoreMessage.get_patch_hash(out)
+ pwhash = b4.LoreMessage.get_patchwork_hash(out)
+ logger.debug('phash=%s', pwhash)
MY_COMMITS[pwhash] = (commit_id, subject)
return MY_COMMITS
@@ -171,6 +172,7 @@ def auto_locate_series(gitdir, jsondata, branch, since='1.week', loose=False):
# We need to find all of them in the commits
found = list()
for patch in jsondata['patches']:
+ logger.debug('Checking %s', patch)
if patch[1] in patchids:
logger.debug('Found: %s', patch[0])
found.append(commits[patch[1]])
@@ -182,6 +184,7 @@ def auto_locate_series(gitdir, jsondata, branch, since='1.week', loose=False):
break
if len(found) == len(jsondata['patches']):
+ logger.debug('Found all the patches')
return found
return None
@@ -376,21 +379,24 @@ def send_selected(cmdargs):
logger.info('Nothing to do')
sys.exit(0)
- listing = list()
- for num in cmdargs.send:
- try:
- index = int(num) - 1
- listing.append(tracked[index])
- except ValueError:
- logger.critical('Please provide the number of the message')
- logger.info('---')
- write_tracked(tracked)
- sys.exit(1)
- except IndexError:
- logger.critical('Invalid index: %s', num)
- logger.info('---')
- write_tracked(tracked)
- sys.exit(1)
+ if 'all' in cmdargs.discard:
+ listing = tracked
+ else:
+ listing = list()
+ for num in cmdargs.send:
+ try:
+ index = int(num) - 1
+ listing.append(tracked[index])
+ except ValueError:
+ logger.critical('Please provide the number of the message')
+ logger.info('---')
+ write_tracked(tracked)
+ sys.exit(1)
+ except IndexError:
+ logger.critical('Invalid index: %s', num)
+ logger.info('---')
+ write_tracked(tracked)
+ sys.exit(1)
if not len(listing):
logger.info('Nothing to do')
sys.exit(0)
@@ -453,12 +459,13 @@ def get_wanted_branch(cmdargs):
gitdir = cmdargs.gitdir
if not cmdargs.branch:
# Find out our current branch
- gitargs = ['branch', '--show-current']
+ gitargs = ['rev-parse', '--abbrev-ref', 'HEAD']
ecode, out = b4.git_run_command(gitdir, gitargs)
if ecode > 0:
logger.critical('Not able to get current branch (git branch --show-current)')
sys.exit(1)
wantbranch = out.strip()
+ logger.debug('will check branch=%s', wantbranch)
else:
# Make sure it's a real branch
gitargs = ['branch', '--format=%(refname:short)', '--list']