From 28b6825da28519b6ca0e8b3bce57700a1120ca9a Mon Sep 17 00:00:00 2001 From: Konstantin Ryabitsev Date: Wed, 15 Apr 2020 12:42:21 -0400 Subject: Switch to using patchwork-compatible hashes Using strict attestation hashes for auto-thankinator is problematic, because "git am" uses a certain degree of fuzzing, so when we try to find applied patches by running "git diff" on actual commits, line counts may not be bit-for-bit identical. Signed-off-by: Konstantin Ryabitsev --- b4/__init__.py | 79 ++++++++++++++++++++++++++++++++++++++++++++++++---------- b4/mbox.py | 2 +- b4/ty.py | 41 +++++++++++++++++------------- 3 files changed, 91 insertions(+), 31 deletions(-) diff --git a/b4/__init__.py b/b4/__init__.py index d432ab1..626042a 100644 --- a/b4/__init__.py +++ b/b4/__init__.py @@ -620,6 +620,8 @@ class LoreMessage: self.pr_remote_tip_commit = None self.attestation = None + # Patchwork hash + self.pwhash = None self.msgid = LoreMessage.get_clean_msgid(self.msg) self.lsubject = LoreSubject(msg['Subject']) @@ -815,20 +817,57 @@ class LoreMessage: return msgid @staticmethod - def get_patch_hash(diff): - # The aim is to represent the patch as if you did the following: - # git diff HEAD~.. | dos2unix | sha256sum - # - # This subroutine removes anything at the beginning of diff data, like - # diffstat or any other auxiliary data, and anything trailing at the end - # XXX: This currently doesn't work for git binary patches - # + def get_patchwork_hash(diff): + # Make sure we just have the diff without any extraneous content. + diff = LoreMessage.get_clean_diff(diff) + """Generate a hash from a diff. Lifted verbatim from patchwork.""" + + prefixes = ['-', '+', ' '] + hashed = hashlib.sha1() + + for line in diff.split('\n'): + if len(line) <= 0: + continue + + hunk_match = HUNK_RE.match(line) + filename_match = FILENAME_RE.match(line) + + if filename_match: + # normalise -p1 top-directories + if filename_match.group(1) == '---': + filename = 'a/' + else: + filename = 'b/' + filename += '/'.join(filename_match.group(2).split('/')[1:]) + + line = filename_match.group(1) + ' ' + filename + elif hunk_match: + # remove line numbers, but leave line counts + def fn(x): + if not x: + return 1 + return int(x) + + line_nos = list(map(fn, hunk_match.groups())) + line = '@@ -%d +%d @@' % tuple(line_nos) + elif line[0] in prefixes: + # if we have a +, - or context line, leave as-is + pass + else: + # other lines are ignored + continue + + hashed.update((line + '\n').encode('utf-8')) + + return hashed.hexdigest() + + @staticmethod + def get_clean_diff(diff): diff = diff.replace('\r', '') # For keeping a buffer of lines preceding @@ ... @@ buflines = list() - - phasher = hashlib.sha256() + difflines = '' # Used for counting where we are in the patch pp = 0 @@ -846,21 +885,34 @@ class LoreMessage: break addlines.append(bline) if addlines: - phasher.update(('\n'.join(reversed(addlines)) + '\n').encode('utf-8')) + difflines += '\n'.join(reversed(addlines)) + '\n' buflines = list() # Feed this line to the hasher - phasher.update((line + '\n').encode('utf-8')) + difflines += line + '\n' continue if pp > 0: # Inside the patch - phasher.update((line + '\n').encode('utf-8')) + difflines += line + '\n' if len(line) and line[0] == '-': continue pp -= 1 continue # Not anything we recognize, so stick into buflines buflines.append(line) + return difflines + @staticmethod + def get_patch_hash(diff): + # The aim is to represent the patch as if you did the following: + # git diff HEAD~.. | dos2unix | sha256sum + # + # This subroutine removes anything at the beginning of diff data, like + # diffstat or any other auxiliary data, and anything trailing at the end + # XXX: This currently doesn't work for git binary patches + # + diff = LoreMessage.get_clean_diff(diff) + phasher = hashlib.sha256() + phasher.update(diff.encode('utf-8')) return phasher.hexdigest() def load_hashes(self): @@ -896,6 +948,7 @@ class LoreMessage: patch = pfh.read() if len(patch.strip()): p = LoreMessage.get_patch_hash(patch) + self.pwhash = LoreMessage.get_patchwork_hash(patch) os.unlink(patch_out[1]) if i and m and p: diff --git a/b4/mbox.py b/b4/mbox.py index 8c4ddec..f64ed6f 100644 --- a/b4/mbox.py +++ b/b4/mbox.py @@ -161,7 +161,7 @@ def thanks_record_am(lser): if pmsg.attestation is None: logger.debug('Unable to get hashes for all patches, not tracking for thanks') return - patches.append((pmsg.subject, pmsg.attestation.p)) + patches.append((pmsg.subject, pmsg.pwhash)) lmsg = lser.patches[0] if lmsg is None: diff --git a/b4/ty.py b/b4/ty.py index b1861e4..888da9b 100644 --- a/b4/ty.py +++ b/b4/ty.py @@ -158,7 +158,8 @@ def get_all_commits(gitdir, branch, since='1.week', committer=None): for line in lines: commit_id, subject = line.split(maxsplit=1) ecode, out = git_get_rev_diff(gitdir, commit_id) - pwhash = b4.LoreMessage.get_patch_hash(out) + pwhash = b4.LoreMessage.get_patchwork_hash(out) + logger.debug('phash=%s', pwhash) MY_COMMITS[pwhash] = (commit_id, subject) return MY_COMMITS @@ -171,6 +172,7 @@ def auto_locate_series(gitdir, jsondata, branch, since='1.week', loose=False): # We need to find all of them in the commits found = list() for patch in jsondata['patches']: + logger.debug('Checking %s', patch) if patch[1] in patchids: logger.debug('Found: %s', patch[0]) found.append(commits[patch[1]]) @@ -182,6 +184,7 @@ def auto_locate_series(gitdir, jsondata, branch, since='1.week', loose=False): break if len(found) == len(jsondata['patches']): + logger.debug('Found all the patches') return found return None @@ -376,21 +379,24 @@ def send_selected(cmdargs): logger.info('Nothing to do') sys.exit(0) - listing = list() - for num in cmdargs.send: - try: - index = int(num) - 1 - listing.append(tracked[index]) - except ValueError: - logger.critical('Please provide the number of the message') - logger.info('---') - write_tracked(tracked) - sys.exit(1) - except IndexError: - logger.critical('Invalid index: %s', num) - logger.info('---') - write_tracked(tracked) - sys.exit(1) + if 'all' in cmdargs.discard: + listing = tracked + else: + listing = list() + for num in cmdargs.send: + try: + index = int(num) - 1 + listing.append(tracked[index]) + except ValueError: + logger.critical('Please provide the number of the message') + logger.info('---') + write_tracked(tracked) + sys.exit(1) + except IndexError: + logger.critical('Invalid index: %s', num) + logger.info('---') + write_tracked(tracked) + sys.exit(1) if not len(listing): logger.info('Nothing to do') sys.exit(0) @@ -453,12 +459,13 @@ def get_wanted_branch(cmdargs): gitdir = cmdargs.gitdir if not cmdargs.branch: # Find out our current branch - gitargs = ['branch', '--show-current'] + gitargs = ['rev-parse', '--abbrev-ref', 'HEAD'] ecode, out = b4.git_run_command(gitdir, gitargs) if ecode > 0: logger.critical('Not able to get current branch (git branch --show-current)') sys.exit(1) wantbranch = out.strip() + logger.debug('will check branch=%s', wantbranch) else: # Make sure it's a real branch gitargs = ['branch', '--format=%(refname:short)', '--list'] -- cgit v1.2.3