From 084f3978c0c86aedf01fc245ffdd95e6ed694d14 Mon Sep 17 00:00:00 2001
From: Konstantin Ryabitsev
Date: Thu, 10 Sep 2020 14:02:46 -0400
Subject: Tighten follow-up header parsing

The combined routine was too broad for parsing follow-up messages, so
this tightens it to avoid too many false positive matches.

Signed-off-by: Konstantin Ryabitsev
---
 b4/__init__.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/b4/__init__.py b/b4/__init__.py
index 1cebe2b..d4a67a5 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -1179,6 +1179,8 @@ class LoreMessage:
 
     @staticmethod
     def find_trailers(body):
+        headers = ('subject', 'date', 'from')
+        nonperson = ('fixes', 'subject', 'date')
         # Fix some more common copypasta trailer wrapping
         # Fixes: abcd0123 (foo bar
         # baz quux)
@@ -1189,7 +1191,8 @@ class LoreMessage:
         # Signed-off-by: Foo foo
         # [for the thing that the thing is too long the thing that is
         # thing but thing]
-        body = re.sub(r'^(\[[^]]+)\n([^]]+]$)', r'\1 \2', body, flags=re.M)
+        # (too false-positivey, commented out)
+        # body = re.sub(r'^(\[[^]]+)\n([^]]+]$)', r'\1 \2', body, flags=re.M)
         trailers = list()
         others = list()
         was_trailer = False
@@ -1197,8 +1200,17 @@ class LoreMessage:
             line = line.strip('\r')
             matches = re.search(r'^(\w\S+):\s+(\S.*)', line, flags=re.I)
             if matches:
-                was_trailer = True
                 groups = list(matches.groups())
+                # We only accept headers if we haven't seen any non-trailer lines
+                tname = groups[0].lower()
+                if len(others) and tname in headers:
+                    logger.debug('Ignoring %s (header after other content)', line)
+                    continue
+                mperson = re.search(r'<[^>]+>', groups[1])
+                if not mperson and tname not in nonperson:
+                    logger.debug('Ignoring %s (not a recognized non-person trailer)', line)
+                    continue
+                was_trailer = True
                 groups.append(None)
                 trailers.append(groups)
                 continue
-- 
cgit v1.2.3