aboutsummaryrefslogtreecommitdiff
path: root/b4/__init__.py
diff options
context:
space:
mode:
author Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2021-11-01 15:31:20 -0400
committer Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2021-11-01 15:31:20 -0400
commit 860c3115d9c2f66dda7beddb0c0562ca0a76f7df (patch)
tree ef365ee7ee61e200ac107e7fc7c4bdcdfcbd69d4 /b4/__init__.py
parent d7c823c519a8158127626e259941c53ae7c7a26a (diff)
download b4-860c3115d9c2f66dda7beddb0c0562ca0a76f7df.tar.gz
mbox: initial support to check for unicode control chars
Implement initial support for checking if the patch message contains unicode control characters that can be used to trick a code reviewer into accepting maliciously formatted code.

Link: https://lore.kernel.org/tools/20211101175020.5r4cwmy4qppi7dis@meerkat.local/
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
Diffstat (limited to 'b4/__init__.py')
-rw-r--r-- b4/__init__.py 41
1 files changed, 37 insertions, 4 deletions
diff --git a/b4/__init__.py b/b4/__init__.py
index 14224a7..e05af03 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -84,6 +84,19 @@ AMHDRS = [
'List-Id',
]
+# Unicode bidirectional control chars (U+202A-202E, U+2066-2069) that can be used to mess up legitimate code review
+BAD_UNI_CHARS = {
+ chr(0x202A),
+ chr(0x202B),
+ chr(0x202C),
+ chr(0x202D),
+ chr(0x202E),
+ chr(0x2066),
+ chr(0x2067),
+ chr(0x2068),
+ chr(0x2069),
+}
+
# You can use bash-style globbing here
# end with '*' to include any other trailers
# You can change the default in your ~/.gitconfig, e.g.:
@@ -491,7 +504,7 @@ class LoreSeries:
return slug[:100]
def get_am_ready(self, noaddtrailers=False, covertrailers=False, trailer_order=None, addmysob=False,
- addlink=False, linkmask=None, cherrypick=None, copyccs=False) -> list:
+ addlink=False, linkmask=None, cherrypick=None, copyccs=False, allowbadchars=False) -> list:
usercfg = get_user_config()
config = get_main_config()
@@ -579,7 +592,8 @@ class LoreSeries:
add_trailers = True
if noaddtrailers:
add_trailers = False
- msg = lmsg.get_am_message(add_trailers=add_trailers, trailer_order=trailer_order, copyccs=copyccs)
+ msg = lmsg.get_am_message(add_trailers=add_trailers, trailer_order=trailer_order, copyccs=copyccs,
+ allowbadchars=allowbadchars)
msgs.append(msg)
else:
logger.error(' ERROR: missing [%s/%s]!', at, self.expected)
@@ -1596,11 +1610,30 @@ class LoreMessage:
return '[%s] %s' % (' '.join(parts), self.lsubject.subject)
- def get_am_message(self, add_trailers=True, trailer_order=None, copyccs=False):
+ def get_am_message(self, add_trailers=True, trailer_order=None, copyccs=False, allowbadchars=False):
if add_trailers:
self.fix_trailers(trailer_order=trailer_order, copyccs=copyccs)
+ bbody = self.body.encode()
+ # Look through the body to make sure there aren't any unwanted unicode characters
+ # First, encode into ascii and compare for a quickie utf8 presence test
+ if self.body.encode('ascii', errors='replace') != bbody:
+ logger.debug('Body contains non-ascii characters. Performing a test against badchars.')
+ matches = {u for u in self.body if u in BAD_UNI_CHARS}
+ if matches and not allowbadchars:
+ logger.critical('---')
+ logger.critical('WARNING: Message contains unicode control characters!')
+ logger.critical(' Subject: %s', self.full_subject)
+ logger.critical(' If you know what you are doing, rerun with the right flag to allow this.')
+ sys.exit(1)
+ if matches and allowbadchars:
+ logger.info('---')
+ logger.info('WARNING: Message contains unicode control characters!')
+ logger.info(' Subject: %s', self.full_subject)
+ logger.info(' Allowing this through, I hope you know what you are doing.')
+ logger.info('---')
+
am_msg = email.message.EmailMessage()
- am_msg.set_payload(self.body.encode())
+ am_msg.set_payload(bbody)
am_msg.add_header('Subject', self.get_am_subject(indicate_reroll=False))
if self.fromname:
am_msg.add_header('From', f'{self.fromname} <{self.fromemail}>')