From 50dd627b9658081b8f9ddf2466ebb4b174684a55 Mon Sep 17 00:00:00 2001
From: Kyle Meyer
Date: Sun, 18 Jul 2021 00:34:05 -0400
Subject: Avoid decoding errors when extracting message ID from stdin

The mbox, am, and pr subcommands accept an mbox on stdin and extract the
message ID. When stdin.read() is called, Python assumes the encoding is
locale.getpreferredencoding(False). This may not match the content
encoding, leading to a decoding error.

Instead feed the stdin bytes to message_from_bytes(), which leads to a
decode('ASCII', errors='surrogateescape') underneath. That's sufficient
to get the message ID from the ASCII headers.

Reported-by: Michael S. Tsirkin
Signed-off-by: Kyle Meyer
Signed-off-by: Konstantin Ryabitsev
---
 b4/__init__.py | 2 +-
 b4/pr.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/b4/__init__.py b/b4/__init__.py
index c019358..bc05dcd 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -2075,7 +2075,7 @@ def get_requests_session():
 
 def get_msgid_from_stdin():
     if not sys.stdin.isatty():
-        message = email.message_from_string(sys.stdin.read())
+        message = email.message_from_bytes(sys.stdin.buffer.read())
         return message.get('Message-ID', None)
     return None
 
diff --git a/b4/pr.py b/b4/pr.py
index c5c5fc7..5a66180 100644
--- a/b4/pr.py
+++ b/b4/pr.py
@@ -507,7 +507,7 @@ def main(cmdargs):
 
     if not sys.stdin.isatty():
         logger.debug('Getting PR message from stdin')
-        msg = email.message_from_string(sys.stdin.read())
+        msg = email.message_from_bytes(sys.stdin.buffer.read())
         cmdargs.msgid = b4.LoreMessage.get_clean_msgid(msg)
         lmsg = parse_pr_data(msg)
     else:
-- 
cgit v1.2.3