aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKonstantin Ryabitsev <konstantin@linuxfoundation.org>2021-06-03 13:04:33 -0400
committerKonstantin Ryabitsev <konstantin@linuxfoundation.org>2021-06-03 13:04:33 -0400
commit7e066cb8834233edde5fef6a5bb391fd2124448b (patch)
tree7ca78de6755f223cbc426914c0615a141bdd47c5
parent814f8fa85b67dc971cf86a18929a892d5e954a32 (diff)
downloadb4-7e066cb8834233edde5fef6a5bb391fd2124448b.tar.gz
Account for in-body headers when trimming body
When we discover that a message can only be attested after we trim the body, we *must* set the body to that version, otherwise an attacker could append arbitrary content past the l= value boundary. We already do this in the current form, but we weren't properly handing in-body headers like From: and Subject: that are used to indicate to git the patch author vs. committer. This patch set fixes that and also streamlines a few other places where we were already relying on git mailinfo calls. Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r--b4/__init__.py139
-rw-r--r--b4/mbox.py47
2 files changed, 98 insertions, 88 deletions
diff --git a/b4/__init__.py b/b4/__init__.py
index c1f77e8..a163364 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -43,7 +43,7 @@ try:
except ModuleNotFoundError:
can_patatt = False
-__VERSION__ = '0.7.0'
+__VERSION__ = '0.8-dev'
logger = logging.getLogger('b4')
@@ -57,23 +57,17 @@ ATT_FAIL_FANCY = '\033[31m\u2717\033[0m'
DEVSIG_HDR = 'X-Developer-Signature'
-# You can use bash-style globbing here
-WANTHDRS = [
- 'sender',
- 'from',
- 'to',
- 'cc',
- 'subject',
- 'date',
- 'message-id',
- 'resent-message-id',
- 'reply-to',
- 'in-reply-to',
- 'references',
- 'list-id',
- 'errors-to',
- 'x-mailing-list',
- 'resent-to',
+# Headers to include into am-ready messages
+# From: and Subject: are always included
+AMHDRS = [
+ 'Date',
+ 'Message-Id',
+ 'To',
+ 'Cc',
+ 'Reply-To',
+ 'In-Reply-To',
+ 'References',
+ 'List-Id',
]
# You can use bash-style globbing here
@@ -1054,22 +1048,17 @@ class LoreMessage:
if not matches:
return
bl = int(matches.groups()[0])
- with tempfile.TemporaryDirectory() as tfd:
- m_out = os.path.join(tfd, 'm')
- p_out = os.path.join(tfd, 'p')
- cmdargs = ['mailinfo', '--encoding=UTF-8', '--no-scissors', m_out, p_out]
- ecode, info = git_run_command(None, cmdargs, self.msg.as_bytes())
- if not len(info.strip()):
- return
- with open(m_out, 'rb') as mfh:
- bm = mfh.read()
- with open(p_out, 'rb') as pfh:
- bp = pfh.read()
- bb = b''
- for line in re.sub(rb'[\r\n]*$', b'', bm + bp).split(b'\n'):
- bb += re.sub(rb'[\r\n]*$', b'', line) + b'\r\n'
- if len(bb) > bl:
- self.body = bb[:bl].decode()
+ i, m, p = get_mailinfo(self.msg.as_bytes(), scissors=False)
+ bb = b''
+ for line in re.sub(rb'[\r\n]*$', b'', m + p).split(b'\n'):
+ bb += re.sub(rb'[\r\n]*$', b'', line) + b'\r\n'
+ if len(bb) > bl:
+ self.body = bb[:bl].decode()
+ # This may have potentially resulted in in-body From/Subject being removed,
+ # so make sure we account for this in the message headers
+ self.lsubject.subject = self.subject = i.get('Subject')
+ self.fromname = i.get('Author')
+ self.fromemail = i.get('Email')
def _load_patatt_attestors(self) -> None:
if not can_patatt:
@@ -1103,7 +1092,9 @@ class LoreMessage:
break
if success:
if trim_body:
- # set the body to the trimmed version
+ # If we only succeeded after trimming the body, then we MUST set the body
+ # to that value, otherwise someone can append arbitrary content after the l= value
+ # limit message.
self._trim_body()
break
if not success and trim_body:
@@ -1500,16 +1491,19 @@ class LoreMessage:
self.body += signature
self.body += '\n'
- def get_am_subject(self):
+ def get_am_subject(self, indicate_reroll=True):
# Return a clean patch subject
parts = ['PATCH']
if self.lsubject.rfc:
parts.append('RFC')
if self.reroll_from_revision:
- if self.reroll_from_revision != self.revision:
- parts.append('v%d->v%d' % (self.reroll_from_revision, self.revision))
+ if indicate_reroll:
+ if self.reroll_from_revision != self.revision:
+ parts.append('v%d->v%d' % (self.reroll_from_revision, self.revision))
+ else:
+ parts.append(' %s v%d' % (' ' * len(str(self.reroll_from_revision)), self.revision))
else:
- parts.append(' %s v%d' % (' ' * len(str(self.reroll_from_revision)), self.revision))
+ parts.append('v%d' % self.revision)
elif not self.revision_inferred:
parts.append('v%d' % self.revision)
if not self.lsubject.counters_inferred:
@@ -1520,25 +1514,26 @@ class LoreMessage:
def get_am_message(self, add_trailers=True, trailer_order=None, copyccs=False):
if add_trailers:
self.fix_trailers(trailer_order=trailer_order, copyccs=copyccs)
- am_body = self.body.rstrip('\r\n')
am_msg = email.message.EmailMessage()
- am_msg.set_payload(am_body.encode() + b'\n')
- # Clean up headers
- for hdrname, hdrval in self.msg.items():
- lhdrname = hdrname.lower()
- wanthdr = False
- for hdrmatch in WANTHDRS:
- if fnmatch.fnmatch(lhdrname, hdrmatch):
- wanthdr = True
- break
- if wanthdr:
- new_hdrval = LoreMessage.clean_header(hdrval)
- # noinspection PyBroadException
- try:
- am_msg.add_header(hdrname, new_hdrval)
- except:
- # A broad except to handle any potential weird header conditions
- pass
+ am_msg.set_payload(self.body.encode())
+ am_msg.add_header('Subject', self.get_am_subject(indicate_reroll=False))
+ if self.fromname:
+ am_msg.add_header('From', f'{self.fromname} <{self.fromemail}>')
+ else:
+ am_msg.add_header('From', self.fromemail)
+
+ # Add select headers from the original message
+ for hname in AMHDRS:
+ hval = self.msg.get(hname)
+ if not hval:
+ continue
+ hval = LoreMessage.clean_header(hval)
+ # noinspection PyBroadException
+ try:
+ am_msg.add_header(hname, hval)
+ except:
+ # A broad except to handle any potential weird header conditions
+ pass
am_msg.set_charset('utf-8')
return am_msg
@@ -2359,3 +2354,33 @@ def get_lore_projects_from_msg(msg) -> list:
projects.append(projmap[entry[1]])
return projects
+
+
+def get_mailinfo(bmsg: bytes, scissors: bool = False) -> Tuple[dict, bytes, bytes]:
+ with tempfile.TemporaryDirectory() as tfd:
+ m_out = os.path.join(tfd, 'm')
+ p_out = os.path.join(tfd, 'p')
+ if scissors:
+ cmdargs = ['mailinfo', '--encoding=UTF-8', '--scissors', m_out, p_out]
+ else:
+ cmdargs = ['mailinfo', '--encoding=UTF-8', '--no-scissors', m_out, p_out]
+
+ ecode, info = git_run_command(None, cmdargs, bmsg)
+ if not len(info.strip()):
+ raise ValueError('Could not get mailinfo')
+
+ i = dict()
+ m = b''
+ p = b''
+ for line in info.split('\n'):
+ line = line.strip()
+ if not line:
+ continue
+ chunks = line.split(':', 1)
+ i[chunks[0]] = chunks[1].strip()
+
+ with open(m_out, 'rb') as mfh:
+ m = mfh.read()
+ with open(p_out, 'rb') as pfh:
+ p = pfh.read()
+ return i, m, p
diff --git a/b4/mbox.py b/b4/mbox.py
index f575f94..33d7772 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -350,37 +350,22 @@ def save_as_quilt(am_msgs, q_dirname):
return
pathlib.Path(q_dirname).mkdir(parents=True)
patch_filenames = list()
- with tempfile.TemporaryDirectory() as tfd:
- m_out = os.path.join(tfd, 'm')
- p_out = os.path.join(tfd, 'p')
- for slug, msg in am_msgs:
- # Run each message through git mailinfo
- cmdargs = ['mailinfo', '--encoding=UTF-8', '--scissors', m_out, p_out]
- ecode, info = b4.git_run_command(None, cmdargs, msg.as_bytes(policy=b4.emlpolicy))
- if not len(info.strip()):
- logger.critical('ERROR: Could not get mailinfo from patch %s', msg.get('Subject', '(no subject)'))
- continue
- patchinfo = dict()
- for line in info.split('\n'):
- line = line.strip()
- if not line:
- continue
- chunks = line.split(':', 1)
- patchinfo[chunks[0]] = chunks[1].strip().encode()
-
- patch_filename = f'{slug}.patch'
- patch_filenames.append(patch_filename)
- quilt_out = os.path.join(q_dirname, patch_filename)
- with open(quilt_out, 'wb') as fh:
- fh.write(b'From: %s <%s>\n' % (patchinfo['Author'], patchinfo['Email']))
- fh.write(b'Subject: %s\n' % patchinfo['Subject'])
- fh.write(b'Date: %s\n' % patchinfo['Date'])
- fh.write(b'\n')
- with open(m_out, 'rb') as mfh:
- shutil.copyfileobj(mfh, fh)
- with open(p_out, 'rb') as pfh:
- shutil.copyfileobj(pfh, fh)
- logger.debug(' Wrote: %s', patch_filename)
+ for slug, msg in am_msgs:
+ patch_filename = f'{slug}.patch'
+ patch_filenames.append(patch_filename)
+ quilt_out = os.path.join(q_dirname, patch_filename)
+ i, m, p = b4.get_mailinfo(msg.as_bytes(policy=b4.emlpolicy), scissors=True)
+ with open(quilt_out, 'wb') as fh:
+ if i.get('Author'):
+ fh.write(b'From: %s <%s>\n' % (i.get('Author').encode(), i.get('Email').encode()))
+ else:
+ fh.write(b'From: %s\n' % i.get('Email').encode())
+ fh.write(b'Subject: %s\n' % i.get('Subject').encode())
+ fh.write(b'Date: %s\n' % i.get('Date').encode())
+ fh.write(b'\n')
+ fh.write(m)
+ fh.write(p)
+ logger.debug(' Wrote: %s', patch_filename)
# Write the series file
with open(os.path.join(q_dirname, 'series'), 'w') as sfh:
for patch_filename in patch_filenames: