From 1c51bebcfa329b2fb9cfae1107427399698817c5 Mon Sep 17 00:00:00 2001 From: Konstantin Ryabitsev Date: Fri, 19 Aug 2022 15:46:57 -0400 Subject: ez: implement large chunk of web endpoint submission This implements most of the web endpoint submission functionality. Completely undocumented and needs a lot more testing before it's useful, but we're getting close. Signed-off-by: Konstantin Ryabitsev --- b4/__init__.py | 160 ++++++++++++++++++++++----------- b4/ez.py | 64 ++++++------- b4/mbox.py | 1 + b4/ty.py | 2 +- misc/send-receive.py | 250 +++++++++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 381 insertions(+), 96 deletions(-) diff --git a/b4/__init__.py b/b4/__init__.py index fd70858..2b70c38 100644 --- a/b4/__init__.py +++ b/b4/__init__.py @@ -30,7 +30,7 @@ import requests from pathlib import Path from contextlib import contextmanager -from typing import Optional, Tuple, Set, List, TextIO, Union +from typing import Optional, Tuple, Set, List, TextIO, Union, Sequence from email import charset charset.add_charset('utf-8', None) @@ -1210,9 +1210,27 @@ class LoreMessage: if attpolicy == 'hardfail': critical = True else: + passing = False if not checkmark: checkmark = attestor.checkmark if attestor.check_identity(self.fromemail): + passing = True + else: + # Do we have an x-original-from? 
+ xofh = self.msg.get('X-Original-From') + if xofh: + logger.debug('Using X-Original-From for identity check') + xpair = email.utils.getaddresses([xofh])[0] + if attestor.check_identity(xpair[1]): + passing = True + # Fix our fromname and fromemail, mostly for thanks-tracking + self.fromname = xpair[0] + self.fromemail = xpair[1] + # Drop the reply-to header if it's exactly the same + for header in list(self.msg._headers): # noqa + if header[0].lower() == 'reply-to' and header[1].find(xpair[1]) > 0: + self.msg._headers.remove(header) # noqa + if passing: trailers.append('%s Signed: %s' % (attestor.checkmark, attestor.trailer)) else: trailers.append('%s Signed: %s (From: %s)' % (attestor.checkmark, attestor.trailer, @@ -2858,62 +2876,100 @@ def patchwork_set_state(msgids: List[str], state: str) -> bool: logger.debug('Patchwork REST error: %s', ex) -def send_smtp(smtp: Union[smtplib.SMTP, smtplib.SMTP_SSL, None], msg: email.message.Message, - fromaddr: str, destaddrs: Optional[Union[Tuple, Set]] = None, +def send_mail(smtp: Union[smtplib.SMTP, smtplib.SMTP_SSL, None], msgs: Sequence[email.message.Message], + fromaddr: Optional[str], destaddrs: Optional[Union[set, list]] = None, patatt_sign: bool = False, dryrun: bool = False, maxheaderlen: Optional[int] = None, - write_to: Optional[str] = None) -> bool: + output_dir: Optional[str] = None) -> Optional[int]: - if write_to is not None: + tosend = list() + if output_dir is not None: dryrun = True - if not msg.get('X-Mailer'): - msg.add_header('X-Mailer', f'b4 {__VERSION__}') - msg.set_charset('utf-8') - msg.replace_header('Content-Transfer-Encoding', '8bit') - msg.policy = email.policy.EmailPolicy(utf8=True, cte_type='8bit') - # Python's sendmail implementation seems to have some logic problems where 8-bit messages are involved. 
- # As far as I understand the difference between 8BITMIME (supported by nearly all smtp servers) and - # SMTPUTF8 (supported by very few), SMTPUTF8 is only required when the addresses specified in either - # "MAIL FROM" or "RCPT TO" lines of the _protocol exchange_ themselves have 8bit characters, not - # anything in the From: header of the DATA payload. Python's smtplib seems to always try to encode - # strings as ascii regardless of what was policy was specified. - # Work around this by getting the payload as string and then encoding to bytes ourselves. - if maxheaderlen is None: + for msg in msgs: + if not msg.get('X-Mailer'): + msg.add_header('X-Mailer', f'b4 {__VERSION__}') + msg.set_charset('utf-8') + msg.replace_header('Content-Transfer-Encoding', '8bit') + msg.policy = email.policy.EmailPolicy(utf8=True, cte_type='8bit') + # Python's sendmail implementation seems to have some logic problems where 8-bit messages are involved. + # As far as I understand the difference between 8BITMIME (supported by nearly all smtp servers) and + # SMTPUTF8 (supported by very few), SMTPUTF8 is only required when the addresses specified in either + # "MAIL FROM" or "RCPT TO" lines of the _protocol exchange_ themselves have 8bit characters, not + # anything in the From: header of the DATA payload. Python's smtplib seems to always try to encode + # strings as ascii regardless of what policy was specified. + # Work around this by getting the payload as string and then encoding to bytes ourselves. 
+ if maxheaderlen is None: + if dryrun: + # Make it fit the terminal window, but no wider than 120 minus visual padding + ts = shutil.get_terminal_size((120, 20)) + maxheaderlen = ts.columns - 8 + if maxheaderlen > 112: + maxheaderlen = 112 + else: + # Use a sane-ish default (we don't need to stick to 80, but + # we need to make sure it's shorter than 255) + maxheaderlen = 120 + + emldata = msg.as_string(maxheaderlen=maxheaderlen) + # Force compliant eols + emldata = re.sub(r'\r\n|\n|\r(?!\n)', '\r\n', emldata) + bdata = emldata.encode() + if patatt_sign: + import patatt + # patatt.logger = logger + bdata = patatt.rfc2822_sign(bdata) if dryrun: - # Make it fit the terminal window, but no wider than 120 minus visual padding - ts = shutil.get_terminal_size((120, 20)) - maxheaderlen = ts.columns - 8 - if maxheaderlen > 112: - maxheaderlen = 112 - else: - # Use a sane-ish default (we don't need to stick to 80, but - # we need to make sure it's shorter than 255) - maxheaderlen = 120 - - emldata = msg.as_string(maxheaderlen=maxheaderlen) - # Force compliant eols - emldata = re.sub(r'\r\n|\n|\r(?!\n)', '\r\n', emldata) - bdata = emldata.encode() - if patatt_sign: - import patatt - # patatt.logger = logger - bdata = patatt.rfc2822_sign(bdata) - if dryrun or smtp is None: - if write_to: - with open(write_to, 'wb') as fh: - fh.write(bdata.replace(b'\r\n', b'\n')) - return True - logger.info(' --- DRYRUN: message follows ---') - logger.info(' | ' + bdata.decode().rstrip().replace('\n', '\n | ')) - logger.info(' --- DRYRUN: message ends ---') - return True - if not destaddrs: - alldests = email.utils.getaddresses([str(x) for x in msg.get_all('to', [])]) - alldests += email.utils.getaddresses([str(x) for x in msg.get_all('cc', [])]) - destaddrs = {x[1] for x in alldests} - smtp.sendmail(fromaddr, destaddrs, bdata) - # TODO: properly catch exceptions on sending - return True + if output_dir: + subject = msg.get('Subject', '') + ls = LoreSubject(subject) + filen = '%s.eml' % 
ls.get_slug(sep='-') + logger.info(' %s', filen) + write_to = os.path.join(output_dir, filen) + with open(write_to, 'wb') as fh: + fh.write(bdata) + continue + logger.info(' --- DRYRUN: message follows ---') + logger.info(' | ' + bdata.decode().rstrip().replace('\n', '\n | ')) + logger.info(' --- DRYRUN: message ends ---') + continue + if not destaddrs: + alldests = email.utils.getaddresses([str(x) for x in msg.get_all('to', [])]) + alldests += email.utils.getaddresses([str(x) for x in msg.get_all('cc', [])]) + destaddrs = {x[1] for x in alldests} + + tosend.append((destaddrs, bdata)) + + if not len(tosend): + return 0 + + # Do we have an endpoint defined? + config = get_main_config() + endpoint = config.get('send-endpoint-web') + if endpoint: + logger.info('Sending via web endpoint %s', endpoint) + req = { + 'action': 'receive', + 'messages': [x[1].decode() for x in tosend], + } + ses = get_requests_session() + res = ses.post(endpoint, json=req) + if res.status_code == 200: + try: + rdata = res.json() + if rdata.get('result') == 'success': + return len(tosend) + except Exception as ex: # noqa + logger.critical('Odd response from the endpoint: %s', res.text) + + logger.critical('500 response from the endpoint: %s', res.text) + return None + + if smtp: + sent = 0 + for destaddrs, bdata in tosend: + smtp.sendmail(fromaddr, destaddrs, bdata) + sent += 1 + return sent def git_get_current_branch(gitdir: Optional[str] = None, short: bool = True) -> Optional[str]: diff --git a/b4/ez.py b/b4/ez.py index 6e3409e..3615130 100644 --- a/b4/ez.py +++ b/b4/ez.py @@ -139,13 +139,7 @@ def auth_new(cmdargs: argparse.Namespace) -> None: } logger.info('Submitting new auth request to %s', endpoint) ses = b4.get_requests_session() - try: - res = ses.post(endpoint, json=req) - res.raise_for_status() - except Exception as ex: - logger.critical('CRITICAL: unable to send endpoint request') - logger.critical(' %s', ex) - sys.exit(1) + res = ses.post(endpoint, json=req) logger.info('---') if 
res.status_code == 200: try: @@ -186,13 +180,7 @@ def auth_verify(cmdargs: argparse.Namespace) -> None: } logger.info('Submitting verification to %s', endpoint) ses = b4.get_requests_session() - try: - res = ses.post(endpoint, json=req) - res.raise_for_status() - except Exception as ex: - logger.critical('CRITICAL: unable to send endpoint request') - logger.critical(' %s', ex) - sys.exit(1) + res = ses.post(endpoint, json=req) logger.info('---') if res.status_code == 200: try: @@ -1081,17 +1069,10 @@ def cmd_send(cmdargs: argparse.Namespace) -> None: sign = True if cmdargs.no_sign or config.get('send-no-patatt-sign', '').lower() in {'yes', 'true', 'y'}: sign = False - identity = config.get('sendemail-identity') - try: - smtp, fromaddr = b4.get_smtp(identity, dryrun=cmdargs.dryrun) - except Exception as ex: # noqa - logger.critical('Failed to configure the smtp connection:') - logger.critical(ex) - sys.exit(1) - counter = 0 cover_msgid = None # TODO: Need to send obsoleted-by follow-ups, just need to figure out where. 
+ send_msgs = list() for commit, msg in patches: if not msg: continue @@ -1106,27 +1087,40 @@ def cmd_send(cmdargs: argparse.Namespace) -> None: msg.add_header('To', b4.format_addrs(allto)) if allcc: msg.add_header('Cc', b4.format_addrs(allcc)) - if cmdargs.output_dir: - subject = msg.get('Subject', '') - ls = b4.LoreSubject(subject) - filen = '%s.eml' % ls.get_slug(sep='-') - logger.info(' %s', filen) - write_to = os.path.join(cmdargs.output_dir, filen) - else: - write_to = None + if not cmdargs.output_dir: logger.info(' %s', re.sub(r'\s+', ' ', msg.get('Subject'))) - if b4.send_smtp(smtp, msg, fromaddr=fromaddr, destaddrs=alldests, patatt_sign=sign, - dryrun=cmdargs.dryrun, write_to=write_to): - counter += 1 + send_msgs.append(msg) + + if config.get('send-endpoint-web'): + # Web endpoint always requires signing + if not sign: + logger.critical('CRITICAL: Web endpoint is defined for sending, but signing is turned off') + logger.critical(' Please re-enable signing or use SMTP') + sys.exit(1) + + sent = b4.send_mail(None, send_msgs, fromaddr=None, destaddrs=None, patatt_sign=True, + dryrun=cmdargs.dryrun, output_dir=cmdargs.output_dir) + else: + identity = config.get('sendemail-identity') + try: + smtp, fromaddr = b4.get_smtp(identity, dryrun=cmdargs.dryrun) + except Exception as ex: # noqa + logger.critical('Failed to configure the smtp connection:') + logger.critical(ex) + sys.exit(1) + + sent = b4.send_mail(smtp, send_msgs, fromaddr=fromaddr, destaddrs=alldests, patatt_sign=sign, + dryrun=cmdargs.dryrun, output_dir=cmdargs.output_dir) logger.info('---') if cmdargs.dryrun: - logger.info('DRYRUN: Would have sent %s messages', counter) + logger.info('DRYRUN: Would have sent %s messages', len(send_msgs)) return else: - logger.info('Sent %s messages', counter) + logger.info('Sent %s messages', sent) + # TODO: need to make the reroll process smoother mybranch = b4.git_get_current_branch() revision = tracking['series']['revision'] diff --git a/b4/mbox.py b/b4/mbox.py 
index 1bc66c4..9a6ea9c 100644 --- a/b4/mbox.py +++ b/b4/mbox.py @@ -456,6 +456,7 @@ def thanks_record_am(lser, cherrypick=None): allto = email.utils.getaddresses([str(x) for x in lmsg.msg.get_all('to', [])]) allcc = email.utils.getaddresses([str(x) for x in lmsg.msg.get_all('cc', [])]) + # TODO: check for reply-to and x-original-from out = { 'msgid': lmsg.msgid, 'subject': lmsg.full_subject, diff --git a/b4/ty.py b/b4/ty.py index dae5206..7ca64da 100644 --- a/b4/ty.py +++ b/b4/ty.py @@ -431,7 +431,7 @@ def send_messages(listing, branch, cmdargs): if not fromaddr: fromaddr = jsondata['myemail'] logger.info(' Sending: %s', msg.get('subject')) - b4.send_smtp(smtp, msg, fromaddr, dryrun=cmdargs.dryrun) + b4.send_mail(smtp, [msg], fromaddr, dryrun=cmdargs.dryrun) else: slug_from = re.sub(r'\W', '_', jsondata['fromemail']) slug_subj = re.sub(r'\W', '_', jsondata['subject']) diff --git a/misc/send-receive.py b/misc/send-receive.py index c102508..c54e0ef 100644 --- a/misc/send-receive.py +++ b/misc/send-receive.py @@ -6,17 +6,66 @@ import logging import json import sqlalchemy as sa import patatt +import smtplib +import email +import email.header +import email.policy +import re + +from configparser import ConfigParser, ExtendedInterpolation +from string import Template +from email import utils +from typing import Tuple, Union + +from email import charset +charset.add_charset('utf-8', None) +emlpolicy = email.policy.EmailPolicy(utf8=True, cte_type='8bit', max_line_length=None) DB_VERSION = 1 +# We'll make this configurable later +TPT_VERIFY_SUBJECT = 'Web endpoint verification for ${identity}' +TPT_VERIFY_BODY = '''Dear ${name}: + +Somebody, probably you, initiated a web endpoint verification routine +for patch submissions at: ${myurl} + +If you have no idea what is going on, please ignore this message. +Otherwise, please follow instructions provided by your tool and paste +the following string: + +${challenge} + +Happy patching! 
+-- +Deet-doot-dot, I'm a bot +https://korg.docs.kernel.org/ +''' + +DEFAULT_CFG = ''' +[main] + myname = Web Endpoint + myurl = http://localhost:8000/_b4_submit + dburl = sqlite:///:memory: + mydomains = kernel.org, linux.dev + dryrun = false +[sendemail] + smtpserver = localhost + from = devnull@kernel.org +[public-inbox] + repo = + listid = patches.feeds.kernel.org +''' + logger = logging.getLogger('b4-send-receive') # noinspection PyBroadException, PyMethodMayBeStatic class SendReceiveListener(object): - def __init__(self, _engine): + def __init__(self, _engine, _config): self._engine = _engine + self._config = _config # You shouldn't use this in production if self._engine.driver == 'pysqlite': self._init_sa_db() @@ -47,14 +96,51 @@ class SendReceiveListener(object): resp.content_type = falcon.MEDIA_TEXT resp.text = "We don't serve GETs here\n" - def send_error(self, resp, message): + def send_error(self, resp, message: str): resp.status = falcon.HTTP_500 resp.text = json.dumps({'result': 'error', 'message': message}) - def send_success(self, resp, message): + def send_success(self, resp, message: str): resp.status = falcon.HTTP_200 resp.text = json.dumps({'result': 'success', 'message': message}) + def get_smtp(self) -> Tuple[Union[smtplib.SMTP, smtplib.SMTP_SSL, None], Tuple[str, str]]: + sconfig = self._config['sendemail'] + server = sconfig.get('smtpserver', 'localhost') + port = sconfig.get('smtpserverport', 0) + encryption = sconfig.get('smtpencryption') + + logger.debug('Connecting to %s:%s', server, port) + # We only authenticate if we have encryption + if encryption: + if encryption in ('tls', 'starttls'): + # We do startssl + smtp = smtplib.SMTP(server, port) + # Introduce ourselves + smtp.ehlo() + # Start encryption + smtp.starttls() + # Introduce ourselves again to get new criteria + smtp.ehlo() + elif encryption in ('ssl', 'smtps'): + # We do TLS from the get-go + smtp = smtplib.SMTP_SSL(server, port) + else: + raise smtplib.SMTPException('Unclear 
what to do with smtpencryption=%s' % encryption) + + # If we got to this point, we should do authentication. + auser = sconfig.get('smtpuser') + apass = sconfig.get('smtppass') + if auser and apass: + # Let any exceptions bubble up + smtp.login(auser, apass) + else: + # We assume you know what you're doing if you don't need encryption + smtp = smtplib.SMTP(server, port) + + frompair = utils.getaddresses([sconfig.get('from')])[0] + return smtp, frompair + def auth_new(self, jdata, resp): # Is it already authorized? conn = self._engine.connect() @@ -79,9 +165,33 @@ class SendReceiveListener(object): q = sa.insert(t_auth).values(identity=identity, selector=selector, pubkey=pubkey, challenge=cstr, verified=0) conn.execute(q) - # TODO: Actual mail sending logger.info('Challenge: %s', cstr) - self.send_success(resp, message='Challenge generated') + smtp, frompair = self.get_smtp() + cmsg = email.message.EmailMessage() + fromname, fromaddr = frompair + if len(fromname): + cmsg.add_header('From', f'{fromname} <{fromaddr}>') + else: + cmsg.add_header('From', fromaddr) + subject = Template(TPT_VERIFY_SUBJECT).safe_substitute({'identity': jdata.get('identity')}) + cmsg.add_header('Subject', subject) + name = jdata.get('name', 'Anonymous Llama') + cmsg.add_header('To', f'{name} <{identity}>') + cmsg.add_header('Message-Id', utils.make_msgid('b4-verify')) + vals = { + 'name': name, + 'myurl': self._config['main'].get('myurl'), + 'challenge': cstr, + } + body = Template(TPT_VERIFY_BODY).safe_substitute(vals) + cmsg.set_payload(body, charset='utf-8') + bdata = cmsg.as_bytes(policy=emlpolicy) + destaddrs = [identity] + alwaysbcc = self._config['main'].get('alwayscc') + if alwaysbcc: + destaddrs += [x[1] for x in utils.getaddresses(alwaysbcc)] + smtp.sendmail(fromaddr, [identity], bdata) + self.send_success(resp, message=f'Challenge generated and sent to {identity}') def validate_message(self, conn, t_auth, bdata, verified=1): # Returns auth_id of the matching record @@ -161,6 
+271,117 @@ class SendReceiveListener(object): conn.execute(q) self.send_success(resp, message='Authentication deleted') + def clean_header(self, hdrval): + if hdrval is None: + return '' + + decoded = '' + for hstr, hcs in email.header.decode_header(hdrval): + if hcs is None: + hcs = 'utf-8' + try: + decoded += hstr.decode(hcs, errors='replace') + except LookupError: + # Try as utf-8 + decoded += hstr.decode('utf-8', errors='replace') + except (UnicodeDecodeError, AttributeError): + decoded += hstr + new_hdrval = re.sub(r'\n?\s+', ' ', decoded) + return new_hdrval.strip() + + def receive(self, jdata, resp): + servicename = self._config['main'].get('myname') + if not servicename: + servicename = 'Web Endpoint' + umsgs = jdata.get('messages') + if not umsgs: + self.send_error(resp, message='Missing the messages array') + return + msgs = list() + conn = self._engine.connect() + md = sa.MetaData() + t_auth = sa.Table('auth', md, autoload=True, autoload_with=self._engine) + # First, validate all signatures + at = 0 + for umsg in umsgs: + at += 1 + auth_id = self.validate_message(conn, t_auth, umsg.encode()) + if auth_id is None: + self.send_error(resp, message=f'Signature validation failed for message {at}') + return + msg = email.message_from_string(umsg) + msg.add_header('X-Endpoint-Received', f'by {servicename} with auth_id={auth_id}') + msgs.append(msg) + + # All signatures verified. Prepare messages for sending. 
+ cfgdomains = self._config['main'].get('mydomains') + if cfgdomains is not None: + mydomains = [x.strip() for x in cfgdomains.split(',')] + else: + mydomains = list() + + smtp, frompair = self.get_smtp() + + for msg in msgs: + # TODO: public-inbox writing at this point + subject = self.clean_header(msg.get('Subject')) + origfrom = self.clean_header(msg.get('From')) + origpair = utils.getaddresses([origfrom])[0] + origaddr = origpair[1] + # Does it match one of our domains + mydomain = False + for _domain in mydomains: + if origaddr.endswith(f'@{_domain}'): + mydomain = True + break + if mydomain: + fromaddr = origaddr + else: + fromaddr = frompair[1] + # We can't just send this as-is due to DMARC policies. Therefore, we set + # Reply-To and X-Original-From. + origname = origpair[0] + if not origname: + origname = origpair[1] + msg.replace_header('From', f'{origname} via {servicename} <{fromaddr}>') + + if msg.get('X-Original-From'): + msg.replace_header('X-Original-From', origfrom) + else: + msg.add_header('X-Original-From', origfrom) + if msg.get('Reply-To'): + msg.replace_header('Reply-To', f'<{origpair[1]}>') + else: + msg.add_header('Reply-To', f'<{origpair[1]}>') + + # Does the subject start with [PATCH? 
+ if subject.startswith('[PATCH '): + body = msg.get_payload() + # Parse it as a message and see if we get a From: header + cmsg = email.message_from_string(body) + if cmsg.get('From') is None: + cmsg.add_header('From', origfrom) + msg.set_payload(cmsg.as_string(policy=emlpolicy, maxheaderlen=0), charset='utf-8') + + alldests = utils.getaddresses([str(x) for x in msg.get_all('to', [])]) + alldests += utils.getaddresses([str(x) for x in msg.get_all('cc', [])]) + alwaysbcc = self._config['main'].get('alwaysbcc') + if alwaysbcc: + alldests += utils.getaddresses([alwaysbcc]) + destaddrs = {x[1] for x in alldests} + + bdata = msg.as_string(policy=emlpolicy).encode() + + if not self._config['main'].getboolean('dryrun'): + smtp.sendmail(fromaddr, list(destaddrs), bdata) + logger.info('Sent %s', subject) + else: + logger.info('---DRYRUN MSG START---') + logger.info(msg) + logger.info('---DRYRUN MSG END---') + + self.send_success(resp, message=f'Sent {len(msgs)} messages') + def on_post(self, req, resp): if not req.content_length: resp.status = falcon.HTTP_500 @@ -185,16 +406,29 @@ class SendReceiveListener(object): if action == 'auth-delete': self.auth_delete(jdata, resp) return + if action == 'receive': + self.receive(jdata, resp) + return resp.status = falcon.HTTP_500 resp.content_type = falcon.MEDIA_TEXT resp.text = 'Unknown action: %s\n' % action -app = falcon.App() -dburl = os.getenv('DB_URL', 'sqlite:///:memory:') +parser = ConfigParser(interpolation=ExtendedInterpolation()) +cfgfile = os.getenv('CONFIG') +if cfgfile: + parser.read(cfgfile) +else: + parser.read_string(DEFAULT_CFG) + +gpgbin = parser['main'].get('gpgbin') +if gpgbin: + patatt.GPGBIN = gpgbin +dburl = parser['main'].get('dburl') engine = sa.create_engine(dburl) -srl = SendReceiveListener(engine) +srl = SendReceiveListener(engine, parser) +app = falcon.App() mp = os.getenv('MOUNTPOINT', '/_b4_submit') app.add_route(mp, srl) -- cgit v1.2.3