From 0f63a3f6a7398321bd799c7b778e0e721c811287 Mon Sep 17 00:00:00 2001 From: Konstantin Ryabitsev Date: Fri, 20 Mar 2020 16:35:08 -0400 Subject: Add caching layer Many lore.kernel.org operations can be repeated within quick succession of each-other (e.g. someone reruns a query with -t). This commit adds a caching layer that keeps lookups in local cache for 10 minutes (default). It can be made longer or shorter by editing the 'cache-expire' setting, or running "b4 am" with -C,--no-cache. Signed-off-by: Konstantin Ryabitsev --- b4/__init__.py | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++------- b4/command.py | 2 ++ b4/mbox.py | 22 ++++++++---- man/b4.5.rst | 4 +++ 4 files changed, 118 insertions(+), 18 deletions(-) diff --git a/b4/__init__.py b/b4/__init__.py index 7b9d2cf..237131a 100644 --- a/b4/__init__.py +++ b/b4/__init__.py @@ -12,7 +12,9 @@ import email.policy import requests import urllib.parse import datetime +import time +from pathlib import Path from tempfile import mkstemp from email import charset @@ -84,6 +86,8 @@ DEFAULT_CONFIG = { 'attestation-gnupghome': None, # Do you like simple or fancy checkmarks? 'attestation-checkmarks': 'fancy', + # How long to keep things in cache before expiring (minutes)? + 'cache-expire': '10', # If this is not set, we'll use what we find in # git-config for gpg.program, and if that's not set, # we'll use "gpg" and hope for the better @@ -101,6 +105,8 @@ ATTESTATIONS = list() SUBKEY_DATA = dict() # Used for storing our requests session REQSESSION = None +# Indicates that we've cleaned cache already +_CACHE_CLEANED = False class LoreMailbox: @@ -782,9 +788,9 @@ class LoreMessage: if fnmatch.fnmatch(trailer[0].lower(), trailermatch.strip()): fixlines.append('%s: %s' % trailer) if trailer not in btrailers: - logger.info(' Added: %s: %s' % trailer) + logger.info(' + %s: %s' % trailer) else: - logger.debug(' Kept: %s: %s' % trailer) + logger.debug(' . %s: %s' % trailer) added.append(trailer) trailersdone = True fixlines.append(line) @@ -1032,6 +1038,15 @@ class LoreAttestationDocument: if self.good and self.valid and self.trusted: self.passing = True + if source.find('http') == 0: + # We only cache known-good attestations obtained from remote + cachedir = get_cache_dir() + cachename = '%s.attestation' % urllib.parse.quote_plus(source.strip('/').split('/')[-1]) + fullpath = os.path.join(cachedir, cachename) + with open(fullpath, 'w') as fh: + logger.debug('Saved attestation in cache: %s', cachename) + fh.write(sigdata) + hg = [None, None, None] for line in sigdata.split('\n'): # It's a yaml document, but we don't parse it as yaml for safety reasons @@ -1068,26 +1083,60 @@ class LoreAttestationDocument: out.append(' | %s-%s-%s' % (hg[0][:8], hg[1][:8], hg[2][:8])) return '\n'.join(out) + @staticmethod + def get_from_cache(attid): + cachedir = get_cache_dir() + attdocs = list() + for entry in os.listdir(cachedir): + if entry.find('.attestation') <= 0: + continue + fullpath = os.path.join(cachedir, entry) + with open(fullpath, 'r') as fh: + content = fh.read() + # Can't be 0, because it has to have pgp ascii wrapper + if content.find(attid) > 0: + attdoc = LoreAttestationDocument(fullpath, content) + attdocs.append(attdoc) + return attdocs + @staticmethod def get_from_lore(attid): + attdocs = list() # XXX: Querying this via the Atom feed is a temporary kludge until we have # proper search API on lore.kernel.org + cachedir = get_cache_dir() + cachefile = os.path.join(cachedir, '%s.lookup' % urllib.parse.quote_plus(attid)) + status = None + if os.path.exists(cachefile): + with open(cachefile, 'r') as fh: + try: + status = int(fh.read()) + except ValueError: + pass + if status is not None and status != 200: + logger.debug('Cache says looking up %s = %s', attid, status) + return attdocs + config = get_main_config() queryurl = '%s?%s' % (config['attestation-query-url'], urllib.parse.urlencode({'q': attid, 'x': 'A', 'o': '-1'})) logger.debug('Query URL: %s', queryurl) session = get_requests_session() resp = session.get(queryurl) - content = resp.content.decode('utf-8') + if resp.status_code != 200: + # Record this as a bad hit + with open(cachefile, 'w') as fh: + fh.write(str(resp.status_code)) + matches = re.findall( r'link\s+href="([^"]+)".*?(-----BEGIN PGP SIGNED MESSAGE-----.*?-----END PGP SIGNATURE-----)', - content, flags=re.DOTALL + resp.content.decode('utf-8'), flags=re.DOTALL ) - attdocs = list() if matches: for link, sigdata in matches: - attdocs.append(LoreAttestationDocument(link, sigdata)) + attdoc = LoreAttestationDocument(link, sigdata) + attdocs.append(attdoc) return attdocs @@ -1113,22 +1162,27 @@ class LoreAttestation: self.passing = False self.attdocs = list() - def validate(self, lore_lookup=True): + def _check_if_passing(self): global ATTESTATIONS - hg = (self.i, self.m, self.p) for attdoc in ATTESTATIONS: if hg in attdoc.hashes and attdoc.passing: self.passing = True self.attdocs.append(attdoc) + def validate(self, lore_lookup=True): + global ATTESTATIONS + self._check_if_passing() + + if not len(self.attdocs): + attdocs = LoreAttestationDocument.get_from_cache(self.attid) + ATTESTATIONS += attdocs + self._check_if_passing() + if not len(self.attdocs) and lore_lookup: attdocs = LoreAttestationDocument.get_from_lore(self.attid) ATTESTATIONS += attdocs - for attdoc in attdocs: - if hg in attdoc.hashes and attdoc.passing: - self.passing = True - self.attdocs.append(attdoc) + self._check_if_passing() def __repr__(self): out = list() @@ -1228,6 +1282,36 @@ def get_main_config(): return MAIN_CONFIG +def get_cache_dir(): + global _CACHE_CLEANED + if 'XDG_CACHE_HOME' in os.environ: + cachehome = os.environ['XDG_CACHE_HOME'] + else: + cachehome = os.path.join(str(Path.home()), '.cache') + cachedir = os.path.join(cachehome, 'b4') + Path(cachedir).mkdir(parents=True, exist_ok=True) + if _CACHE_CLEANED: + return cachedir + + # Delete all .mbx and .lookup files older than cache-expire + config = get_main_config() + try: + expmin = int(config['cache-expire']) * 60 + except ValueError: + logger.critical('ERROR: cache-expire must be an integer (minutes): %s', config['cache-expire']) + expmin = 600 + expage = time.time() - expmin + for entry in os.listdir(cachedir): + if entry.find('.mbx') <= 0 and entry.find('.lookup') <= 0: + continue + st = os.stat(os.path.join(cachedir, entry)) + if st.st_mtime < expage: + logger.debug('Cleaning up cache: %s', entry) + os.unlink(os.path.join(cachedir, entry)) + _CACHE_CLEANED = True + return cachedir + + def get_user_config(): global USER_CONFIG if USER_CONFIG is None: diff --git a/b4/command.py b/b4/command.py index 9c2cbe9..eca416d 100644 --- a/b4/command.py +++ b/b4/command.py @@ -71,6 +71,8 @@ def cmd(): cmd_mbox_common_opts(sp_am) sp_am.add_argument('-v', '--use-version', dest='wantver', type=int, default=None, help='Get a specific version of the patch/series') + sp_am.add_argument('-C', '--no-cache', dest='nocache', action='store_true', default=False, + help='Do not use local cache') sp_am.add_argument('-t', '--apply-cover-trailers', dest='covertrailers', action='store_true', default=False, help='Apply trailers sent to the cover letter to all patches') sp_am.add_argument('-T', '--no-add-trailers', dest='noaddtrailers', action='store_true', default=False, diff --git a/b4/mbox.py b/b4/mbox.py index 3c06bd2..ccd15ed 100644 --- a/b4/mbox.py +++ b/b4/mbox.py @@ -13,6 +13,7 @@ import email.message import email.utils import re import time +import shutil import urllib.parse import xml.etree.ElementTree @@ -53,6 +54,20 @@ def get_pi_thread_by_url(t_mbx_url, savefile): def get_pi_thread_by_msgid(msgid, config, cmdargs): wantname = cmdargs.wantname outdir = cmdargs.outdir + if wantname: + savefile = os.path.join(outdir, wantname) + else: + # Save it into msgid.mbox + savefile = '%s.t.mbx' % msgid + savefile = os.path.join(outdir, savefile) + + cachedir = b4.get_cache_dir() + cachefile = os.path.join(cachedir, '%s.pi.mbx' % urllib.parse.quote_plus(msgid)) + if os.path.exists(cachefile) and not cmdargs.nocache: + logger.debug('Using cached copy: %s', cachefile) + shutil.copyfile(cachefile, savefile) + return savefile + # Grab the head from lore, to see where we are redirected midmask = config['midmask'] % msgid logger.info('Looking up %s', midmask) @@ -64,12 +79,6 @@ def get_pi_thread_by_msgid(msgid, config, cmdargs): canonical = resp.headers['Location'].rstrip('/') resp.close() t_mbx_url = '%s/t.mbox.gz' % canonical - if wantname: - savefile = os.path.join(outdir, wantname) - else: - # Save it into msgid.mbox - savefile = '%s.t.mbx' % msgid - savefile = os.path.join(outdir, savefile) loc = urllib.parse.urlparse(t_mbx_url) if cmdargs.useproject: @@ -87,6 +96,7 @@ def get_pi_thread_by_msgid(msgid, config, cmdargs): in_mbx.close() out_mbx.close() os.unlink(in_mbxf) + shutil.copyfile(savefile, cachefile) return savefile diff --git a/man/b4.5.rst b/man/b4.5.rst index 161e665..1af8add 100644 --- a/man/b4.5.rst +++ b/man/b4.5.rst @@ -172,6 +172,10 @@ Default configuration, with explanations:: # git-config for gpg.program; and if that's not set, # we'll use "gpg" and hope for the best gpgbin = None + # + # How long to keep downloaded threads in cache (minutes)? + cache-expire = 10 + SUPPORT ------- -- cgit v1.2.3