diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-03-20 16:35:08 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-03-20 16:35:08 -0400 |
commit | 0f63a3f6a7398321bd799c7b778e0e721c811287 (patch) | |
tree | d617ad0aa9e361e8bb579941ee22a7149cf1c5ed | |
parent | 4cad662b69be9fa62460a342e2fd1aa87a7bd548 (diff) | |
download | b4-0f63a3f6a7398321bd799c7b778e0e721c811287.tar.gz |
Add caching layer
Many lore.kernel.org operations can be repeated in quick succession
of each other (e.g. someone reruns a query with -t). This commit adds a
caching layer that keeps lookups in a local cache for 10 minutes by
default. The expiration time can be made longer or shorter by editing the
'cache-expire' setting, or running "b4 am" with -C,--no-cache.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | b4/__init__.py | 108 | ||||
-rw-r--r-- | b4/command.py | 2 | ||||
-rw-r--r-- | b4/mbox.py | 22 | ||||
-rw-r--r-- | man/b4.5.rst | 4 |
4 files changed, 118 insertions, 18 deletions
diff --git a/b4/__init__.py b/b4/__init__.py index 7b9d2cf..237131a 100644 --- a/b4/__init__.py +++ b/b4/__init__.py @@ -12,7 +12,9 @@ import email.policy import requests import urllib.parse import datetime +import time +from pathlib import Path from tempfile import mkstemp from email import charset @@ -84,6 +86,8 @@ DEFAULT_CONFIG = { 'attestation-gnupghome': None, # Do you like simple or fancy checkmarks? 'attestation-checkmarks': 'fancy', + # How long to keep things in cache before expiring (minutes)? + 'cache-expire': '10', # If this is not set, we'll use what we find in # git-config for gpg.program, and if that's not set, # we'll use "gpg" and hope for the better @@ -101,6 +105,8 @@ ATTESTATIONS = list() SUBKEY_DATA = dict() # Used for storing our requests session REQSESSION = None +# Indicates that we've cleaned cache already +_CACHE_CLEANED = False class LoreMailbox: @@ -782,9 +788,9 @@ class LoreMessage: if fnmatch.fnmatch(trailer[0].lower(), trailermatch.strip()): fixlines.append('%s: %s' % trailer) if trailer not in btrailers: - logger.info(' Added: %s: %s' % trailer) + logger.info(' + %s: %s' % trailer) else: - logger.debug(' Kept: %s: %s' % trailer) + logger.debug(' . 
%s: %s' % trailer) added.append(trailer) trailersdone = True fixlines.append(line) @@ -1032,6 +1038,15 @@ class LoreAttestationDocument: if self.good and self.valid and self.trusted: self.passing = True + if source.find('http') == 0: + # We only cache known-good attestations obtained from remote + cachedir = get_cache_dir() + cachename = '%s.attestation' % urllib.parse.quote_plus(source.strip('/').split('/')[-1]) + fullpath = os.path.join(cachedir, cachename) + with open(fullpath, 'w') as fh: + logger.debug('Saved attestation in cache: %s', cachename) + fh.write(sigdata) + hg = [None, None, None] for line in sigdata.split('\n'): # It's a yaml document, but we don't parse it as yaml for safety reasons @@ -1069,25 +1084,59 @@ class LoreAttestationDocument: return '\n'.join(out) @staticmethod + def get_from_cache(attid): + cachedir = get_cache_dir() + attdocs = list() + for entry in os.listdir(cachedir): + if entry.find('.attestation') <= 0: + continue + fullpath = os.path.join(cachedir, entry) + with open(fullpath, 'r') as fh: + content = fh.read() + # Can't be 0, because it has to have pgp ascii wrapper + if content.find(attid) > 0: + attdoc = LoreAttestationDocument(fullpath, content) + attdocs.append(attdoc) + return attdocs + + @staticmethod def get_from_lore(attid): + attdocs = list() # XXX: Querying this via the Atom feed is a temporary kludge until we have # proper search API on lore.kernel.org + cachedir = get_cache_dir() + cachefile = os.path.join(cachedir, '%s.lookup' % urllib.parse.quote_plus(attid)) + status = None + if os.path.exists(cachefile): + with open(cachefile, 'r') as fh: + try: + status = int(fh.read()) + except ValueError: + pass + if status is not None and status != 200: + logger.debug('Cache says looking up %s = %s', attid, status) + return attdocs + config = get_main_config() queryurl = '%s?%s' % (config['attestation-query-url'], urllib.parse.urlencode({'q': attid, 'x': 'A', 'o': '-1'})) logger.debug('Query URL: %s', queryurl) session = 
get_requests_session() resp = session.get(queryurl) - content = resp.content.decode('utf-8') + if resp.status_code != 200: + # Record this as a bad hit + with open(cachefile, 'w') as fh: + fh.write(str(resp.status_code)) + matches = re.findall( r'link\s+href="([^"]+)".*?(-----BEGIN PGP SIGNED MESSAGE-----.*?-----END PGP SIGNATURE-----)', - content, flags=re.DOTALL + resp.content.decode('utf-8'), flags=re.DOTALL ) - attdocs = list() if matches: for link, sigdata in matches: - attdocs.append(LoreAttestationDocument(link, sigdata)) + attdoc = LoreAttestationDocument(link, sigdata) + attdocs.append(attdoc) return attdocs @@ -1113,22 +1162,27 @@ class LoreAttestation: self.passing = False self.attdocs = list() - def validate(self, lore_lookup=True): + def _check_if_passing(self): global ATTESTATIONS - hg = (self.i, self.m, self.p) for attdoc in ATTESTATIONS: if hg in attdoc.hashes and attdoc.passing: self.passing = True self.attdocs.append(attdoc) + def validate(self, lore_lookup=True): + global ATTESTATIONS + self._check_if_passing() + + if not len(self.attdocs): + attdocs = LoreAttestationDocument.get_from_cache(self.attid) + ATTESTATIONS += attdocs + self._check_if_passing() + if not len(self.attdocs) and lore_lookup: attdocs = LoreAttestationDocument.get_from_lore(self.attid) ATTESTATIONS += attdocs - for attdoc in attdocs: - if hg in attdoc.hashes and attdoc.passing: - self.passing = True - self.attdocs.append(attdoc) + self._check_if_passing() def __repr__(self): out = list() @@ -1228,6 +1282,36 @@ def get_main_config(): return MAIN_CONFIG +def get_cache_dir(): + global _CACHE_CLEANED + if 'XDG_CACHE_HOME' in os.environ: + cachehome = os.environ['XDG_CACHE_HOME'] + else: + cachehome = os.path.join(str(Path.home()), '.cache') + cachedir = os.path.join(cachehome, 'b4') + Path(cachedir).mkdir(parents=True, exist_ok=True) + if _CACHE_CLEANED: + return cachedir + + # Delete all .mbx and .lookup files older than cache-expire + config = get_main_config() + try: + expmin 
= int(config['cache-expire']) * 60 + except ValueError: + logger.critical('ERROR: cache-expire must be an integer (minutes): %s', config['cache-expire']) + expmin = 600 + expage = time.time() - expmin + for entry in os.listdir(cachedir): + if entry.find('.mbx') <= 0 and entry.find('.lookup') <= 0: + continue + st = os.stat(os.path.join(cachedir, entry)) + if st.st_mtime < expage: + logger.debug('Cleaning up cache: %s', entry) + os.unlink(os.path.join(cachedir, entry)) + _CACHE_CLEANED = True + return cachedir + + def get_user_config(): global USER_CONFIG if USER_CONFIG is None: diff --git a/b4/command.py b/b4/command.py index 9c2cbe9..eca416d 100644 --- a/b4/command.py +++ b/b4/command.py @@ -71,6 +71,8 @@ def cmd(): cmd_mbox_common_opts(sp_am) sp_am.add_argument('-v', '--use-version', dest='wantver', type=int, default=None, help='Get a specific version of the patch/series') + sp_am.add_argument('-C', '--no-cache', dest='nocache', action='store_true', default=False, + help='Do not use local cache') sp_am.add_argument('-t', '--apply-cover-trailers', dest='covertrailers', action='store_true', default=False, help='Apply trailers sent to the cover letter to all patches') sp_am.add_argument('-T', '--no-add-trailers', dest='noaddtrailers', action='store_true', default=False, @@ -13,6 +13,7 @@ import email.message import email.utils import re import time +import shutil import urllib.parse import xml.etree.ElementTree @@ -53,6 +54,20 @@ def get_pi_thread_by_url(t_mbx_url, savefile): def get_pi_thread_by_msgid(msgid, config, cmdargs): wantname = cmdargs.wantname outdir = cmdargs.outdir + if wantname: + savefile = os.path.join(outdir, wantname) + else: + # Save it into msgid.mbox + savefile = '%s.t.mbx' % msgid + savefile = os.path.join(outdir, savefile) + + cachedir = b4.get_cache_dir() + cachefile = os.path.join(cachedir, '%s.pi.mbx' % urllib.parse.quote_plus(msgid)) + if os.path.exists(cachefile) and not cmdargs.nocache: + logger.debug('Using cached copy: %s', cachefile) 
+ shutil.copyfile(cachefile, savefile) + return savefile + # Grab the head from lore, to see where we are redirected midmask = config['midmask'] % msgid logger.info('Looking up %s', midmask) @@ -64,12 +79,6 @@ def get_pi_thread_by_msgid(msgid, config, cmdargs): canonical = resp.headers['Location'].rstrip('/') resp.close() t_mbx_url = '%s/t.mbox.gz' % canonical - if wantname: - savefile = os.path.join(outdir, wantname) - else: - # Save it into msgid.mbox - savefile = '%s.t.mbx' % msgid - savefile = os.path.join(outdir, savefile) loc = urllib.parse.urlparse(t_mbx_url) if cmdargs.useproject: @@ -87,6 +96,7 @@ def get_pi_thread_by_msgid(msgid, config, cmdargs): in_mbx.close() out_mbx.close() os.unlink(in_mbxf) + shutil.copyfile(savefile, cachefile) return savefile diff --git a/man/b4.5.rst b/man/b4.5.rst index 161e665..1af8add 100644 --- a/man/b4.5.rst +++ b/man/b4.5.rst @@ -172,6 +172,10 @@ Default configuration, with explanations:: # git-config for gpg.program; and if that's not set, # we'll use "gpg" and hope for the best gpgbin = None + # + # How long to keep downloaded threads in cache (minutes)? + cache-expire = 10 + SUPPORT ------- |