diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2022-09-02 14:15:13 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2022-09-02 14:15:13 -0400 |
commit | fc921c6deadff376d728e41ba9d709d1ce8616df (patch) | |
tree | 1d9d80942878a02eb76330d601547d43b849b742 | |
parent | 6750e8da440da0cecbb924a3989142da448fb84d (diff) | |
download | b4-master.tar.gz |
Cached threads were not being deduplicated properly: the cache was populated
from the raw mailsplit output before deduplication ran. Fix this by writing
only the post-dedupe messages to the cache.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | b4/__init__.py | 17 |
1 file changed, 12 insertions, 5 deletions
diff --git a/b4/__init__.py b/b4/__init__.py index 995bd7e..f25b518 100644 --- a/b4/__init__.py +++ b/b4/__init__.py @@ -2519,19 +2519,26 @@ def split_and_dedupe_pi_results(t_mbox: bytes, cachedir: Optional[str] = None) - # Convert into individual files using git-mailsplit with tempfile.TemporaryDirectory(suffix='-mailsplit') as tfd: msgs = mailsplit_bytes(t_mbox, tfd) - if cachedir: - if os.path.exists(cachedir): - shutil.rmtree(cachedir) - shutil.copytree(tfd, cachedir) deduped = dict() + for msg in msgs: msgid = LoreMessage.get_clean_msgid(msg) if msgid in deduped: deduped[msgid] = LoreMessage.get_preferred_duplicate(deduped[msgid], msg) continue deduped[msgid] = msg - return list(deduped.values()) + + msgs = list(deduped.values()) + if cachedir: + if os.path.exists(cachedir): + shutil.rmtree(cachedir) + pathlib.Path(cachedir).mkdir(parents=True, exist_ok=True) + for at, msg in enumerate(msgs): + with open(os.path.join(cachedir, '%04d' % at), 'wb') as fh: + fh.write(msg.as_bytes()) + + return msgs def get_pi_thread_by_url(t_mbx_url: str, nocache: bool = False): |