about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2022-09-02 14:15:13 -0400
committer: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2022-09-02 14:15:13 -0400
commit: fc921c6deadff376d728e41ba9d709d1ce8616df (patch)
tree: 1d9d80942878a02eb76330d601547d43b849b742
parent: 6750e8da440da0cecbb924a3989142da448fb84d (diff)
download: b4-master.tar.gz
Fix dedupe on using cached threads (HEAD, master)
We were not properly running dedupe on cached threads, so fix it by only caching post-dedupe messages.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- b4/__init__.py 17
1 file changed, 12 insertions, 5 deletions
diff --git a/b4/__init__.py b/b4/__init__.py
index 995bd7e..f25b518 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -2519,19 +2519,26 @@ def split_and_dedupe_pi_results(t_mbox: bytes, cachedir: Optional[str] = None) -
# Convert into individual files using git-mailsplit
with tempfile.TemporaryDirectory(suffix='-mailsplit') as tfd:
msgs = mailsplit_bytes(t_mbox, tfd)
- if cachedir:
- if os.path.exists(cachedir):
- shutil.rmtree(cachedir)
- shutil.copytree(tfd, cachedir)
deduped = dict()
+
for msg in msgs:
msgid = LoreMessage.get_clean_msgid(msg)
if msgid in deduped:
deduped[msgid] = LoreMessage.get_preferred_duplicate(deduped[msgid], msg)
continue
deduped[msgid] = msg
- return list(deduped.values())
+
+ msgs = list(deduped.values())
+ if cachedir:
+ if os.path.exists(cachedir):
+ shutil.rmtree(cachedir)
+ pathlib.Path(cachedir).mkdir(parents=True, exist_ok=True)
+ for at, msg in enumerate(msgs):
+ with open(os.path.join(cachedir, '%04d' % at), 'wb') as fh:
+ fh.write(msg.as_bytes())
+
+ return msgs
def get_pi_thread_by_url(t_mbx_url: str, nocache: bool = False):