aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2021-08-17 17:00:02 -0400
committer Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2021-08-17 17:00:02 -0400
commit dd6997138e01357d95c0e0212d9af33b977fd012 (patch)
tree 32ce67e42eee3b689ceddb9f4543223a7e27df93
parent 3e96a43accac28586e34a1a2b4a3c90948281bd0 (diff)
download b4-dd6997138e01357d95c0e0212d9af33b977fd012.tar.gz
Filter duplicate results when querying new series
When querying against /all/, we may get multiple hits for the same subject, so deal with it early.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- b4/mbox.py 5
1 files changed, 5 insertions, 0 deletions
diff --git a/b4/mbox.py b/b4/mbox.py
index e11d2ae..bf6618b 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -418,6 +418,7 @@ def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[int] = N
t_mbx_url = '%s/%s/t.mbox.gz' % (listarc.rstrip('/'), nt_msgid)
potentials = b4.get_pi_thread_by_url(t_mbx_url, nocache=nocache)
if potentials:
+ potentials = b4.get_strict_thread(potentials, nt_msgid)
nt_msgs += potentials
logger.info(' Added %s messages from that thread', len(potentials))
else:
@@ -461,6 +462,7 @@ def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[int] = N
resp.close()
ns = {'atom': 'http://www.w3.org/2005/Atom'}
entries = tree.findall('atom:entry', ns)
+ seen_urls = set()
for entry in entries:
title = entry.find('atom:title', ns).text
@@ -497,6 +499,9 @@ def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[int] = N
logger.debug('No idea what this is: %s', title)
continue
t_mbx_url = '%st.mbox.gz' % link
+ if t_mbx_url in seen_urls:
+ continue
+ seen_urls.add(t_mbx_url)
logger.info('New revision: %s', title)
potentials = b4.get_pi_thread_by_url(t_mbx_url, nocache=nocache)
if potentials: