diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2021-08-17 17:00:02 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2021-08-17 17:00:02 -0400 |
commit | dd6997138e01357d95c0e0212d9af33b977fd012 (patch) | |
tree | 32ce67e42eee3b689ceddb9f4543223a7e27df93 | |
parent | 3e96a43accac28586e34a1a2b4a3c90948281bd0 (diff) | |
download | b4-dd6997138e01357d95c0e0212d9af33b977fd012.tar.gz |
Filter duplicate results when querying new series
When querying against /all/, we may get multiple hits for the same
subject, so deal with it early.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | b4/mbox.py | 5 |
1 file changed, 5 insertions, 0 deletions
@@ -418,6 +418,7 @@ def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[int] = N t_mbx_url = '%s/%s/t.mbox.gz' % (listarc.rstrip('/'), nt_msgid) potentials = b4.get_pi_thread_by_url(t_mbx_url, nocache=nocache) if potentials: + potentials = b4.get_strict_thread(potentials, nt_msgid) nt_msgs += potentials logger.info(' Added %s messages from that thread', len(potentials)) else: @@ -461,6 +462,7 @@ def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[int] = N resp.close() ns = {'atom': 'http://www.w3.org/2005/Atom'} entries = tree.findall('atom:entry', ns) + seen_urls = set() for entry in entries: title = entry.find('atom:title', ns).text @@ -497,6 +499,9 @@ def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[int] = N logger.debug('No idea what this is: %s', title) continue t_mbx_url = '%st.mbox.gz' % link + if t_mbx_url in seen_urls: + continue + seen_urls.add(t_mbx_url) logger.info('New revision: %s', title) potentials = b4.get_pi_thread_by_url(t_mbx_url, nocache=nocache) if potentials: |