aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-12-21 13:48:51 -0500
committer: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-12-21 13:50:58 -0500
commit: ab9c6a69a6bc0e1e4a5de232fee29acdaa69d2a5 (patch)
tree: 6527832c83491b29ccc87da812e1cd5a35658dcf
parent: 10af809c0c75f6b229f72356eca07d19a4f24480 (diff)
download: b4-ab9c6a69a6bc0e1e4a5de232fee29acdaa69d2a5.tar.gz
Add -f to "b4 mbox" to filter dupes
When saving to a maildir, add an option to filter out dupes. Note that this requires going through the entire maildir to collect message-ids, so it's not going to be a great experience on large maildirs.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- b4/command.py 2
-rw-r--r-- b4/mbox.py 11
-rw-r--r-- man/b4.5 5
-rw-r--r-- man/b4.5.rst 1
4 files changed, 16 insertions, 3 deletions
diff --git a/b4/command.py b/b4/command.py
index 1105a05..ef1e856 100644
--- a/b4/command.py
+++ b/b4/command.py
@@ -84,6 +84,8 @@ def cmd():
# b4 mbox
sp_mbox = subparsers.add_parser('mbox', help='Download a thread as an mbox file')
cmd_mbox_common_opts(sp_mbox)
+ sp_mbox.add_argument('-f', '--filter-dupes', dest='filterdupes', action='store_true', default=False,
+ help='When adding messages to existing maildir, filter out duplicates')
sp_mbox.set_defaults(func=cmd_mbox)
# b4 am
diff --git a/b4/mbox.py b/b4/mbox.py
index eeca363..38b2f0e 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -554,9 +554,16 @@ def main(cmdargs):
and os.path.isdir(os.path.join(cmdargs.outdir, 'cur'))
and os.path.isdir(os.path.join(cmdargs.outdir, 'tmp'))):
mdr = mailbox.Maildir(cmdargs.outdir)
+ have_msgids = set()
+ added = 0
+ if cmdargs.filterdupes:
+ for emsg in mdr:
+ have_msgids.add(b4.LoreMessage.get_clean_msgid(emsg))
for msg in mbx:
- mdr.add(msg)
- logger.info('Added to maildir %s', cmdargs.outdir)
+ if b4.LoreMessage.get_clean_msgid(msg) not in have_msgids:
+ added += 1
+ mdr.add(msg)
+ logger.info('Added to %s messages to maildir %s', added, cmdargs.outdir)
mbx.close()
os.unlink(threadfile)
return
diff --git a/man/b4.5 b/man/b4.5
index 350c363..496525d 100644
--- a/man/b4.5
+++ b/man/b4.5
@@ -102,6 +102,9 @@ Instead of grabbing a thread from lore, process this mbox file
.TP
.B \-C\fP,\fB \-\-no\-cache
Do not use local cache
+.TP
+.B \-f\fP,\fB \-\-filter\-dupes
+When adding messages to existing maildir, filter out duplicates
.UNINDENT
.UNINDENT
.sp
@@ -341,7 +344,7 @@ Default configuration, with explanations:
.ft C
[b4]
# Where to look up threads by message id
- midmask = https://lore.kernel.org/r/%s\(aq
+ midmask = https://lore.kernel.org/r/%s
#
# When recording Link: trailers, use this mask
linkmask = https://lore.kernel.org/r/%s
diff --git a/man/b4.5.rst b/man/b4.5.rst
index e21ed71..ee05675 100644
--- a/man/b4.5.rst
+++ b/man/b4.5.rst
@@ -63,6 +63,7 @@ optional arguments:
-m LOCALMBOX, --use-local-mbox LOCALMBOX
Instead of grabbing a thread from lore, process this mbox file
-C, --no-cache Do not use local cache
+ -f, --filter-dupes When adding messages to existing maildir, filter out duplicates
*Example*: b4 mbox 20200313231252.64999-1-keescook@chromium.org