From ab9c6a69a6bc0e1e4a5de232fee29acdaa69d2a5 Mon Sep 17 00:00:00 2001 From: Konstantin Ryabitsev Date: Mon, 21 Dec 2020 13:48:51 -0500 Subject: Add -f to "b4 mbox" to filter dupes When saving to a maildir, add option to filter out dupes. Note that this requires going through the entire maildir to collect message-ids, so it's not going to be a great experience on large maildirs. Signed-off-by: Konstantin Ryabitsev --- b4/command.py | 2 ++ b4/mbox.py | 11 +++++++++-- man/b4.5 | 5 ++++- man/b4.5.rst | 1 + 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/b4/command.py b/b4/command.py index 1105a05..ef1e856 100644 --- a/b4/command.py +++ b/b4/command.py @@ -84,6 +84,8 @@ def cmd(): # b4 mbox sp_mbox = subparsers.add_parser('mbox', help='Download a thread as an mbox file') cmd_mbox_common_opts(sp_mbox) + sp_mbox.add_argument('-f', '--filter-dupes', dest='filterdupes', action='store_true', default=False, + help='When adding messages to existing maildir, filter out duplicates') sp_mbox.set_defaults(func=cmd_mbox) # b4 am diff --git a/b4/mbox.py b/b4/mbox.py index eeca363..38b2f0e 100644 --- a/b4/mbox.py +++ b/b4/mbox.py @@ -554,9 +554,16 @@ def main(cmdargs): and os.path.isdir(os.path.join(cmdargs.outdir, 'cur')) and os.path.isdir(os.path.join(cmdargs.outdir, 'tmp'))): mdr = mailbox.Maildir(cmdargs.outdir) + have_msgids = set() + added = 0 + if cmdargs.filterdupes: + for emsg in mdr: + have_msgids.add(b4.LoreMessage.get_clean_msgid(emsg)) for msg in mbx: - mdr.add(msg) - logger.info('Added to maildir %s', cmdargs.outdir) + if b4.LoreMessage.get_clean_msgid(msg) not in have_msgids: + added += 1 + mdr.add(msg) + logger.info('Added to %s messages to maildir %s', added, cmdargs.outdir) mbx.close() os.unlink(threadfile) return diff --git a/man/b4.5 b/man/b4.5 index 350c363..496525d 100644 --- a/man/b4.5 +++ b/man/b4.5 @@ -102,6 +102,9 @@ Instead of grabbing a thread from lore, process this mbox file .TP .B \-C\fP,\fB \-\-no\-cache Do not use local cache +.TP +.B 
\-f\fP,\fB \-\-filter\-dupes +When adding messages to existing maildir, filter out duplicates .UNINDENT .UNINDENT .sp @@ -341,7 +344,7 @@ Default configuration, with explanations: .ft C [b4] # Where to look up threads by message id - midmask = https://lore.kernel.org/r/%s\(aq + midmask = https://lore.kernel.org/r/%s # # When recording Link: trailers, use this mask linkmask = https://lore.kernel.org/r/%s diff --git a/man/b4.5.rst b/man/b4.5.rst index e21ed71..ee05675 100644 --- a/man/b4.5.rst +++ b/man/b4.5.rst @@ -63,6 +63,7 @@ optional arguments: -m LOCALMBOX, --use-local-mbox LOCALMBOX Instead of grabbing a thread from lore, process this mbox file -C, --no-cache Do not use local cache + -f, --filter-dupes When adding messages to existing maildir, filter out duplicates *Example*: b4 mbox 20200313231252.64999-1-keescook@chromium.org -- cgit v1.2.3