# Source code for mail_deduplicate.tests.test_strategy

# Copyright Kevin Deldycke <kevin@deldycke.com> and contributors.
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
from __future__ import annotations

from mailbox import Maildir
from string import ascii_lowercase

import arrow
import pytest

from mail_deduplicate.strategy import (
    DISCARD_ALL_BUT_ONE,
    DISCARD_BIGGER,
    DISCARD_BIGGEST,
    DISCARD_NEWER,
    DISCARD_NEWEST,
    DISCARD_OLDER,
    DISCARD_OLDEST,
    DISCARD_ONE,
    DISCARD_SMALLER,
    DISCARD_SMALLEST,
    SELECT_ALL_BUT_ONE,
    SELECT_BIGGER,
    SELECT_BIGGEST,
    SELECT_NEWER,
    SELECT_NEWEST,
    SELECT_OLDER,
    SELECT_OLDEST,
    SELECT_ONE,
    SELECT_SMALLER,
    SELECT_SMALLEST,
    STRATEGY_METHODS,
)

from .conftest import MailFactory, check_box


def test_strategy_definitions():
    """Check the shape of every registered deduplication strategy.

    Each key of ``STRATEGY_METHODS`` must be a string made only of lowercase
    ASCII letters and dashes, and each value must be callable.
    """
    # The only characters a strategy ID is allowed to contain.
    allowed_chars = set(ascii_lowercase + "-")

    for strategy_id, method in STRATEGY_METHODS.items():
        assert isinstance(strategy_id, str)
        assert set(strategy_id) <= allowed_chars
        assert callable(method)
# Time-based collection of pre-defined fixtures: four mails whose dates are
# one minute apart, from the current time down to three minutes earlier.
now = arrow.utcnow()
newest_mail = MailFactory(date=now)
newer_mail = MailFactory(date=now.shift(minutes=-1))
older_mail = MailFactory(date=now.shift(minutes=-2))
oldest_mail = MailFactory(date=now.shift(minutes=-3))

# Size-based collection of pre-defined fixtures: the same base body, padded
# with a growing number of "+" characters.
smallest_mail = MailFactory(body="Hello I am a duplicate mail. With annoying ćĥäŖş.")
smaller_mail = MailFactory(body="Hello I am a duplicate mail. With annoying ćĥäŖş. ++")
bigger_mail = MailFactory(
    body="Hello I am a duplicate mail. With annoying ćĥäŖş. +++++",
)
biggest_mail = MailFactory(
    body="Hello I am a duplicate mail. With annoying ćĥäŖş. +++++++++",
)

# Quantity-based collection of pre-defined fixtures, each built with its own
# message ID produced by ``MailFactory.random_string(30)``.
random_mail_1 = MailFactory(message_id=MailFactory.random_string(30))
random_mail_2 = MailFactory(message_id=MailFactory.random_string(30))
random_mail_3 = MailFactory(message_id=MailFactory.random_string(30))

# List of strategies and their required dummy parameters.
# Each strategy gets its own empty list: ``dict.fromkeys(..., [])`` would make
# every key point to one single shared list, so a mutation made through one
# key would silently show up under all the others.
strategy_options: dict[str, list[str]] = {
    strategy_id: [] for strategy_id in STRATEGY_METHODS
}
# Add dummy regexps to the strategies paired with a ``--regexp`` option.
strategy_options.update(
    {
        "discard-matching-path": ["--regexp=.*"],
        "discard-non-matching-path": ["--regexp=.*"],
        "select-matching-path": ["--regexp=.*"],
        "select-non-matching-path": ["--regexp=.*"],
    },
)
@pytest.mark.parametrize(("strategy_id", "params"), strategy_options.items())
def test_maildir_dry_run(invoke, make_box, strategy_id, params):
    """Check no mail is removed in dry-run mode."""
    # Every pre-defined mail appears twice in a row in the mailbox.
    distinct_mails = (
        newest_mail,
        newer_mail,
        older_mail,
        oldest_mail,
        smallest_mail,
        smaller_mail,
        bigger_mail,
        biggest_mail,
        random_mail_1,
        random_mail_2,
        random_mail_3,
    )
    mailbox_content = [mail for mail in distinct_mails for _ in range(2)]

    # Hand out independent copies so the box setup and the final check never
    # share the same list object.
    box_path, box_type = make_box(Maildir, list(mailbox_content))

    cli_args = [
        f"--strategy={strategy_id}",
        *params,
        "--action=delete-selected",
        "--dry-run",
        box_path,
    ]
    result = invoke(*cli_args)

    assert result.exit_code == 0
    # The box is expected to hold exactly what it was created with.
    check_box(box_path, box_type, content=list(mailbox_content))
# Mailbox inputs shared by several test cases. They are stored as tuples and
# copied into a fresh list for each case, so no two cases share a list object.
_TIME_MIXED_MAILS = (
    oldest_mail,
    newest_mail,
    oldest_mail,
    newer_mail,
    older_mail,
    older_mail,
    newer_mail,
    newest_mail,
)
_SIZE_MIXED_MAILS = (
    smallest_mail,
    biggest_mail,
    smallest_mail,
    bigger_mail,
    smaller_mail,
    smaller_mail,
    bigger_mail,
    biggest_mail,
)
_QUANTITY_MIXED_MAILS = (
    random_mail_1,
    random_mail_2,
    random_mail_2,
    random_mail_1,
    random_mail_3,
    random_mail_2,
)

# List of (case_id, strategy_ids, mailbox_input, mailbox_results) tuples.
# ``mailbox_results`` is the content expected in the box once the strategy
# has been run with ``--action=delete-selected``.
test_cases = [
    # Whatever the time-based or size-based strategy, the duplicate set is not
    # actionable if the selection criterion doesn't produce any match.
    (
        "no_match",
        [
            DISCARD_OLDER,
            DISCARD_OLDEST,
            DISCARD_NEWER,
            DISCARD_NEWEST,
            SELECT_OLDER,
            SELECT_OLDEST,
            SELECT_NEWER,
            SELECT_NEWEST,
            DISCARD_SMALLER,
            DISCARD_SMALLEST,
            DISCARD_BIGGER,
            DISCARD_BIGGEST,
            SELECT_SMALLER,
            SELECT_SMALLEST,
            SELECT_BIGGER,
            SELECT_BIGGEST,
        ],
        [random_mail_1, random_mail_1],
        [random_mail_1, random_mail_1],
    ),
    (
        "older_selection",
        [SELECT_OLDER, DISCARD_NEWEST],
        list(_TIME_MIXED_MAILS),
        # Only the newest mails are expected to remain.
        [newest_mail, newest_mail],
    ),
    (
        "oldest_selection",
        [SELECT_OLDEST, DISCARD_NEWER],
        list(_TIME_MIXED_MAILS),
        # Everything but the oldest mails is expected to remain.
        [
            newest_mail,
            newer_mail,
            older_mail,
            older_mail,
            newer_mail,
            newest_mail,
        ],
    ),
    (
        "newer_selection",
        [SELECT_NEWER, DISCARD_OLDEST],
        list(_TIME_MIXED_MAILS),
        # Only the oldest mails are expected to remain.
        [oldest_mail, oldest_mail],
    ),
    (
        "newest_selection",
        [SELECT_NEWEST, DISCARD_OLDER],
        list(_TIME_MIXED_MAILS),
        # Everything but the newest mails is expected to remain.
        [
            oldest_mail,
            oldest_mail,
            newer_mail,
            older_mail,
            older_mail,
            newer_mail,
        ],
    ),
    (
        "smaller_selection",
        [SELECT_SMALLER, DISCARD_BIGGEST],
        list(_SIZE_MIXED_MAILS),
        # Only the biggest mails are expected to remain.
        [biggest_mail, biggest_mail],
    ),
    (
        "smallest_selection",
        [SELECT_SMALLEST, DISCARD_BIGGER],
        list(_SIZE_MIXED_MAILS),
        # Everything but the smallest mails is expected to remain.
        [
            biggest_mail,
            bigger_mail,
            smaller_mail,
            smaller_mail,
            bigger_mail,
            biggest_mail,
        ],
    ),
    (
        "bigger_selection",
        [SELECT_BIGGER, DISCARD_SMALLEST],
        list(_SIZE_MIXED_MAILS),
        # Only the smallest mails are expected to remain.
        [smallest_mail, smallest_mail],
    ),
    (
        "biggest_selection",
        [SELECT_BIGGEST, DISCARD_SMALLER],
        list(_SIZE_MIXED_MAILS),
        # Everything but the biggest mails is expected to remain.
        [
            smallest_mail,
            smallest_mail,
            bigger_mail,
            smaller_mail,
            smaller_mail,
            bigger_mail,
        ],
    ),
    (
        "one_selection",
        [SELECT_ONE, DISCARD_ALL_BUT_ONE],
        list(_QUANTITY_MIXED_MAILS),
        # One copy fewer of each duplicated mail is expected to remain.
        [
            random_mail_1,
            random_mail_2,
            random_mail_2,
            random_mail_3,
        ],
    ),
    (
        "all_but_one_selection",
        [SELECT_ALL_BUT_ONE, DISCARD_ONE],
        list(_QUANTITY_MIXED_MAILS),
        # A single copy of each mail is expected to remain.
        [
            random_mail_1,
            random_mail_2,
            random_mail_3,
        ],
    ),
]
@pytest.mark.parametrize(
    ("strategy_id", "mailbox_input", "mailbox_results"),
    [
        # Expand each case into one parameter set per strategy it lists, with
        # an ID of the form ``<case_id>|<strategy_id>``.
        pytest.param(strategy, box_before, box_after, id=f"{label}|{strategy}")
        for label, strategies, box_before, box_after in test_cases
        for strategy in strategies
    ],
)
def test_maildir_strategy(
    invoke,
    make_box,
    strategy_id,
    mailbox_input,
    mailbox_results,
):
    """Run one strategy on a Maildir and compare the box to the expected mails.

    A box is created from ``mailbox_input``, the CLI is invoked with the
    strategy and the ``delete-selected`` action, then the box content is
    checked against ``mailbox_results``.
    """
    box_path, box_type = make_box(Maildir, mailbox_input)

    cli_args = (
        f"--strategy={strategy_id}",
        "--action=delete-selected",
        box_path,
    )
    result = invoke(*cli_args)

    assert result.exit_code == 0
    check_box(box_path, box_type, content=mailbox_results)