#!/usr/bin/env pypy3

# pylint: disable=invalid-name,superfluous-parens
# pylama: ignore=I0011

"""Test anagrams_mod."""

import sys
import time
import collections

import modunits
import anagrams_mod


class Anagram_pair(object):
    # pylint: disable=too-few-public-methods

    """Just store a matching pair of anagrams."""

    def __init__(self, input_sentence, output_sentence):
        """Initialize an input/output sentence pair; both are stored lowercased."""
        self.input_sentence = input_sentence.lower()
        self.output_sentence = output_sentence.lower()


def _report(message):
    """Write message to stderr, prefixed with the script name and followed by a newline."""
    sys.stderr.write('{}: {}\n'.format(sys.argv[0], message))


def test_get_words_dict(dictionary):
    """
    Test get_words_dict function.

    dictionary is the result of anagrams_mod.get_words_dict on test-dictionary.txt.  It is indexed here
    by 1 and 4 - presumably word length; confirm against anagrams_mod.  Returns True if all checks pass.
    """
    all_good = True

    if 'a' not in dictionary[1]:
        _report('a not in test-dictionary.txt')
        all_good = False

    if 'b' in dictionary[1]:
        _report('b in test-dictionary.txt, but should not be')
        all_good = False

    if 'fred' not in dictionary[4]:
        _report('fred not in test-dictionary.txt')
        all_good = False

    return all_good


def test_get_alphabet():
    """Test get_alphabet function.  Returns True if all checks pass."""
    all_good = True

    for sentence, letter, count in [('sentence', 'e', 3), ('abc def', 'a', 1)]:
        alphabet = anagrams_mod.get_alphabet(sentence)

        # The expected frequencies are those of the sentence with blanks ignored.
        expected_result = collections.Counter(sentence)
        expected_result.pop(' ', None)

        if alphabet.frequencies != expected_result:
            _report('alphabet ({}) != expected_result ({})'.format(alphabet, expected_result))
            all_good = False

        if alphabet[letter] != count:
            _report('alphabet[letter] ({}) != count ({})'.format(alphabet[letter], count))
            all_good = False

    return all_good


def test_lf_is_equal():
    """Test equality and inequality of LetterFrequencies.  Returns True if all checks pass."""
    all_good = True

    lf1 = anagrams_mod.LetterFrequencies('sentence')
    lf2 = anagrams_mod.LetterFrequencies('sentnece')
    lf3 = anagrams_mod.LetterFrequencies('blahblah')

    # Same letters in a different order: must compare equal.
    if not lf1.is_equal(lf2):
        _report('not lf1.is_equal(lf2)')
        all_good = False

    # Different letters: must not compare equal.
    if lf1.is_equal(lf3):
        _report('lf1.is_equal(lf3)')
        all_good = False

    return all_good


def test_lf_is_subset():
    """Test subset and not subset of LetterFrequencies.  Returns True if all checks pass."""
    all_good = True

    lf1 = anagrams_mod.LetterFrequencies('sentence')
    lf2 = anagrams_mod.LetterFrequencies('sentnece')
    lf3 = anagrams_mod.LetterFrequencies('blahblah')
    lf4 = anagrams_mod.LetterFrequencies('sent')

    # lf1 should be a subset of lf2 - actually, they're equal.
    if not lf1.is_subset(lf2):
        _report('not lf1.is_subset(lf2)')
        all_good = False

    # lf1 should not be a subset of lf3.
    if lf1.is_subset(lf3):
        _report('lf1.is_subset(lf3)')
        all_good = False

    # lf4 should be a (proper, as it happens, though that's not what we're checking) subset of lf1.
    if not lf4.is_subset(lf1):
        _report('not lf4.is_subset(lf1)')
        all_good = False

    return all_good


def test_lf_str():
    """Test str(LetterFrequencies).  Returns True if all checks pass."""
    all_good = True

    lf = anagrams_mod.LetterFrequencies('aab')
    str_lf = str(lf)

    # Being careful to avoid assuming anything about the order, while still testing.
    # Should resemble: Counter({'a': 2, 'b': 1})
    for string in ['Counter', "'a'", "'b'", '2', '1', ':']:
        if string not in str_lf:
            _report('{} not in str_lf'.format(string))
            all_good = False

    return all_good


def test_lf_in():
    """Test letter in LetterFrequencies.  Returns True if all checks pass."""
    all_good = True

    lf = anagrams_mod.LetterFrequencies('sentence')

    if 'e' not in lf:
        _report('"e" not in lf')
        all_good = False

    return all_good


def test_lf_getitem():
    """Test LetterFrequencies getitem.  Returns True if all checks pass."""
    all_good = True

    lf = anagrams_mod.LetterFrequencies('sentence')

    # Each entry: the letter, its expected count, and how that count is worded in the failure message.
    for letter, count, times in [('e', 3, '3 times'), ('t', 1, '1 time')]:
        if letter not in lf:
            _report('"{}" not in lf'.format(letter))
            all_good = False
        elif lf[letter] != count:
            _report('{} not in lf {}'.format(letter, times))
            all_good = False

    return all_good


def test_lf_len():
    """Test len(LetterFrequencies).  Returns True if all checks pass."""
    all_good = True

    lf = anagrams_mod.LetterFrequencies('sentence')
    # 1 c
    # 3 e
    # 2 n
    # 1 s
    # 1 t

    if len(lf) != 5:
        _report('len(lf) is not 5')
        all_good = False

    return all_good


def test_lf_delitem():
    """Test del LetterFrequencies[item].  Returns True if all checks pass."""
    all_good = True

    lf = anagrams_mod.LetterFrequencies('sentence')
    # 1 c
    # 3 e
    # 2 n
    # 1 s
    # 1 t

    del lf['n']

    # Deleting n should leave the other 4 distinct letters.
    if len(lf) != 4:
        _report('len(lf) is not 4')
        all_good = False

    return all_good


def test_lf_setitem():
    """Test LetterFrequencies[letter] = count.  Returns True if all checks pass."""
    all_good = True

    lf = anagrams_mod.LetterFrequencies('sentence')
    # 1 c
    # 3 e
    # 2 n
    # 1 s
    # 1 t

    lf['n'] = 4

    # n was already present, so the number of distinct letters should still be 5.
    if len(lf) != 5:
        _report('len(lf) is not 5')
        all_good = False

    if lf['n'] != 4:
        _report('lf[n] is not 4')
        all_good = False

    return all_good


def test_lf_sub():
    """Test lf1 - lf2.  Returns True if all checks pass."""
    all_good = True

    lf1 = anagrams_mod.LetterFrequencies('sentence')
    lf2 = anagrams_mod.LetterFrequencies('ntn')

    lf3 = lf1 - lf2

    # 'sentence' is 1 c, 3 e, 2 n, 1 s, 1 t.  Taking away 2 n and 1 t should leave c, e and s: 3 letters.
    if len(lf3) != 3:
        _report('len(lf3) is not 3')
        all_good = False

    if lf3['c'] != 1:
        _report('lf3[c] is not 1')
        all_good = False

    if lf3['s'] != 1:
        _report('lf3[s] is not 1')
        all_good = False

    # All the n's were subtracted out, so n should be gone entirely.
    if 'n' in lf3:
        _report('lf3[n] is in lf3')
        all_good = False

    return all_good


def test_is_anagram():
    """Test is_anagram method.  Returns True if all checks pass."""
    all_good = True

    candidate_sentence = 'abcbcdef'

    # A sentence should be an anagram of its own letter frequencies.
    sentence_frequencies = anagrams_mod.LetterFrequencies(candidate_sentence)
    if not anagrams_mod.is_anagram(sentence_frequencies, candidate_sentence):
        _report('not an anagram')
        all_good = False

    # The frequencies of 'abc' should not match the longer candidate sentence.
    sentence_frequencies = anagrams_mod.LetterFrequencies('abc')
    if anagrams_mod.is_anagram(sentence_frequencies, candidate_sentence):
        _report('is an anagram')
        all_good = False

    return all_good


def test_prune_words_dict_for_alphabet(dictionary):
    """
    Test prune_words_dict_for_alphabet function.

    dictionary is the result of anagrams_mod.get_words_dict on test-dictionary.txt.
    Returns True if all checks pass.
    """
    all_good = True

    alphabet = anagrams_mod.get_alphabet('redf')
    alphabet_words_dict = anagrams_mod.prune_words_dict_for_alphabet(dictionary, alphabet)

    if alphabet_words_dict[4] != {'fred'}:
        _report('4 is not fred')
        all_good = False

    if 3 in alphabet_words_dict:
        _report('3 is in alphabet_words_dict')
        all_good = False

    return all_good


def test_pairs():
    """
    Test known pairs of anagrams.

    This reads /usr/share/dict/words once per pair, and reports the elapsed time of each search.
    Returns True if every expected output sentence was found among the results for its input sentence.
    """
    all_good = True

    input_list = []
    input_list.append(Anagram_pair('a', 'a'))
    input_list.append(Anagram_pair('I', 'I'))
    input_list.append(Anagram_pair('am', 'ma'))
    input_list.append(Anagram_pair('ska', 'ask'))
    input_list.append(Anagram_pair('dame', 'mead'))
    input_list.append(Anagram_pair('silent', 'listen'))
    input_list.append(Anagram_pair('The eyes', 'They see'))
    input_list.append(Anagram_pair('Fir cones', 'Conifers'))
    input_list.append(Anagram_pair('Dormitory', 'Dirty room'))
    input_list.append(Anagram_pair('Debit card', 'Bad credit'))
    input_list.append(Anagram_pair('Punishment', 'Nine Thumps'))
    input_list.append(Anagram_pair('Dabo Swinney', 'Soybean Wind'))
    # 1 minute, 37 seconds
    input_list.append(Anagram_pair('The Morse code', 'Here come dots'))  # 12
    input_list.append(Anagram_pair('The Hurricanes', 'These churn air'))  # 13

    # The pairs below are disabled because they have been too slow to run routinely; the timings are
    # the author's notes.

    # 13 hours :)  I suspect it was thrashing on a machine with 8 gig of physmem
    # input_list.append(Anagram_pair('antidemocratic', 'dictator came in'))  # 14
    # 3 minutes, 51 seconds
    # input_list.append(Anagram_pair('School master', 'The classroom'))
    # ?7 minutes
    # input_list.append(Anagram_pair('Conversation', 'Voices rant on'))
    # ?31 minutes
    # input_list.append(Anagram_pair('The Hurricanes', 'These churn air'))
    # ?31 minutes
    # input_list.append(Anagram_pair('Eleven plus two', 'Twelve plus one'))
    # This took about 4 hours
    # input_list.append(Anagram_pair('Payment received', 'Every cent paid me'))  # 15
    # input_list.append(Anagram_pair('Microsoft Windows', 'Sown in discomfort'))  # 16
    # input_list.append(Anagram_pair('A domesticated animal', 'Docile as a man Tamed it'))
    # This took more than a day before I killed it
    # input_list.append(Anagram_pair('The public art galleries', 'Large picture halls I bet'))

    # First list everything we are about to try, with the number of non-blank characters in each input.
    for anagram_pair in input_list:
        sys.stderr.write('{} ({} chars) - {}\n'.format(
            anagram_pair.input_sentence,
            len(anagram_pair.input_sentence.replace(' ', '')),
            anagram_pair.output_sentence,
        ))

    for anagram_pair in input_list:
        time0 = time.time()
        # The dictionary file is reopened for each pair, so find_anagrams always gets a fresh file.
        with open('/usr/share/dict/words', 'r', encoding='utf-8') as dictionary_file:
            result_list = anagrams_mod.find_anagrams(False, dictionary_file, anagram_pair.input_sentence)
        time1 = time.time()

        time_diff = round(time1 - time0)
        time_diff_str = modunits.modunits(
            'time',
            time_diff,
            units='unabbreviated',
            reverse=True,
            detail='two-highest',
        )

        output_set = set(result_list)
        if anagram_pair.output_sentence in output_set:
            verdict = 'good'
        else:
            verdict = 'bad'
            all_good = False

        _report('{} pair: input: {}, expected output: {}, {}'.format(
            verdict,
            anagram_pair.input_sentence,
            anagram_pair.output_sentence,
            time_diff_str,
        ))

    return all_good


def test_is_word_of_alphabet():
    """Test the is_word_of_alphabet function.  Returns True if all checks pass."""
    all_good = True

    # Every letter of tent is available in 'test sentence'.
    alphabet = anagrams_mod.get_alphabet('test sentence')
    if not anagrams_mod.is_word_of_alphabet(alphabet, 'tent'):
        _report('word tent is not over alphabet')
        all_good = False

    # trial needs letters that 'test sentence' does not have.
    alphabet = anagrams_mod.get_alphabet('test sentence')
    if anagrams_mod.is_word_of_alphabet(alphabet, 'trial'):
        _report('word trial is over alphabet')
        all_good = False

    return all_good


def test_del_blanks():
    """Test the del_blanks function.  Returns True if all checks pass."""
    all_good = True

    result = anagrams_mod.del_blanks('abc def ghi')
    if result != 'abcdefghi':
        _report('failed to delete blanks from abc def ghi; instead got {}'.format(result))
        all_good = False

    # A string with no blanks should come back unchanged.
    if anagrams_mod.del_blanks('abcdefghi') != 'abcdefghi':
        _report('failed to delete blanks from abcdefghi')
        all_good = False

    return all_good


def main():
    """
    Get the ball rolling.

    Runs every test, even after a failure, and exits with status 0 if all of them passed, or 1 otherwise.
    """
    all_good = True

    # Same explicit encoding as the system dictionary in test_pairs, rather than the locale's default.
    with open('test-dictionary.txt', 'r', encoding='utf-8') as file_:
        dictionary = anagrams_mod.get_words_dict(file_)

    # unit tests
    all_good &= test_get_words_dict(dictionary)
    all_good &= test_get_alphabet()
    all_good &= test_prune_words_dict_for_alphabet(dictionary)
    all_good &= test_lf_is_equal()
    all_good &= test_lf_is_subset()
    all_good &= test_lf_str()
    all_good &= test_lf_in()
    all_good &= test_lf_getitem()
    all_good &= test_lf_len()
    all_good &= test_lf_delitem()
    all_good &= test_lf_setitem()
    all_good &= test_lf_sub()
    all_good &= test_is_anagram()
    all_good &= test_del_blanks()
    all_good &= test_is_word_of_alphabet()

    # this one is a system test; it tests how the pieces are fitting together
    all_good &= test_pairs()

    if all_good:
        sys.exit(0)
    else:
        _report('One or more tests failed')
        sys.exit(1)


main()