Counting words - which implementation is faster?
-
collections
-
defaultdict
-
Counter
-
timeit
-
try
-
except
-
In this example we have 4 functions that count how many times each word appears in a list that is already in memory.
-
We use
timeit
to benchmark them. -
repeat
is the number of repetitions of each string. -
different
is the number of different strings.
from collections import defaultdict
from collections import Counter
import timeit
def generate_list_of_words(number, repeat):
    """Build the benchmark input: a list of stringified integers.

    Every integer from 0 up to ``number - 1`` is converted with ``str`` and
    placed in the list ``repeat`` times in a row, so equal values are
    adjacent and the list holds ``number * repeat`` entries.

    Args:
        number: How many different strings to generate.
        repeat: How many times each string occurs.

    Returns:
        A new list of strings.
    """
    return [str(value) for value in range(number) for _ in range(repeat)]
def plain_counter(words):
    """Count word occurrences with a plain ``dict`` and a membership test.

    A word that is not yet a key is first stored with the value 0; after
    that every occurrence (including the first one) adds 1 to its entry.

    Args:
        words: Iterable of hashable items to count.

    Returns:
        A ``dict`` mapping each distinct item to its number of occurrences.
    """
    tally = {}
    for item in words:
        # The operation sequence is kept as-is on purpose: it is the
        # technique this function contributes to the benchmark.
        if item not in tally:
            tally[item] = 0
        tally[item] += 1
    return tally
def counter_with_exceptions(words):
    """Count word occurrences with a plain ``dict`` and ``KeyError`` handling.

    Each item's entry is incremented directly; when the item is not a key
    yet, the resulting ``KeyError`` is caught and the entry is created
    with the value 1.

    Args:
        words: Iterable of hashable items to count.

    Returns:
        A ``dict`` mapping each distinct item to its number of occurrences.
    """
    tally = {}
    for item in words:
        try:
            tally[item] += 1
        except KeyError:
            # First time this item is seen.
            tally[item] = 1
    return tally
def counter_with_counter(words):
    """Count word occurrences by incrementing a ``collections.Counter``.

    The counter starts empty and is updated one item at a time, so the
    benchmark measures per-item increments on a ``Counter``.

    Args:
        words: Iterable of hashable items to count.

    Returns:
        A ``Counter`` mapping each distinct item to its number of
        occurrences.
    """
    tally = Counter()
    for item in words:
        tally[item] += 1
    return tally
def counter_with_default_dict(words):
    """Count word occurrences by incrementing a ``defaultdict(int)``.

    Looking up an item that is not a key yet makes the ``defaultdict``
    create the entry via ``int()``, i.e. 0, which is then incremented.

    Args:
        words: Iterable of hashable items to count.

    Returns:
        A ``defaultdict`` mapping each distinct item to its number of
        occurrences.
    """
    tally = defaultdict(int)
    for item in words:
        tally[item] += 1
    return tally
def main(total_words=1000, repeats=(1, 10, 20, 50), number=10000):
    """Benchmark the four counting functions and print their timings.

    For every value in ``repeats`` a word list is generated that consists
    of ``total_words // repeat`` different strings, each occurring
    ``repeat`` times. Every counting function is then timed with
    ``timeit`` over ``number`` calls on that list. For each group a header
    line is printed, then one line per function (name padded to 26
    columns followed by the elapsed seconds), then a blank line.

    Called without arguments it runs the original configuration:
    1000 words, repeats of 1, 10, 20 and 50, and 10000 calls per timing.

    To inspect the result of a single implementation instead of timing
    it, call it directly, for example
    ``plain_counter(generate_list_of_words(1000, 1))``.

    Args:
        total_words: Approximate length of each generated word list.
        repeats: Iterable of repetition counts to benchmark.
        number: How many times ``timeit`` calls each function per timing.
    """
    implementations = (
        plain_counter,
        counter_with_counter,
        counter_with_default_dict,
        counter_with_exceptions,
    )
    for repeat in repeats:
        # Integer division avoids the round trip through a float.
        different = total_words // repeat
        print(f'repeat {repeat} different {different}')
        # Built once per group and outside the timed statement; the
        # counting functions only read the list.
        words = generate_list_of_words(different, repeat)
        for func in implementations:
            # The function and its input are handed to timeit through
            # ``globals`` rather than a ``from __main__ import`` setup
            # string, so this also works when the module is imported
            # instead of being run as a script.
            elapsed = timeit.timeit(
                'func(words)',
                number=number,
                globals={'func': func, 'words': words},
            )
            print(f'{func.__name__:26} {elapsed}')
        print()
# Run the benchmark only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
repeat 1 different 1000
plain_counter 0.6091844770126045
counter_with_counter 1.232734862016514
counter_with_default_dict 0.7378899219911546
counter_with_exceptions 1.4480015779845417
repeat 10 different 100
plain_counter 0.4949962190585211
counter_with_counter 0.7886336819501594
counter_with_default_dict 0.4284116430208087
counter_with_exceptions 0.4748374510090798
repeat 20 different 50
plain_counter 0.4847069630632177
counter_with_counter 0.7627606929745525
counter_with_default_dict 0.4116779019823298
counter_with_exceptions 0.407719356007874
repeat 50 different 20
plain_counter 0.4709314970532432
counter_with_counter 0.7357207209570333
counter_with_default_dict 0.3903243549866602
counter_with_exceptions 0.36094399297144264