diff --git a/pdelfin/repeatdetect.py b/pdelfin/repeatdetect.py
index befb460..06e1d7d 100644
--- a/pdelfin/repeatdetect.py
+++ b/pdelfin/repeatdetect.py
@@ -1,7 +1,7 @@
 import unittest
 import random
 import string
-from collections import deque, defaultdict
+import time
 
 class RepeatDetector:
     def __init__(self, max_ngram_size: int = 10):
@@ -147,5 +147,25 @@ class RepeatDetectorTest(unittest.TestCase):
         self.assertEqual(d.ngram_repeats(), [1, 5, 1, 2])
 
 
+class BenchmarkRepeatDetect(unittest.TestCase):  # timing benchmark; makes no assertions
+    def testLargeRandom(self):
+        all_data = []
+
+        for _ in range(1000):  # build 1000 inputs of 10,000 characters each, outside the timed region
+            all_data.append(''.join(random.choices("a", k=10000)))  # NOTE(review): population is just "a", so every input is identical despite the test name
+
+        start = time.perf_counter()
+
+        for data in all_data:
+            d = RepeatDetector(max_ngram_size=20)
+            d.add_letters(data)
+            print(d.ngram_repeats())  # NOTE(review): stdout I/O is included in the measured time
+
+        end = time.perf_counter()
+
+        print(f"testLargeRandom took {end-start:0.4f} seconds")  # 4 decimals; the old spec "0.0001f" parsed as precision 1
+
+
 if __name__ == "__main__":
+
     unittest.main()
\ No newline at end of file
diff --git a/pdelfin/version.py b/pdelfin/version.py
index 76a40ed..6c9a229 100644
--- a/pdelfin/version.py
+++ b/pdelfin/version.py
@@ -2,7 +2,7 @@ _MAJOR = "0"
 _MINOR = "1"
 # On main and in a nightly release the patch should be one ahead of the last
 # released build.
-_PATCH = "36"
+_PATCH = "37"
 # This is mainly for nightly builds which have the suffix ".dev$DATE". See
 # https://semver.org/#is-v123-a-semantic-version for the semantics.
 _SUFFIX = ""