mirror of
https://github.com/allenai/olmocr.git
synced 2025-08-22 15:52:43 +00:00
new version
This commit is contained in:
parent
53a510479b
commit
09319a64ea
@ -1,7 +1,7 @@
|
|||||||
import unittest
|
import unittest
|
||||||
import random
|
import random
|
||||||
import string
|
import string
|
||||||
from collections import deque, defaultdict
|
import time
|
||||||
|
|
||||||
class RepeatDetector:
|
class RepeatDetector:
|
||||||
def __init__(self, max_ngram_size: int = 10):
|
def __init__(self, max_ngram_size: int = 10):
|
||||||
@ -147,5 +147,25 @@ class RepeatDetectorTest(unittest.TestCase):
|
|||||||
self.assertEqual(d.ngram_repeats(), [1, 5, 1, 2])
|
self.assertEqual(d.ngram_repeats(), [1, 5, 1, 2])
|
||||||
|
|
||||||
|
|
||||||
|
class BenchmarkRepeatDetect(unittest.TestCase):
    """Rough timing benchmark for ``RepeatDetector`` on long inputs."""

    def testLargeRandom(self):
        """Time 1000 detectors, each fed one 10,000-character string.

        Prints every detector's ``ngram_repeats()`` result followed by the
        total elapsed wall-clock time. The test makes no assertions; it only
        fails if the detector raises.
        """
        # NOTE(review): the population "a" contains a single character, so
        # every generated string is 10,000 copies of "a" rather than random
        # text. Kept as in the original; confirm whether a wider alphabet
        # (e.g. string.ascii_letters) was intended.
        all_data = ["".join(random.choices("a", k=10000)) for _ in range(1000)]

        results = []
        start = time.perf_counter()

        for data in all_data:
            d = RepeatDetector(max_ngram_size=20)
            d.add_letters(data)
            results.append(d.ngram_repeats())

        end = time.perf_counter()

        # Print after the clock stops so console I/O is not part of the
        # measured time.
        for repeats in results:
            print(repeats)

        # ".4f" gives four decimal places; the previous spec "0.0001f" was
        # parsed as zero-fill with precision 1 and printed only one decimal.
        print(f"testLargeRandom took {end - start:.4f} seconds")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
unittest.main()
|
unittest.main()
|
@ -2,7 +2,7 @@ _MAJOR = "0"
|
|||||||
_MINOR = "1"
|
_MINOR = "1"
|
||||||
# On main and in a nightly release the patch should be one ahead of the last
|
# On main and in a nightly release the patch should be one ahead of the last
|
||||||
# released build.
|
# released build.
|
||||||
_PATCH = "36"
|
_PATCH = "37"
|
||||||
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
||||||
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
||||||
_SUFFIX = ""
|
_SUFFIX = ""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user