2022-07-28 01:06:46 -07:00
|
|
|
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
|
|
|
# SPDX-License-Identifier: MPL-2.0
|
2020-01-17 03:10:27 -08:00
|
|
|
|
2022-07-23 00:39:24 -07:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2020-08-05 00:44:42 -07:00
|
|
|
from ocrmypdf import quality as qual
|
2020-01-17 03:10:27 -08:00
|
|
|
|
|
|
|
|
|
|
|
def test_quality_measurement():
    """Exercise OcrQualityDictionary on a small wordlist and sample strings.

    Verifies the size of the built dictionary and the score returned by
    ``measure_words_matched`` for three inputs.
    """
    vocabulary = ["words", "words", "quick", "brown", "fox", "dog", "lazy"]
    checker = qual.OcrQualityDictionary(wordlist=vocabulary)

    # "words" is listed twice in the wordlist; 6 distinct entries are expected.
    assert len(checker.dictionary) == 6

    # 5 of the 10 space-separated words in this sentence appear in the
    # wordlist, which is consistent with the expected score of 0.5.
    pangram = "The quick brown fox jumps quickly over the lazy dog"
    assert checker.measure_words_matched(pangram) == 0.5

    # NOTE(review): presumably tokens made of digits/punctuation are ignored
    # or stripped before matching, so only fox/brown/words count -- confirm
    # against the implementation of measure_words_matched.
    noisy_text = "12345 10% _f 7fox -brown | words"
    assert checker.measure_words_matched(noisy_text) == 1.0

    # The same known word repeated still yields a full score.
    repeated_text = "quick quick quick"
    assert checker.measure_words_matched(repeated_text) == 1.0
|