From 051be5aead9559696fbe5a9f55171c09e16820bc Mon Sep 17 00:00:00 2001
From: Jake Zerrer <159055756+jake-normal@users.noreply.github.com>
Date: Fri, 9 Aug 2024 00:28:48 -0400
Subject: [PATCH] Remove unstructured.pytesseract fork (#3454)

A second attempt at
https://github.com/Unstructured-IO/unstructured/pull/3360, this PR
removes unstructured's dependency on its own fork of `pytesseract`. (The
original reason for the fork, the addition of
`run_and_get_multiple_output`, no longer applies now that the function
has been released upstream; see the
[v0.3.12 release](https://github.com/madmaze/pytesseract/releases/tag/v0.3.12).)

---------

Co-authored-by: Christine Straub <christinemstraub@gmail.com>
---
 CHANGELOG.md                                       |  3 ++-
 Dockerfile                                         |  2 +-
 Makefile                                           |  2 +-
 requirements/deps/constraints.txt                  |  4 ++--
 requirements/extra-pdf-image.in                    |  5 +----
 requirements/extra-pdf-image.txt                   | 12 ++++--------
 requirements/extra-pptx.txt                        |  2 +-
 requirements/ingest/embed-aws-bedrock.txt          |  2 +-
 requirements/ingest/embed-huggingface.txt          |  2 +-
 requirements/ingest/embed-openai.txt               |  2 +-
 requirements/ingest/embed-vertexai.txt             |  2 +-
 requirements/ingest/embed-voyageai.txt             |  2 +-
 requirements/ingest/singlestore.txt                |  2 +-
 requirements/test.txt                              |  4 +---
 .../partition/pdf_image/test_analysis.py           |  2 +-
 test_unstructured/partition/pdf_image/test_ocr.py  |  8 ++++----
 test_unstructured/partition/pdf_image/test_pdf.py  |  6 +++---
 unstructured/__version__.py                        |  2 +-
 unstructured/chunking/base.py                      |  4 +---
 unstructured/ingest/connector/notion/helpers.py    |  8 ++------
 unstructured/ingest/v2/pipeline/steps/chunk.py     |  4 +---
 unstructured/ingest/v2/pipeline/steps/embed.py     |  4 +---
 unstructured/ingest/v2/pipeline/steps/partition.py |  4 +---
 unstructured/partition/pdf.py                      |  2 +-
 unstructured/partition/utils/constants.py          |  2 +-
 .../partition/utils/ocr_models/tesseract_ocr.py    | 14 +++++++-------
 unstructured/utils.py                              |  6 ++----
 27 files changed, 45 insertions(+), 67 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3c63314d1..e69af84dc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-## 0.15.2-dev4
+## 0.15.2-dev5
 
 ### Enhancements
 
@@ -12,6 +12,7 @@
 * **Accommodate single-column CSV files.** Resolves a limitation of `partition_csv()` where delimiter detection would fail on a single-column CSV file (which naturally has no delimeters).
 * **Accommodate `image/jpg` in PPTX as alias for `image/jpeg`.** Resolves problem partitioning PPTX files having an invalid `image/jpg` (should be `image/jpeg`) MIME-type in the `[Content_Types].xml` member of the PPTX Zip archive.
 * **Fixes an issue in Object Detection metrics** The issue was in preprocessing/validating the ground truth and predicted data for object detection metrics.
+* **Removes dependency on unstructured.pytesseract** Unstructured forked pytesseract while waiting for code to be upstreamed. Now that the new version has been released, this fork can be removed.
 
 ## 0.15.1
 
diff --git a/Dockerfile b/Dockerfile
index bea1e6199..eb7fd5b29 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,7 +10,7 @@ COPY test_unstructured test_unstructured
 COPY example-docs example-docs
 
 RUN chown -R notebook-user:notebook-user /app && \
-  apk add font-ubuntu && \
+  apk add font-ubuntu git && \
   fc-cache -fv && \
   ln -s /usr/bin/python3.11 /usr/bin/python3
 
diff --git a/Makefile b/Makefile
index 58583fd56..64c823318 100644
--- a/Makefile
+++ b/Makefile
@@ -46,7 +46,7 @@ install-test:
 	python3 -m pip install -r requirements/test.txt
 	# NOTE(yao) - CI seem to always install tesseract to test so it would make sense to also require
 	# pytesseract installation into the virtual env for testing
-	python3 -m pip install unstructured.pytesseract -c requirements/deps/constraints.txt
+	python3 -m pip install pytesseract -c requirements/deps/constraints.txt
 	# python3 -m pip install argilla==1.28.0 -c requirements/deps/constraints.txt
 	# NOTE(robinson) - Installing weaviate-client separately here because the requests
 	# version conflicts with label_studio_sdk
diff --git a/requirements/deps/constraints.txt b/requirements/deps/constraints.txt
index 7da1129d1..93828753a 100644
--- a/requirements/deps/constraints.txt
+++ b/requirements/deps/constraints.txt
@@ -22,8 +22,8 @@ Office365-REST-Python-Client<2.4.3
 # unstructured-inference to be upgraded when unstructured library is upgraded
 # https://github.com/Unstructured-IO/unstructured/issues/1458
 # unstructured-inference
-# use the known compatible version of weaviate and unstructured.pytesseract
-unstructured.pytesseract>=0.3.12
+# use the known compatible version of weaviate and pytesseract
+pytesseract @ git+https://github.com/madmaze/pytesseract.git@v0.3.13
 weaviate-client>3.25.0
 # Note(yuming) - pining to avoid conflict with paddle install
 matplotlib==3.7.2
diff --git a/requirements/extra-pdf-image.in b/requirements/extra-pdf-image.in
index 621def12e..8c68a0fcd 100644
--- a/requirements/extra-pdf-image.in
+++ b/requirements/extra-pdf-image.in
@@ -7,12 +7,9 @@ pdfminer.six
 pikepdf
 pillow_heif
 pypdf
-pytesseract
 google-cloud-vision
 effdet
 # Do not move to constraints.in, otherwise unstructured-inference will not be upgraded
 # when unstructured library is.
 unstructured-inference==0.7.36
-# unstructured fork of pytesseract that provides an interface to allow for multiple output formats
-# from one tesseract call
-unstructured.pytesseract>=0.3.12
+pytesseract>=0.3.12
diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt
index c57f23bd7..a11667dd7 100644
--- a/requirements/extra-pdf-image.txt
+++ b/requirements/extra-pdf-image.txt
@@ -138,7 +138,6 @@ packaging==23.2
     #   pikepdf
     #   pytesseract
     #   transformers
-    #   unstructured-pytesseract
 pandas==2.2.2
     # via layoutparser
 pdf2image==1.17.0
@@ -163,7 +162,6 @@ pillow==10.4.0
     #   pillow-heif
     #   pytesseract
     #   torchvision
-    #   unstructured-pytesseract
 pillow-heif==0.18.0
     # via -r ./extra-pdf-image.in
 portalocker==2.10.1
@@ -204,8 +202,10 @@ pypdf==4.3.1
     #   -r ./extra-pdf-image.in
 pypdfium2==4.30.0
     # via pdfplumber
-pytesseract==0.3.10
-    # via -r ./extra-pdf-image.in
+pytesseract @ git+https://github.com/madmaze/pytesseract.git@v0.3.13
+    # via
+    #   -c ././deps/constraints.txt
+    #   -r ./extra-pdf-image.in
 python-dateutil==2.9.0.post0
     # via
     #   -c ./base.txt
@@ -290,10 +290,6 @@ tzdata==2024.1
     # via pandas
 unstructured-inference==0.7.36
     # via -r ./extra-pdf-image.in
-unstructured-pytesseract==0.3.12
-    # via
-    #   -c ././deps/constraints.txt
-    #   -r ./extra-pdf-image.in
 urllib3==1.26.19
     # via
     #   -c ././deps/constraints.txt
diff --git a/requirements/extra-pptx.txt b/requirements/extra-pptx.txt
index 6b8c80a83..42a06959e 100644
--- a/requirements/extra-pptx.txt
+++ b/requirements/extra-pptx.txt
@@ -8,7 +8,7 @@ lxml==5.2.2
     # via python-pptx
 pillow==10.4.0
     # via python-pptx
-python-pptx==1.0.1
+python-pptx==1.0.2
     # via -r ./extra-pptx.in
 typing-extensions==4.12.2
     # via python-pptx
diff --git a/requirements/ingest/embed-aws-bedrock.txt b/requirements/ingest/embed-aws-bedrock.txt
index 2c97a997d..3f3fe1bd0 100644
--- a/requirements/ingest/embed-aws-bedrock.txt
+++ b/requirements/ingest/embed-aws-bedrock.txt
@@ -63,7 +63,7 @@ langchain-community==0.2.11
     # via
     #   -c ./ingest/../deps/constraints.txt
     #   -r ./ingest/embed-aws-bedrock.in
-langchain-core==0.2.28
+langchain-core==0.2.29
     # via
     #   langchain
     #   langchain-community
diff --git a/requirements/ingest/embed-huggingface.txt b/requirements/ingest/embed-huggingface.txt
index 4c447f6ef..24a97bd2e 100644
--- a/requirements/ingest/embed-huggingface.txt
+++ b/requirements/ingest/embed-huggingface.txt
@@ -45,7 +45,7 @@ jsonpatch==1.33
     # via langchain-core
 jsonpointer==3.0.0
     # via jsonpatch
-langchain-core==0.2.28
+langchain-core==0.2.29
     # via langchain-huggingface
 langchain-huggingface==0.0.3
     # via -r ./ingest/embed-huggingface.in
diff --git a/requirements/ingest/embed-openai.txt b/requirements/ingest/embed-openai.txt
index 91beb70f6..4a556148c 100644
--- a/requirements/ingest/embed-openai.txt
+++ b/requirements/ingest/embed-openai.txt
@@ -53,7 +53,7 @@ jsonpatch==1.33
     # via langchain-core
 jsonpointer==3.0.0
     # via jsonpatch
-langchain-core==0.2.28
+langchain-core==0.2.29
     # via langchain-openai
 langchain-openai==0.1.20
     # via -r ./ingest/embed-openai.in
diff --git a/requirements/ingest/embed-vertexai.txt b/requirements/ingest/embed-vertexai.txt
index 035d0beb2..6574e8183 100644
--- a/requirements/ingest/embed-vertexai.txt
+++ b/requirements/ingest/embed-vertexai.txt
@@ -110,7 +110,7 @@ langchain-community==0.2.11
     # via
     #   -c ./ingest/../deps/constraints.txt
     #   -r ./ingest/embed-vertexai.in
-langchain-core==0.2.28
+langchain-core==0.2.29
     # via
     #   langchain
     #   langchain-community
diff --git a/requirements/ingest/embed-voyageai.txt b/requirements/ingest/embed-voyageai.txt
index 6db8f82e0..e4d8ecab9 100644
--- a/requirements/ingest/embed-voyageai.txt
+++ b/requirements/ingest/embed-voyageai.txt
@@ -46,7 +46,7 @@ jsonpointer==3.0.0
     # via jsonpatch
 langchain==0.2.12
     # via -r ./ingest/embed-voyageai.in
-langchain-core==0.2.28
+langchain-core==0.2.29
     # via
     #   langchain
     #   langchain-text-splitters
diff --git a/requirements/ingest/singlestore.txt b/requirements/ingest/singlestore.txt
index b3ea50606..2a6d094a6 100644
--- a/requirements/ingest/singlestore.txt
+++ b/requirements/ingest/singlestore.txt
@@ -40,7 +40,7 @@ requests==2.32.3
     # via
     #   -c ./ingest/../base.txt
     #   singlestoredb
-singlestoredb==1.6.1
+singlestoredb==1.6.2
     # via -r ./ingest/singlestore.in
 sqlparams==6.0.1
     # via singlestoredb
diff --git a/requirements/test.txt b/requirements/test.txt
index 56487c873..f9c45d8ab 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -7,9 +7,7 @@
 annotated-types==0.7.0
     # via pydantic
 appdirs==1.4.4
-    # via
-    #   label-studio-sdk
-    #   label-studio-tools
+    # via label-studio-tools
 attrs==24.2.0
     # via
     #   jsonschema
diff --git a/test_unstructured/partition/pdf_image/test_analysis.py b/test_unstructured/partition/pdf_image/test_analysis.py
index 237672a79..b39c0d1f5 100644
--- a/test_unstructured/partition/pdf_image/test_analysis.py
+++ b/test_unstructured/partition/pdf_image/test_analysis.py
@@ -147,7 +147,7 @@ def test_od_document_layout_dump():
     }
     od_layout_dump = ObjectDetectionLayoutDumper(od_document_layout).dump()
 
-    assert {"pages": od_layout_dump.get("pages")} == expected_dump
+    assert expected_dump == {"pages": od_layout_dump.get("pages")}
 
     # check OD model classes are attached but do not depend on a specific model instance
     assert "object_detection_classes" in od_layout_dump
diff --git a/test_unstructured/partition/pdf_image/test_ocr.py b/test_unstructured/partition/pdf_image/test_ocr.py
index e07fb23d3..c650db112 100644
--- a/test_unstructured/partition/pdf_image/test_ocr.py
+++ b/test_unstructured/partition/pdf_image/test_ocr.py
@@ -3,8 +3,8 @@ from unittest.mock import patch
 
 import numpy as np
 import pandas as pd
+import pytesseract
 import pytest
-import unstructured_pytesseract
 from pdf2image.exceptions import PDFPageCountError
 from PIL import Image, UnidentifiedImageError
 from unstructured_inference.inference.elements import EmbeddedTextRegion, TextRegion
@@ -70,7 +70,7 @@ def test_supplement_page_layout_with_ocr_invalid_ocr(monkeypatch):
 
 def test_get_ocr_layout_from_image_tesseract(monkeypatch):
     monkeypatch.setattr(
-        unstructured_pytesseract,
+        pytesseract,
         "image_to_data",
         lambda *args, **kwargs: pd.DataFrame(
             {
@@ -156,7 +156,7 @@ def test_get_ocr_layout_from_image_paddle(monkeypatch):
 
 def test_get_ocr_text_from_image_tesseract(monkeypatch):
     monkeypatch.setattr(
-        unstructured_pytesseract,
+        pytesseract,
         "image_to_string",
         lambda *args, **kwargs: "Hello World",
     )
@@ -443,7 +443,7 @@ def test_auto_zoom_not_exceed_tesseract_limit(monkeypatch):
     monkeypatch.setenv("TESSERACT_MIN_TEXT_HEIGHT", "1000")
     monkeypatch.setenv("TESSERACT_OPTIMUM_TEXT_HEIGHT", "100000")
     monkeypatch.setattr(
-        unstructured_pytesseract,
+        pytesseract,
         "image_to_data",
         lambda *args, **kwargs: pd.DataFrame(
             {
diff --git a/test_unstructured/partition/pdf_image/test_pdf.py b/test_unstructured/partition/pdf_image/test_pdf.py
index 1dc603605..0ed804e9b 100644
--- a/test_unstructured/partition/pdf_image/test_pdf.py
+++ b/test_unstructured/partition/pdf_image/test_pdf.py
@@ -978,15 +978,15 @@ def test_partition_hi_res_model_name_default_to_None():
     [
         (
             PartitionStrategy.HI_RES,
-            "unstructured_pytesseract.image_to_data",
+            "pytesseract.image_to_data",
         ),
         (
             PartitionStrategy.OCR_ONLY,
-            "unstructured_pytesseract.image_to_data",
+            "pytesseract.image_to_data",
         ),
         (
             PartitionStrategy.OCR_ONLY,
-            "unstructured_pytesseract.image_to_string",
+            "pytesseract.image_to_string",
         ),
     ],
 )
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index fac8b4352..d2ae41a9f 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.15.2-dev4"  # pragma: no cover
+__version__ = "0.15.2-dev5"  # pragma: no cover
diff --git a/unstructured/chunking/base.py b/unstructured/chunking/base.py
index edf37b3d2..7f28dd3b3 100644
--- a/unstructured/chunking/base.py
+++ b/unstructured/chunking/base.py
@@ -881,9 +881,7 @@ class PreChunkBuilder:
         if self._text_length > self._opts.soft_max:
             return False
         # -- don't add an element if it would increase total size beyond the hard-max --
-        if self._remaining_space < len(element.text):
-            return False
-        return True
+        return not self._remaining_space < len(element.text)
 
     @property
     def _remaining_space(self) -> int:
diff --git a/unstructured/ingest/connector/notion/helpers.py b/unstructured/ingest/connector/notion/helpers.py
index 91ffe2f28..a09fa083b 100644
--- a/unstructured/ingest/connector/notion/helpers.py
+++ b/unstructured/ingest/connector/notion/helpers.py
@@ -393,9 +393,7 @@ def is_page_url(client: Client, url: str):
     if not page_uuid:
         return False
     check_resp = client.pages.retrieve_status(page_id=page_uuid)
-    if check_resp == 200:
-        return True
-    return False
+    return check_resp == 200
 
 
 def is_database_url(client: Client, url: str):
@@ -407,9 +405,7 @@ def is_database_url(client: Client, url: str):
     if not database_uuid:
         return False
     check_resp = client.databases.retrieve_status(database_id=database_uuid)
-    if check_resp == 200:
-        return True
-    return False
+    return check_resp == 200
 
 
 @dataclass
diff --git a/unstructured/ingest/v2/pipeline/steps/chunk.py b/unstructured/ingest/v2/pipeline/steps/chunk.py
index 07eb680d7..b2e5d14c2 100644
--- a/unstructured/ingest/v2/pipeline/steps/chunk.py
+++ b/unstructured/ingest/v2/pipeline/steps/chunk.py
@@ -39,9 +39,7 @@ class ChunkStep(PipelineStep):
     def should_chunk(self, filepath: Path, file_data: FileData) -> bool:
         if self.context.reprocess or file_data.reprocess:
             return True
-        if not filepath.exists():
-            return True
-        return False
+        return not filepath.exists()
 
     def get_output_filepath(self, filename: Path) -> Path:
         hashed_output_file = f"{self.get_hash(extras=[filename.name])}.json"
diff --git a/unstructured/ingest/v2/pipeline/steps/embed.py b/unstructured/ingest/v2/pipeline/steps/embed.py
index d1fbe04bb..94103951c 100644
--- a/unstructured/ingest/v2/pipeline/steps/embed.py
+++ b/unstructured/ingest/v2/pipeline/steps/embed.py
@@ -39,9 +39,7 @@ class EmbedStep(PipelineStep):
     def should_embed(self, filepath: Path, file_data: FileData) -> bool:
         if self.context.reprocess or file_data.reprocess:
             return True
-        if not filepath.exists():
-            return True
-        return False
+        return not filepath.exists()
 
     def get_output_filepath(self, filename: Path) -> Path:
         hashed_output_file = f"{self.get_hash(extras=[filename.name])}.json"
diff --git a/unstructured/ingest/v2/pipeline/steps/partition.py b/unstructured/ingest/v2/pipeline/steps/partition.py
index 0b3167847..541d2cae9 100644
--- a/unstructured/ingest/v2/pipeline/steps/partition.py
+++ b/unstructured/ingest/v2/pipeline/steps/partition.py
@@ -34,9 +34,7 @@ class PartitionStep(PipelineStep):
     def should_partition(self, filepath: Path, file_data: FileData) -> bool:
         if self.context.reprocess or file_data.reprocess:
             return True
-        if not filepath.exists():
-            return True
-        return False
+        return not filepath.exists()
 
     def get_output_filepath(self, filename: Path) -> Path:
         hashed_output_file = f"{self.get_hash(extras=[filename.name])}.json"
diff --git a/unstructured/partition/pdf.py b/unstructured/partition/pdf.py
index 91b04beab..8b733c8e3 100644
--- a/unstructured/partition/pdf.py
+++ b/unstructured/partition/pdf.py
@@ -926,7 +926,7 @@ def _partition_pdf_or_image_with_ocr_from_image(
 
     ocr_agent = OCRAgent.get_agent(language=ocr_languages)
 
-    # NOTE(christine): `unstructured_pytesseract.image_to_string()` returns sorted text
+    # NOTE(christine): `pytesseract.image_to_string()` returns sorted text
     if ocr_agent.is_text_sorted():
         sort_mode = SORT_MODE_DONT
 
diff --git a/unstructured/partition/utils/constants.py b/unstructured/partition/utils/constants.py
index 9d802080c..03e26eb0c 100644
--- a/unstructured/partition/utils/constants.py
+++ b/unstructured/partition/utils/constants.py
@@ -43,7 +43,7 @@ OCR_AGENT_MODULES_WHITELIST = os.getenv(
 
 UNSTRUCTURED_INCLUDE_DEBUG_METADATA = os.getenv("UNSTRUCTURED_INCLUDE_DEBUG_METADATA", False)
 
-# this field is defined by pytesseract/unstructured.pytesseract
+# this field is defined by pytesseract
 TESSERACT_TEXT_HEIGHT = "height"
 
 TESSERACT_LANGUAGES_SPLITTER = "+"
diff --git a/unstructured/partition/utils/ocr_models/tesseract_ocr.py b/unstructured/partition/utils/ocr_models/tesseract_ocr.py
index 46eb8a0cb..bba58c02e 100644
--- a/unstructured/partition/utils/ocr_models/tesseract_ocr.py
+++ b/unstructured/partition/utils/ocr_models/tesseract_ocr.py
@@ -6,9 +6,9 @@ from typing import TYPE_CHECKING, List
 import cv2
 import numpy as np
 import pandas as pd
-import unstructured_pytesseract
+import pytesseract
 from PIL import Image as PILImage
-from unstructured_pytesseract import Output
+from pytesseract import Output
 
 from unstructured.logger import trace_logger
 from unstructured.partition.utils.config import env_config
@@ -40,14 +40,14 @@ class OCRAgentTesseract(OCRAgent):
         return True
 
     def get_text_from_image(self, image: PILImage.Image) -> str:
-        return unstructured_pytesseract.image_to_string(np.array(image), lang=self.language)
+        return pytesseract.image_to_string(np.array(image), lang=self.language)
 
     def get_layout_from_image(self, image: PILImage.Image) -> List[TextRegion]:
         """Get the OCR regions from image as a list of text regions with tesseract."""
 
         trace_logger.detail("Processing entire page OCR with tesseract...")
         zoom = 1
-        ocr_df: pd.DataFrame = unstructured_pytesseract.image_to_data(
+        ocr_df: pd.DataFrame = pytesseract.image_to_data(
             np.array(image),
             lang=self.language,
             output_type=Output.DATAFRAME,
@@ -76,7 +76,7 @@ class OCRAgentTesseract(OCRAgent):
                 np.round(env_config.TESSERACT_OPTIMUM_TEXT_HEIGHT / text_height, 1),
                 max_zoom,
             )
-            ocr_df = unstructured_pytesseract.image_to_data(
+            ocr_df = pytesseract.image_to_data(
                 np.array(zoom_image(image, zoom)),
                 lang=self.language,
                 output_type=Output.DATAFRAME,
@@ -96,9 +96,9 @@ class OCRAgentTesseract(OCRAgent):
         ocr_regions = self.get_layout_from_image(image)
 
         # NOTE(christine): For tesseract, the ocr_text returned by
-        # `unstructured_pytesseract.image_to_string()` doesn't contain bounding box data but is
+        # `pytesseract.image_to_string()` doesn't contain bounding box data but is
         # well grouped. Conversely, the ocr_layout returned by parsing
-        # `unstructured_pytesseract.image_to_data()` contains bounding box data but is not well
+        # `pytesseract.image_to_data()` contains bounding box data but is not well
         # grouped. Therefore, we need to first group the `ocr_layout` by `ocr_text` and then merge
         # the text regions in each group to create a list of layout elements.
 
diff --git a/unstructured/utils.py b/unstructured/utils.py
index 55fecc319..3152d02e7 100644
--- a/unstructured/utils.py
+++ b/unstructured/utils.py
@@ -458,12 +458,10 @@ def is_parent_box(parent_target: Box, child_target: Box, add: float = 0.0) -> bo
         and (child_target[2] <= parent_targets[2] and child_target[3] <= parent_targets[3])
     ):
         return True
-    if len(child_target) == 2 and (
+    return len(child_target) == 2 and (
         parent_targets[0] <= child_target[0] <= parent_targets[2]
         and parent_targets[1] <= child_target[1] <= parent_targets[3]
-    ):
-        return True
-    return False
+    )
 
 
 def calculate_overlap_percentage(