mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-25 14:14:30 +00:00
feat: configure googlevisionapi (#3126)
### Summary Includes changes from #3117, merged into a feature branch to run the full test suite. Original PR description: The Google Vision API allows [configuration of the API endpoint](https://cloud.google.com/vision/docs/ocr#regionalization) to select whether the data should be sent to the US or the EU. This PR adds an environment variable (`GOOGLEVISION_API_ENDPOINT`) to configure it. --------- Co-authored-by: JIAQIA <jqq1716@gmail.com> Co-authored-by: Dimitri Lozeve <dimitri@lozeve.com>
This commit is contained in:
parent
4a96d54906
commit
6005abce79
@ -8,6 +8,8 @@
|
||||
|
||||
### Features
|
||||
|
||||
- **Allow configuration of the Google Vision API endpoint** Add the `GOOGLEVISION_API_ENDPOINT` environment variable to select which Google Vision API endpoint is used, so that data can be sent to the US or the EU.
|
||||
|
||||
### Fixes
|
||||
|
||||
* **Fix V2 S3 Destination Connector authentication** Fixes bugs with S3 Destination Connector where the connection config was neither registered nor properly deserialized.
|
||||
|
||||
@ -95,11 +95,14 @@ class ENVConfig:
|
||||
"""optimum text height for tesseract OCR"""
|
||||
return self._get_int("TESSERACT_OPTIMUM_TEXT_HEIGHT", 20)
|
||||
|
||||
@property
|
||||
def GOOGLEVISION_API_ENDPOINT(self) -> str:
|
||||
"""API endpoint to use for Google Vision"""
|
||||
return self._get_string("GOOGLEVISION_API_ENDPOINT", "")
|
||||
|
||||
@property
|
||||
def OCR_AGENT(self) -> str:
|
||||
"""error margin when comparing if a ocr region is within the table element when preparing
|
||||
table tokens
|
||||
"""
|
||||
"""OCR Agent to use"""
|
||||
return self._get_string("OCR_AGENT", OCR_AGENT_TESSERACT)
|
||||
|
||||
@property
|
||||
|
||||
@ -5,6 +5,8 @@ from typing import TYPE_CHECKING
|
||||
|
||||
from google.cloud.vision import Image, ImageAnnotatorClient, Paragraph, TextAnnotation
|
||||
|
||||
from unstructured.logger import logger, trace_logger
|
||||
from unstructured.partition.utils.config import env_config
|
||||
from unstructured.partition.utils.constants import Source
|
||||
from unstructured.partition.utils.ocr_models.ocr_interface import OCRAgent
|
||||
|
||||
@ -18,7 +20,14 @@ class OCRAgentGoogleVision(OCRAgent):
|
||||
"""OCR service implementation for Google Vision API."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.client = ImageAnnotatorClient()
|
||||
client_options = {}
|
||||
api_endpoint = env_config.GOOGLEVISION_API_ENDPOINT
|
||||
if api_endpoint:
|
||||
logger.info(f"Using Google Vision OCR with endpoint {api_endpoint}")
|
||||
client_options["api_endpoint"] = api_endpoint
|
||||
else:
|
||||
logger.info("Using Google Vision OCR with default endpoint")
|
||||
self.client = ImageAnnotatorClient(client_options=client_options)
|
||||
|
||||
def is_text_sorted(self) -> bool:
|
||||
return True
|
||||
@ -34,6 +43,7 @@ class OCRAgentGoogleVision(OCRAgent):
|
||||
def get_layout_from_image(
|
||||
self, image: PILImage.Image, ocr_languages: str = "eng"
|
||||
) -> list[TextRegion]:
|
||||
trace_logger.detail("Processing entire page OCR with Google Vision API...")
|
||||
with BytesIO() as buffer:
|
||||
image.save(buffer, format="PNG")
|
||||
response = self.client.document_text_detection(image=Image(content=buffer.getvalue()))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user