Renamed OpenAiEmbeddingConfig dataclass (#2546)

This commit is contained in:
Ronny H 2024-02-14 09:24:52 -08:00 committed by GitHub
parent 882370022e
commit 51427b3103
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 13 additions and 12 deletions

View File

@ -1,4 +1,4 @@
## 0.12.5-dev4
## 0.12.5-dev5
### Enhancements
@ -12,6 +12,7 @@
* **Fix: don't treat plain text files with double quotes as JSON.** If a file can be deserialized as JSON but it deserializes as a string, treat it as plain text even though it's valid JSON.
* **Fix `check_connection` in opensearch, databricks, postgres, and azure connectors.**
* **Fix cluster of bugs in `partition_xlsx()` that dropped content.** Algorithm for detecting "subtables" within a worksheet dropped table elements for certain patterns of populated cells such as when a trailing single-cell row appeared in a contiguous block of populated cells.
* **Rename `OpenAiEmbeddingConfig` to `OpenAIEmbeddingConfig`.**
## 0.12.4

View File

@ -43,10 +43,10 @@ To obtain an api key, visit: https://platform.openai.com/account/api-keys
import os
from unstructured.documents.elements import Text
from unstructured.embed.openai import OpenAiEmbeddingConfig, OpenAIEmbeddingEncoder
from unstructured.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
# Initialize the encoder with OpenAI credentials
embedding_encoder = OpenAIEmbeddingEncoder(config=OpenAiEmbeddingConfig(api_key=os.environ["OPENAI_API_KEY"]))
embedding_encoder = OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig(api_key=os.environ["OPENAI_API_KEY"]))
# Embed a list of Elements
elements = embedding_encoder.embed_documents(

View File

@ -1,10 +1,10 @@
import os
from unstructured.documents.elements import Text
from unstructured.embed.openai import OpenAiEmbeddingConfig, OpenAIEmbeddingEncoder
from unstructured.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
embedding_encoder = OpenAIEmbeddingEncoder(
config=OpenAiEmbeddingConfig(api_key=os.environ["OPENAI_API_KEY"])
config=OpenAIEmbeddingConfig(api_key=os.environ["OPENAI_API_KEY"])
)
elements = embedding_encoder.embed_documents(
elements=[Text("This is sentence 1"), Text("This is sentence 2")],

View File

@ -1,5 +1,5 @@
from unstructured.documents.elements import Text
from unstructured.embed.openai import OpenAiEmbeddingConfig, OpenAIEmbeddingEncoder
from unstructured.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
def test_embed_documents_does_not_break_element_to_dict(mocker):
@ -10,7 +10,7 @@ def test_embed_documents_does_not_break_element_to_dict(mocker):
# Mock create_client to return our mock_client
mocker.patch.object(OpenAIEmbeddingEncoder, "create_client", return_value=mock_client)
encoder = OpenAIEmbeddingEncoder(config=OpenAiEmbeddingConfig(api_key="api_key"))
encoder = OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig(api_key="api_key"))
elements = encoder.embed_documents(
elements=[Text("This is sentence 1"), Text("This is sentence 2")],
)

View File

@ -1 +1 @@
__version__ = "0.12.5-dev4" # pragma: no cover
__version__ = "0.12.5-dev5" # pragma: no cover

View File

@ -15,14 +15,14 @@ if TYPE_CHECKING:
@dataclass
class OpenAiEmbeddingConfig(EmbeddingConfig):
class OpenAIEmbeddingConfig(EmbeddingConfig):
api_key: str
model_name: str = "text-embedding-ada-002"
@dataclass
class OpenAIEmbeddingEncoder(BaseEmbeddingEncoder):
config: OpenAiEmbeddingConfig
config: OpenAIEmbeddingConfig
_client: Optional["OpenAIEmbeddings"] = field(init=False, default=None)
_exemplary_embedding: Optional[List[float]] = field(init=False, default=None)

View File

@ -196,9 +196,9 @@ class EmbeddingConfig(BaseConfig):
kwargs["model_name"] = self.model_name
# TODO make this more dynamic to map to encoder configs
if self.provider == "langchain-openai":
from unstructured.embed.openai import OpenAiEmbeddingConfig, OpenAIEmbeddingEncoder
from unstructured.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
return OpenAIEmbeddingEncoder(config=OpenAiEmbeddingConfig(**kwargs))
return OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig(**kwargs))
elif self.provider == "langchain-huggingface":
from unstructured.embed.huggingface import (
HuggingFaceEmbeddingConfig,