mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
Renamed OpenAiEmbeddingConfig dataclass (#2546)
This commit is contained in:
parent
882370022e
commit
51427b3103
@ -1,4 +1,4 @@
|
||||
## 0.12.5-dev4
|
||||
## 0.12.5-dev5
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -12,6 +12,7 @@
|
||||
* **Fix: don't treat plain text files with double quotes as JSON.** If a file can be deserialized as JSON but it deserializes as a string, treat it as plain text even though it's valid JSON.
|
||||
* **Fix `check_connection` in opensearch, databricks, postgres, azure connectors.**
|
||||
* **Fix cluster of bugs in `partition_xlsx()` that dropped content.** Algorithm for detecting "subtables" within a worksheet dropped table elements for certain patterns of populated cells such as when a trailing single-cell row appeared in a contiguous block of populated cells.
|
||||
* **Rename `OpenAiEmbeddingConfig` to `OpenAIEmbeddingConfig`.**
|
||||
|
||||
## 0.12.4
|
||||
|
||||
|
@ -43,10 +43,10 @@ To obtain an api key, visit: https://platform.openai.com/account/api-keys
|
||||
import os
|
||||
|
||||
from unstructured.documents.elements import Text
|
||||
from unstructured.embed.openai import OpenAiEmbeddingConfig, OpenAIEmbeddingEncoder
|
||||
from unstructured.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
|
||||
|
||||
# Initialize the encoder with OpenAI credentials
|
||||
embedding_encoder = OpenAIEmbeddingEncoder(config=OpenAiEmbeddingConfig(api_key=os.environ["OPENAI_API_KEY"]))
|
||||
embedding_encoder = OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig(api_key=os.environ["OPENAI_API_KEY"]))
|
||||
|
||||
# Embed a list of Elements
|
||||
elements = embedding_encoder.embed_documents(
|
||||
|
@ -1,10 +1,10 @@
|
||||
import os
|
||||
|
||||
from unstructured.documents.elements import Text
|
||||
from unstructured.embed.openai import OpenAiEmbeddingConfig, OpenAIEmbeddingEncoder
|
||||
from unstructured.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
|
||||
|
||||
embedding_encoder = OpenAIEmbeddingEncoder(
|
||||
config=OpenAiEmbeddingConfig(api_key=os.environ["OPENAI_API_KEY"])
|
||||
config=OpenAIEmbeddingConfig(api_key=os.environ["OPENAI_API_KEY"])
|
||||
)
|
||||
elements = embedding_encoder.embed_documents(
|
||||
elements=[Text("This is sentence 1"), Text("This is sentence 2")],
|
||||
|
@ -1,5 +1,5 @@
|
||||
from unstructured.documents.elements import Text
|
||||
from unstructured.embed.openai import OpenAiEmbeddingConfig, OpenAIEmbeddingEncoder
|
||||
from unstructured.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
|
||||
|
||||
|
||||
def test_embed_documents_does_not_break_element_to_dict(mocker):
|
||||
@ -10,7 +10,7 @@ def test_embed_documents_does_not_break_element_to_dict(mocker):
|
||||
# Mock create_client to return our mock_client
|
||||
mocker.patch.object(OpenAIEmbeddingEncoder, "create_client", return_value=mock_client)
|
||||
|
||||
encoder = OpenAIEmbeddingEncoder(config=OpenAiEmbeddingConfig(api_key="api_key"))
|
||||
encoder = OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig(api_key="api_key"))
|
||||
elements = encoder.embed_documents(
|
||||
elements=[Text("This is sentence 1"), Text("This is sentence 2")],
|
||||
)
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "0.12.5-dev4" # pragma: no cover
|
||||
__version__ = "0.12.5-dev5" # pragma: no cover
|
||||
|
@ -15,14 +15,14 @@ if TYPE_CHECKING:
|
||||
|
||||
|
||||
@dataclass
|
||||
class OpenAiEmbeddingConfig(EmbeddingConfig):
|
||||
class OpenAIEmbeddingConfig(EmbeddingConfig):
|
||||
api_key: str
|
||||
model_name: str = "text-embedding-ada-002"
|
||||
|
||||
|
||||
@dataclass
|
||||
class OpenAIEmbeddingEncoder(BaseEmbeddingEncoder):
|
||||
config: OpenAiEmbeddingConfig
|
||||
config: OpenAIEmbeddingConfig
|
||||
_client: Optional["OpenAIEmbeddings"] = field(init=False, default=None)
|
||||
_exemplary_embedding: Optional[List[float]] = field(init=False, default=None)
|
||||
|
||||
|
@ -196,9 +196,9 @@ class EmbeddingConfig(BaseConfig):
|
||||
kwargs["model_name"] = self.model_name
|
||||
# TODO make this more dynamic to map to encoder configs
|
||||
if self.provider == "langchain-openai":
|
||||
from unstructured.embed.openai import OpenAiEmbeddingConfig, OpenAIEmbeddingEncoder
|
||||
from unstructured.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
|
||||
|
||||
return OpenAIEmbeddingEncoder(config=OpenAiEmbeddingConfig(**kwargs))
|
||||
return OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig(**kwargs))
|
||||
elif self.provider == "langchain-huggingface":
|
||||
from unstructured.embed.huggingface import (
|
||||
HuggingFaceEmbeddingConfig,
|
||||
|
Loading…
x
Reference in New Issue
Block a user