mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-27 07:03:52 +00:00
fix: update OpenAIEmbeddingEncoder to use langchain-openai instead of langchain-community (#3433)
Closes https://github.com/Unstructured-IO/unstructured/issues/3378.

### Summary

This PR updates `OpenAIEmbeddingEncoder` to use `OpenAIEmbeddings` from the `langchain-openai` package instead of the deprecated version from `langchain-community`. This resolves the deprecation warning and ensures compatibility with future versions of langchain.
This commit is contained in:
parent
3fe5c094fa
commit
798dcc096c
@ -1,4 +1,4 @@
|
||||
## 0.15.1-dev2
|
||||
## 0.15.1-dev3
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -6,6 +6,7 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
* **Update `OpenAIEmbeddingEncoder` to use `OpenAIEmbeddings` from the `langchain-openai` package instead of the deprecated version from `langchain-community`.** This resolves the deprecation warning and ensures compatibility with future versions of langchain.
|
||||
* **Update import of Pinecone exception.** Adds compatibility for `pinecone-client>=5.0.0`.
|
||||
* **File-type detection catches non-existent file-path.** `detect_filetype()` no longer silently falls back to detecting a file-type based on the extension when no file exists at the path provided. Instead `FileNotFoundError` is raised. This provides consistent user notification of a mis-typed path rather than an unpredictable exception from a file-type specific partitioner when the file cannot be opened.
|
||||
* **EML files specified as a file-path are detected correctly.** Resolved a bug where an EML file submitted to `partition()` as a file-path was identified as TXT and partitioned using `partition_text()`. EML files specified by path are now identified and processed correctly, including processing any attachments.
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
-c ../deps/constraints.txt
|
||||
-c ../base.txt
|
||||
langchain-community
|
||||
tiktoken
|
||||
openai
|
||||
|
||||
langchain-openai
|
||||
|
||||
@ -4,12 +4,6 @@
|
||||
#
|
||||
# pip-compile ./ingest/embed-openai.in
|
||||
#
|
||||
aiohttp==3.9.5
|
||||
# via
|
||||
# langchain
|
||||
# langchain-community
|
||||
aiosignal==1.3.1
|
||||
# via aiohttp
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
anyio==3.7.1
|
||||
@ -18,12 +12,6 @@ anyio==3.7.1
|
||||
# -c ./ingest/../deps/constraints.txt
|
||||
# httpx
|
||||
# openai
|
||||
async-timeout==4.0.3
|
||||
# via
|
||||
# aiohttp
|
||||
# langchain
|
||||
attrs==23.2.0
|
||||
# via aiohttp
|
||||
certifi==2024.7.4
|
||||
# via
|
||||
# -c ./ingest/../base.txt
|
||||
@ -35,20 +23,12 @@ charset-normalizer==3.3.2
|
||||
# via
|
||||
# -c ./ingest/../base.txt
|
||||
# requests
|
||||
dataclasses-json==0.6.7
|
||||
# via
|
||||
# -c ./ingest/../base.txt
|
||||
# langchain-community
|
||||
distro==1.9.0
|
||||
# via openai
|
||||
exceptiongroup==1.2.2
|
||||
# via
|
||||
# -c ./ingest/../base.txt
|
||||
# anyio
|
||||
frozenlist==1.4.1
|
||||
# via
|
||||
# aiohttp
|
||||
# aiosignal
|
||||
h11==0.14.0
|
||||
# via
|
||||
# -c ./ingest/../base.txt
|
||||
@ -67,48 +47,18 @@ idna==3.7
|
||||
# anyio
|
||||
# httpx
|
||||
# requests
|
||||
# yarl
|
||||
jsonpatch==1.33
|
||||
# via langchain-core
|
||||
jsonpointer==3.0.0
|
||||
# via jsonpatch
|
||||
langchain==0.2.11
|
||||
# via langchain-community
|
||||
langchain-community==0.2.10
|
||||
# via
|
||||
# -c ./ingest/../deps/constraints.txt
|
||||
# -r ./ingest/embed-openai.in
|
||||
langchain-core==0.2.23
|
||||
# via
|
||||
# langchain
|
||||
# langchain-community
|
||||
# langchain-text-splitters
|
||||
langchain-text-splitters==0.2.2
|
||||
# via langchain
|
||||
langsmith==0.1.93
|
||||
# via
|
||||
# langchain
|
||||
# langchain-community
|
||||
# langchain-core
|
||||
marshmallow==3.21.3
|
||||
# via
|
||||
# -c ./ingest/../base.txt
|
||||
# dataclasses-json
|
||||
multidict==6.0.5
|
||||
# via
|
||||
# aiohttp
|
||||
# yarl
|
||||
mypy-extensions==1.0.0
|
||||
# via
|
||||
# -c ./ingest/../base.txt
|
||||
# typing-inspect
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# -c ./ingest/../base.txt
|
||||
# langchain
|
||||
# langchain-community
|
||||
openai==1.37.0
|
||||
# via langchain-openai
|
||||
langchain-openai==0.1.17
|
||||
# via -r ./ingest/embed-openai.in
|
||||
langsmith==0.1.93
|
||||
# via langchain-core
|
||||
openai==1.37.0
|
||||
# via langchain-openai
|
||||
orjson==3.10.6
|
||||
# via langsmith
|
||||
packaging==23.2
|
||||
@ -116,20 +66,15 @@ packaging==23.2
|
||||
# -c ./ingest/../base.txt
|
||||
# -c ./ingest/../deps/constraints.txt
|
||||
# langchain-core
|
||||
# marshmallow
|
||||
pydantic==2.8.2
|
||||
# via
|
||||
# langchain
|
||||
# langchain-core
|
||||
# langsmith
|
||||
# openai
|
||||
pydantic-core==2.20.1
|
||||
# via pydantic
|
||||
pyyaml==6.0.1
|
||||
# via
|
||||
# langchain
|
||||
# langchain-community
|
||||
# langchain-core
|
||||
# via langchain-core
|
||||
regex==2024.5.15
|
||||
# via
|
||||
# -c ./ingest/../base.txt
|
||||
@ -137,8 +82,6 @@ regex==2024.5.15
|
||||
requests==2.32.3
|
||||
# via
|
||||
# -c ./ingest/../base.txt
|
||||
# langchain
|
||||
# langchain-community
|
||||
# langsmith
|
||||
# tiktoken
|
||||
sniffio==1.3.1
|
||||
@ -147,17 +90,10 @@ sniffio==1.3.1
|
||||
# anyio
|
||||
# httpx
|
||||
# openai
|
||||
sqlalchemy==2.0.31
|
||||
# via
|
||||
# langchain
|
||||
# langchain-community
|
||||
tenacity==8.5.0
|
||||
# via
|
||||
# langchain
|
||||
# langchain-community
|
||||
# langchain-core
|
||||
# via langchain-core
|
||||
tiktoken==0.7.0
|
||||
# via -r ./ingest/embed-openai.in
|
||||
# via langchain-openai
|
||||
tqdm==4.66.4
|
||||
# via
|
||||
# -c ./ingest/../base.txt
|
||||
@ -168,16 +104,8 @@ typing-extensions==4.12.2
|
||||
# openai
|
||||
# pydantic
|
||||
# pydantic-core
|
||||
# sqlalchemy
|
||||
# typing-inspect
|
||||
typing-inspect==0.9.0
|
||||
# via
|
||||
# -c ./ingest/../base.txt
|
||||
# dataclasses-json
|
||||
urllib3==1.26.19
|
||||
# via
|
||||
# -c ./ingest/../base.txt
|
||||
# -c ./ingest/../deps/constraints.txt
|
||||
# requests
|
||||
yarl==1.9.4
|
||||
# via aiohttp
|
||||
|
||||
@ -1 +1 @@
|
||||
__version__ = "0.15.1-dev2" # pragma: no cover
|
||||
__version__ = "0.15.1-dev3" # pragma: no cover
|
||||
|
||||
@ -12,7 +12,7 @@ from unstructured.ingest.error import EmbeddingEncoderConnectionError
|
||||
from unstructured.utils import requires_dependencies
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain_community.embeddings import OpenAIEmbeddings
|
||||
from langchain_openai.embeddings import OpenAIEmbeddings
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -65,13 +65,10 @@ class OpenAIEmbeddingEncoder(BaseEmbeddingEncoder):
|
||||
return elements
|
||||
|
||||
@EmbeddingEncoderConnectionError.wrap
|
||||
@requires_dependencies(
|
||||
["langchain_community", "openai", "tiktoken"],
|
||||
extras="openai",
|
||||
)
|
||||
@requires_dependencies(["langchain_openai"], extras="openai")
|
||||
def create_client(self) -> "OpenAIEmbeddings":
|
||||
"""Creates a langchain OpenAI python client to embed elements."""
|
||||
from langchain_community.embeddings import OpenAIEmbeddings
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
|
||||
openai_client = OpenAIEmbeddings(
|
||||
openai_api_key=self.config.api_key,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user