Refactor embeddings and provide vanilla OpenAI-based embeddings (#11)

* Prefix all LangChain-based embedding classes with LC

* Provide vanilla OpenAI embeddings

* Add test for AzureOpenAIEmbeddings and OpenAIEmbeddings

* Fix disallowed empty-string input by substituting a single space

* Use OpenAIEmbeddings in flowsettings

---------

Co-authored-by: ian_Cin <ian@cinnamon.is>
This commit is contained in:
Duc Nguyen (john) 2024-04-09 15:07:59 +07:00 committed by GitHub
parent e75354d410
commit ed10020ea3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 7 additions and 7 deletions

View File

@@ -59,7 +59,7 @@ class BaseOpenAIEmbeddings(BaseEmbeddings):
input_ = self.prepare_input(text)
client = self.prepare_client(async_version=False)
resp = self.openai_response(
client, input=[_.text for _ in input_], **kwargs
client, input=[_.text if _.text else " " for _ in input_], **kwargs
).dict()
output_ = sorted(resp["data"], key=lambda x: x["index"])
return [
@@ -73,7 +73,7 @@ class BaseOpenAIEmbeddings(BaseEmbeddings):
input_ = self.prepare_input(text)
client = self.prepare_client(async_version=True)
resp = await self.openai_response(
client, input=[_.text for _ in input_], **kwargs
client, input=[_.text if _.text else " " for _ in input_], **kwargs
).dict()
output_ = sorted(resp["data"], key=lambda x: x["index"])
return [

View File

@@ -59,12 +59,13 @@ if config("AZURE_OPENAI_API_KEY", default="") and config(
"spec": {
"__type__": "kotaemon.embeddings.LCAzureOpenAIEmbeddings",
"azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
"openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
"api_key": config("AZURE_OPENAI_API_KEY", default=""),
"api_version": config("OPENAI_API_VERSION", default="")
or "2024-02-15-preview",
"deployment": config("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default=""),
"request_timeout": 10,
"chunk_size": 16,
"azure_deployment": config(
"AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default=""
),
"timeout": 10,
},
"default": False,
"accuracy": 5,
@@ -96,7 +97,6 @@ if config("OPENAI_API_KEY", default=""):
)
or "text-embedding-ada-002",
"timeout": 10,
"chunk_size": 16,
},
"default": False,
}