From 2e00d8d3d4a2441fa71bb2b54bc6581accf2d6fe Mon Sep 17 00:00:00 2001 From: Haiyue Wang Date: Tue, 2 Sep 2025 10:31:51 +0800 Subject: [PATCH] Use 'float' explicitly for OpenAI's embedding "encoding_format" (#9838) ### What problem does this PR solve? The default value for OpenAI '/v1/embeddings' parameter 'encoding_format' is 'base64'. Use 'float' explicitly to avoid base64 encoding & decoding and the larger payload size they incur. https://github.com/openai/openai-python/blob/main/src/openai/resources/embeddings.py if not is_given(encoding_format): params["encoding_format"] = "base64" ### Type of change - [x] Performance Improvement --- rag/llm/embedding_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py index 272e0ff0b..4a9f375a6 100644 --- a/rag/llm/embedding_model.py +++ b/rag/llm/embedding_model.py @@ -145,7 +145,7 @@ class OpenAIEmbed(Base): ress = [] total_tokens = 0 for i in range(0, len(texts), batch_size): - res = self.client.embeddings.create(input=texts[i : i + batch_size], model=self.model_name) + res = self.client.embeddings.create(input=texts[i : i + batch_size], model=self.model_name, encoding_format="float") try: ress.extend([d.embedding for d in res.data]) total_tokens += self.total_token_count(res) @@ -154,7 +154,7 @@ class OpenAIEmbed(Base): return np.array(ress), total_tokens def encode_queries(self, text): - res = self.client.embeddings.create(input=[truncate(text, 8191)], model=self.model_name) + res = self.client.embeddings.create(input=[truncate(text, 8191)], model=self.model_name, encoding_format="float") return np.array(res.data[0].embedding), self.total_token_count(res)