autogen/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py

114 lines
3.4 KiB
Python
Raw Normal View History

#!/usr/bin/env python3 -m pytest
import os
import sys
import pytest
from autogen import config_list_from_json
from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent
2447 fix pgvector tests and notebook (#2455) * Re-added missing notebook * Test installing postgres * Error handle the connection. * Fixed import. * Fixed import. * Fixed creation of collection without client. * PGVector portion working. OpenAI untested. * Fixed prints. * Added output. * Fixed pre-commits. * Run pgvector notebook * Improve efficiency of get_collection * Fix delete_collection * Fixed issues with pytests and validated functions. * Validated pytests. * Fixed pre-commits * Separated extra_requires to allow more logic. Retrieve_chat base dependencies included on pgvector and qdrant. * Fixed extra newline. * Added username and password fields. * URL Encode the connection string parameters to support symbols like % * Fixed pre-commits. * Added pgvector service * pgvector doesn't have health intervals. * Switched to colon based key values. * Run on Ubuntu only. Linux is only option with container service support. * Using default credentials instead. * Fix postgres setup * Fix postgres setup * Don't skip tests on win and mac * Fix command error * Try apt install postgresql * Assert table does not exist when deleted. * Raise value error on a empty list or None value provided for IDs * pre-commit * Add install pgvector * Add install pgvector * Reorg test files, create a separate job for test pgvector * Fix format * Fix env format * Simplify job name, enable test_retrieve_config * Fix test_retrieve_config * Corrected behavior for get_docs_by_ids with no ids returning all docs. * Corrected behavior for get_docs_by_ids with no ids returning all docs. * Fixed pre-commits. * Added return values for all functions. * Validated distance search is implemented correctly. * Validated all pytests * Removed print. * Added default clause. * Make ids optional * Fix test, make it more robust * Bump version of openai for the vector_store support * Added support for choosing the sentence transformer model. * Added error handling for model name entered. * Updated model info. 
* Added model_name db_config param. * pre-commit fixes and last link fix. * Use secrets password. * fix: link fixed * updated tests * Updated config_list. * pre-commit fix. * Added chat_result to all output. Unable to re-run notebooks. * Pre-commit fix detected this requirement. * Fix python 3.8 and 3.9 not supported for macos * Fix python 3.8 and 3.9 not supported for macos * Fix format * Reran notebook with MetaLlama3Instruct7BQ4_k_M * added gpt model. * Reran notebook --------- Co-authored-by: Li Jiang <bnujli@gmail.com> Co-authored-by: Hk669 <hrushi669@gmail.com>
2024-04-28 06:43:02 -07:00
# Put the directory three levels above this file on the import path so the
# `conftest` import below resolves (presumably that is where conftest.py
# lives; verify against the repository layout).
sys.path.append(os.path.join(os.path.dirname(__file__), "../../.."))
from conftest import skip_openai # noqa: E402
2447 fix pgvector tests and notebook (#2455) * Re-added missing notebook * Test installing postgres * Error handle the connection. * Fixed import. * Fixed import. * Fixed creation of collection without client. * PGVector portion working. OpenAI untested. * Fixed prints. * Added output. * Fixed pre-commits. * Run pgvector notebook * Improve efficiency of get_collection * Fix delete_collection * Fixed issues with pytests and validated functions. * Validated pytests. * Fixed pre-commits * Separated extra_requires to allow more logic. Retrieve_chat base dependencies included on pgvector and qdrant. * Fixed extra newline. * Added username and password fields. * URL Encode the connection string parameters to support symbols like % * Fixed pre-commits. * Added pgvector service * pgvector doesn't have health intervals. * Switched to colon based key values. * Run on Ubuntu only. Linux is only option with container service support. * Using default credentials instead. * Fix postgres setup * Fix postgres setup * Don't skip tests on win and mac * Fix command error * Try apt install postgresql * Assert table does not exist when deleted. * Raise value error on a empty list or None value provided for IDs * pre-commit * Add install pgvector * Add install pgvector * Reorg test files, create a separate job for test pgvector * Fix format * Fix env format * Simplify job name, enable test_retrieve_config * Fix test_retrieve_config * Corrected behavior for get_docs_by_ids with no ids returning all docs. * Corrected behavior for get_docs_by_ids with no ids returning all docs. * Fixed pre-commits. * Added return values for all functions. * Validated distance search is implemented correctly. * Validated all pytests * Removed print. * Added default clause. * Make ids optional * Fix test, make it more robust * Bump version of openai for the vector_store support * Added support for choosing the sentence transformer model. * Added error handling for model name entered. * Updated model info. 
* Added model_name db_config param. * pre-commit fixes and last link fix. * Use secrets password. * fix: link fixed * updated tests * Updated config_list. * pre-commit fix. * Added chat_result to all output. Unable to re-run notebooks. * Pre-commit fix detected this requirement. * Fix python 3.8 and 3.9 not supported for macos * Fix python 3.8 and 3.9 not supported for macos * Fix format * Reran notebook with MetaLlama3Instruct7BQ4_k_M * added gpt model. * Reran notebook --------- Co-authored-by: Li Jiang <bnujli@gmail.com> Co-authored-by: Hk669 <hrushi669@gmail.com>
2024-04-28 06:43:02 -07:00
# Put the directory two levels above this file on the import path so the
# `test_assistant_agent` import below resolves; it supplies the key location
# and config-list name passed to config_list_from_json in test_retrievechat.
sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
# The Qdrant stack is optional. Probe for it once at import time and record
# the outcome in QDRANT_INSTALLED so the tests below can be skipped instead of
# failing at collection time when any of these imports is unavailable.
try:
    # fastembed is not referenced in this module; it is imported only to
    # confirm that it is available.
    import fastembed
    from qdrant_client import QdrantClient

    from autogen.agentchat.contrib.qdrant_retrieve_user_proxy_agent import (
        QdrantRetrieveUserProxyAgent,
        create_qdrant_from_dir,
        query_qdrant,
    )
except ImportError:
    QDRANT_INSTALLED = False
else:
    QDRANT_INSTALLED = True
# `skip` gates the OpenAI-backed test: it is forced to True when the openai
# package cannot be imported, otherwise it takes the value of `skip_openai`
# imported from conftest.
try:
    import openai
except ImportError:
    skip = True
else:
    skip = skip_openai
2447 fix pgvector tests and notebook (#2455) * Re-added missing notebook * Test installing postgres * Error handle the connection. * Fixed import. * Fixed import. * Fixed creation of collection without client. * PGVector portion working. OpenAI untested. * Fixed prints. * Added output. * Fixed pre-commits. * Run pgvector notebook * Improve efficiency of get_collection * Fix delete_collection * Fixed issues with pytests and validated functions. * Validated pytests. * Fixed pre-commits * Separated extra_requires to allow more logic. Retrieve_chat base dependencies included on pgvector and qdrant. * Fixed extra newline. * Added username and password fields. * URL Encode the connection string parameters to support symbols like % * Fixed pre-commits. * Added pgvector service * pgvector doesn't have health intervals. * Switched to colon based key values. * Run on Ubuntu only. Linux is only option with container service support. * Using default credentials instead. * Fix postgres setup * Fix postgres setup * Don't skip tests on win and mac * Fix command error * Try apt install postgresql * Assert table does not exist when deleted. * Raise value error on a empty list or None value provided for IDs * pre-commit * Add install pgvector * Add install pgvector * Reorg test files, create a separate job for test pgvector * Fix format * Fix env format * Simplify job name, enable test_retrieve_config * Fix test_retrieve_config * Corrected behavior for get_docs_by_ids with no ids returning all docs. * Corrected behavior for get_docs_by_ids with no ids returning all docs. * Fixed pre-commits. * Added return values for all functions. * Validated distance search is implemented correctly. * Validated all pytests * Removed print. * Added default clause. * Make ids optional * Fix test, make it more robust * Bump version of openai for the vector_store support * Added support for choosing the sentence transformer model. * Added error handling for model name entered. * Updated model info. 
* Added model_name db_config param. * pre-commit fixes and last link fix. * Use secrets password. * fix: link fixed * updated tests * Updated config_list. * pre-commit fix. * Added chat_result to all output. Unable to re-run notebooks. * Pre-commit fix detected this requirement. * Fix python 3.8 and 3.9 not supported for macos * Fix python 3.8 and 3.9 not supported for macos * Fix format * Reran notebook with MetaLlama3Instruct7BQ4_k_M * added gpt model. * Reran notebook --------- Co-authored-by: Li Jiang <bnujli@gmail.com> Co-authored-by: Hk669 <hrushi669@gmail.com>
2024-04-28 06:43:02 -07:00
# Path to the "test_files" directory located three levels above this file;
# test_qdrant_search indexes its contents into an in-memory Qdrant collection.
test_dir = os.path.join(os.path.dirname(__file__), "../../..", "test_files")
@pytest.mark.skipif(
    sys.platform in ["darwin", "win32"] or not QDRANT_INSTALLED or skip,
    reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip",
)
def test_retrievechat():
    """Drive one retrieval-augmented chat through a Qdrant-backed proxy agent.

    An assistant agent is built from the OpenAI config list, a
    QdrantRetrieveUserProxyAgent is pointed at an in-memory Qdrant client and
    the "./website/docs" directory, and the proxy starts a chat about a FLAML
    coding question. The test makes no assertions; it passes as long as the
    chat runs without raising.
    """
    # The logging hook that used to fill this dict (ChatCompletion.start_logging)
    # is deprecated in v0.2 and no longer called, so nothing in this function
    # writes to it before it is printed at the end.
    conversations = {}

    llm_config = {
        "timeout": 600,
        "seed": 42,
        "config_list": config_list_from_json(OAI_CONFIG_LIST, file_location=KEY_LOC),
    }
    assistant = RetrieveAssistantAgent(
        name="assistant",
        system_message="You are a helpful assistant.",
        llm_config=llm_config,
    )

    # NOTE(review): "docs_path" is relative, so this presumably expects the
    # test to be launched from the repository root; confirm.
    retrieve_config = {
        "client": QdrantClient(":memory:"),
        "docs_path": "./website/docs",
        "chunk_token_size": 2000,
    }
    ragproxyagent = QdrantRetrieveUserProxyAgent(
        name="ragproxyagent",
        human_input_mode="NEVER",
        max_consecutive_auto_reply=2,
        retrieve_config=retrieve_config,
    )

    assistant.reset()

    code_problem = "How can I use FLAML to perform a classification task, set use_spark=True, train 30 seconds and force cancel jobs if time limit is reached."
    ragproxyagent.initiate_chat(
        assistant,
        message=ragproxyagent.message_generator,
        problem=code_problem,
        silent=True,
    )

    print(conversations)
@pytest.mark.skipif(not QDRANT_INSTALLED, reason="qdrant_client is not installed")
def test_qdrant_filter():
    """Check that a string-filtered query returns the requested number of hits.

    The "./website/docs" directory is indexed into the "autogen-docs"
    collection of an in-memory Qdrant client, then queried for four results
    with `search_string="AutoGen"`. The test asserts that four ids come back
    for the single query text.
    """
    collection = "autogen-docs"
    expected_hits = 4

    client = QdrantClient(":memory:")
    create_qdrant_from_dir(dir_path="./website/docs", client=client, collection_name=collection)

    results = query_qdrant(
        query_texts=["How can I use AutoGen UserProxyAgent and AssistantAgent to do code generation?"],
        n_results=expected_hits,
        client=client,
        collection_name=collection,
        # Restrict the hits to documents whose text contains "AutoGen".
        search_string="AutoGen",
    )

    # results["ids"] holds one list of ids per query text; only one was sent.
    ids_for_query = results["ids"][0]
    assert len(ids_for_query) == expected_hits
@pytest.mark.skipif(not QDRANT_INSTALLED, reason="qdrant_client is not installed")
def test_qdrant_search():
    """Index the test files and run an unfiltered semantic search over them.

    Asserts that the "all-my-documents" collection exists after indexing, that
    the query returns a dict, and that the first document of at least one
    result group mentions "autogen" (case-insensitively).
    """
    client = QdrantClient(":memory:")
    create_qdrant_from_dir(test_dir, client=client)

    # No collection name was passed above, so "all-my-documents" is presumably
    # the default used by create_qdrant_from_dir; verify against its signature.
    assert client.get_collection("all-my-documents")

    # Semantic search with no filter applied.
    results = query_qdrant(["autogen"], client=client)
    assert isinstance(results, dict)
    assert any("autogen" in doc_group[0].lower() for doc_group in results.get("documents", []))
if __name__ == "__main__":
    # Run every test in declaration order when the file is executed directly.
    # NOTE(review): the functions are called directly here rather than through
    # pytest, so the skipif markers presumably have no effect on this path.
    for run_test in (test_retrievechat, test_qdrant_filter, test_qdrant_search):
        run_test()