graphrag/tests/mock_provider.py

# Copyright (c) 2025 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing mock model provider definitions."""

from collections.abc import AsyncGenerator, Generator
from typing import Any

from pydantic import BaseModel

from graphrag.config.enums import ModelType
from graphrag.config.models.language_model_config import LanguageModelConfig
from graphrag.language_model.response.base import (
    BaseModelOutput,
    BaseModelResponse,
    ModelResponse,
)


class MockChatLLM:
    """A mock chat LLM provider."""

    def __init__(
        self,
        responses: list[str | BaseModel] | None = None,
        config: LanguageModelConfig | None = None,
        json: bool = False,
        **kwargs: Any,
    ):
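        # Responses supplied on the config take precedence over the
        # constructor argument.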
        self.responses = config.responses if config and config.responses else responses
        self.response_index = 0
        self.config = config or LanguageModelConfig(
            type=ModelType.MockChat, model="gpt-4o", api_key="mock"
        )

    async def achat(
        self,
        prompt: str,
        history: list | None = None,
        **kwargs,
    ) -> ModelResponse:
        """Return the next response in the list."""
        return self.chat(prompt, history, **kwargs)

    async def achat_stream(
        self,
        prompt: str,
        history: list | None = None,
        **kwargs,
    ) -> AsyncGenerator[str, None]:
        """Yield each configured response in order."""
        if not self.responses:
            return
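        # Serialize pydantic models to JSON; pass plain strings through as-is.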
        for response in self.responses:
            yield (
                response.model_dump_json()
                if isinstance(response, BaseModel)
                else response
            )

    def chat(
        self,
        prompt: str,
        history: list | None = None,
        **kwargs,
    ) -> ModelResponse:
        """Return the next response in the list, cycling when exhausted."""
        if not self.responses:
            return BaseModelResponse(output=BaseModelOutput(content=""))
        response = self.responses[self.response_index % len(self.responses)]
        self.response_index += 1
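        # Preserve the pydantic model (if any) so callers can assert on the
        # structured value as well as the serialized JSON string.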
        parsed_json = response if isinstance(response, BaseModel) else None
        response = (
            response.model_dump_json() if isinstance(response, BaseModel) else response
        )
        return BaseModelResponse(
            output=BaseModelOutput(content=response),
            parsed_response=parsed_json,
        )

    def chat_stream(
        self,
        prompt: str,
        history: list | None = None,
        **kwargs,
    ) -> Generator[str, None, None]:
        """Stream the next response. Not implemented for the synchronous mock."""
        raise NotImplementedError


class MockEmbeddingLLM:
    """A mock embedding LLM provider."""

    def __init__(self, **kwargs: Any):
        self.config = LanguageModelConfig(
            type=ModelType.MockEmbedding, model="text-embedding-ada-002", api_key="mock"
        )

    def embed_batch(self, text_list: list[str], **kwargs: Any) -> list[list[float]]:
        """Generate an embedding for each text in the input batch."""
        if isinstance(text_list, str):
            return [[1.0, 1.0, 1.0]]
        return [[1.0, 1.0, 1.0] for _ in text_list]

    def embed(self, text: str, **kwargs: Any) -> list[float]:
        """Generate an embedding for the input text."""
        return [1.0, 1.0, 1.0]

    async def aembed(self, text: str, **kwargs: Any) -> list[float]:
        """Generate an embedding for the input text."""
        return [1.0, 1.0, 1.0]

    async def aembed_batch(
        self, text_list: list[str], **kwargs: Any
    ) -> list[list[float]]:
        """Generate an embedding for each text in the input batch."""
        if isinstance(text_list, str):
            return [[1.0, 1.0, 1.0]]
        return [[1.0, 1.0, 1.0] for _ in text_list]
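

# A minimal usage sketch (an editorial addition, not part of the upstream
# module) showing how the mocks above behave. The function names are
# hypothetical; the `.output.content` access follows the
# BaseModelResponse/BaseModelOutput fields constructed in `chat` above.
def _example_mock_chat_cycles_responses() -> None:
    llm = MockChatLLM(responses=["first", "second"])
    assert llm.chat("p").output.content == "first"
    assert llm.chat("p").output.content == "second"
    # response_index wraps with modulo, so the cycle restarts.
    assert llm.chat("p").output.content == "first"


def _example_mock_embedding_is_fixed() -> None:
    llm = MockEmbeddingLLM()
    assert llm.embed("any text") == [1.0, 1.0, 1.0]
    assert llm.embed_batch(["a", "b"]) == [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]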