mirror of
https://github.com/microsoft/graphrag.git
synced 2025-06-26 23:19:58 +00:00
[compatibility issue] Support open-source LLM models for prompt tuning (#505)
Compatibility update: support non-OpenAI models for prompt tuning. Co-authored-by: Alonso Guevara <alonsog@microsoft.com>
This commit is contained in:
parent
7a9c9071c1
commit
c7da7f1afb
@ -0,0 +1,4 @@
|
||||
{
|
||||
"type": "patch",
|
||||
"description": "support non-open ai model config to prompt tune"
|
||||
}
|
@ -2,10 +2,12 @@
|
||||
# Licensed under the MIT License
|
||||
|
||||
"""Utilities for working with tokens."""
|
||||
import logging
|
||||
|
||||
import tiktoken
|
||||
|
||||
DEFAULT_ENCODING_NAME = "cl100k_base"
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def num_tokens_from_string(
|
||||
@ -13,7 +15,12 @@ def num_tokens_from_string(
|
||||
) -> int:
|
||||
"""Return the number of tokens in a text string."""
|
||||
if model is not None:
|
||||
encoding = tiktoken.encoding_for_model(model)
|
||||
try:
|
||||
encoding = tiktoken.encoding_for_model(model)
|
||||
except KeyError as e:
|
||||
log.error(f"Failed to get encoding for {model} when getting num_tokens_from_string, "
|
||||
f"fall back to default encoding {DEFAULT_ENCODING_NAME}")
|
||||
encoding = tiktoken.get_encoding(DEFAULT_ENCODING_NAME)
|
||||
else:
|
||||
encoding = tiktoken.get_encoding(encoding_name or DEFAULT_ENCODING_NAME)
|
||||
return len(encoding.encode(string))
|
||||
|
Loading…
x
Reference in New Issue
Block a user