[compatibility issue] Support open-source LLM models for prompt tuning (#505)

Compatibility update: support non-OpenAI models for prompt tuning

Co-authored-by: Alonso Guevara <alonsog@microsoft.com>
This commit is contained in:
Kylin 2024-07-12 02:03:30 +08:00 committed by GitHub
parent 7a9c9071c1
commit c7da7f1afb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 12 additions and 1 deletions

View File

@ -0,0 +1,4 @@
{
"type": "patch",
    "description": "support non-OpenAI model config to prompt tune"
}

View File

@ -2,10 +2,12 @@
# Licensed under the MIT License
"""Utilities for working with tokens."""
import logging
import tiktoken
DEFAULT_ENCODING_NAME = "cl100k_base"
log = logging.getLogger(__name__)
def num_tokens_from_string(
@ -13,7 +15,12 @@ def num_tokens_from_string(
) -> int:
"""Return the number of tokens in a text string."""
if model is not None:
encoding = tiktoken.encoding_for_model(model)
try:
encoding = tiktoken.encoding_for_model(model)
except KeyError as e:
log.error(f"Failed to get encoding for {model} when getting num_tokens_from_string, "
f"fall back to default encoding {DEFAULT_ENCODING_NAME}")
encoding = tiktoken.get_encoding(DEFAULT_ENCODING_NAME)
else:
encoding = tiktoken.get_encoding(encoding_name or DEFAULT_ENCODING_NAME)
return len(encoding.encode(string))