Adding check for possible sglang livelock

This commit is contained in:
Jake Poznanski 2024-11-22 21:50:45 +00:00
parent cff97990bf
commit a95487e44c
2 changed files with 6 additions and 1 deletions

View File

@@ -437,6 +437,11 @@ async def sglang_server_task(args, semaphore):
logger.error("Cannot continue, sampling errors detected, model is probably corrupt")
sys.exit(1)
# TODO: trace down the root cause of this issue in sglang itself. Until then, terminate the server process here, because this error otherwise causes the server to lock up.
if "IndexError: list index out of range" in line:
logger.error("IndexError in model, restarting server")
proc.terminate()
if not server_printed_ready_message and "The server is fired up and ready to roll!" in line:
server_printed_ready_message = True
last_semaphore_release = time.time()

View File

@@ -2,7 +2,7 @@ _MAJOR = "0"
_MINOR = "1"
# On main and in a nightly release the patch should be one ahead of the last
# released build.
_PATCH = "43"
_PATCH = "44"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = ""