OAI: Add cancellation with inline load

When the request is cancelled, also cancel the in-progress inline model
load task. In addition, when checking whether a model container already
exists, also check that the model is fully loaded.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri 2024-09-11 00:08:55 -04:00
parent b9e5693c1b
commit e00eb09ef3
2 changed files with 14 additions and 3 deletions

View file

@ -55,7 +55,14 @@ async def completion_request(
"""
if data.model:
await load_inline_model(data.model, request)
inline_load_task = asyncio.create_task(load_inline_model(data.model, request))
await run_with_request_disconnect(
request,
inline_load_task,
disconnect_message=f"Model switch for generation {request.state.id} "
+ "cancelled by user.",
)
else:
await check_model_container()

View file

@ -112,8 +112,12 @@ async def _stream_collector(
async def load_inline_model(model_name: str, request: Request):
"""Load a model from the data.model parameter"""
# Return if the model container already exists
if model.container and model.container.model_dir.name == model_name:
# Return if the model container already exists and the model is fully loaded
if (
model.container
and model.container.model_dir.name == model_name
and model.container.model_loaded
):
return
# Inline model loading isn't enabled or the user isn't an admin