API: Update inline load

- Add a config flag
- Migrate support to /v1/completions
- Unify the load function

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri 2024-09-03 23:37:28 -04:00
parent dd30d6592a
commit 21f14d4318
3 changed files with 60 additions and 48 deletions

View file

@ -1,4 +1,8 @@
"""Completion utilities for OAI server."""
"""
Completion utilities for OAI server.
Also serves as a common module for completions and chat completions.
"""
import asyncio
import pathlib
@ -9,7 +13,8 @@ from typing import List, Union
from loguru import logger
from common import model
from common import config, model
from common.auth import get_key_permission
from common.networking import (
get_generator_error,
handle_request_disconnect,
@ -173,6 +178,47 @@ async def stream_generate_completion(
)
async def load_inline_model(model_name: str, request: Request):
"""Load a model from the data.model parameter"""
# Return if the model container already exists
if model.container and model.container.model_dir.name == model_name:
return
model_config = config.model_config()
# Inline model loading isn't enabled or the user isn't an admin
if not get_key_permission(request) == "admin":
logger.warning(
f"Unable to switch model to {model_name} "
"because an admin key isn't provided."
)
return
if not unwrap(model_config.get("inline_model_loading"), False):
logger.warning(
f"Unable to switch model to {model_name} because "
'"inline_model_load" is not True in config.yml.'
)
return
model_path = pathlib.Path(unwrap(model_config.get("model_dir"), "models"))
model_path = model_path / model_name
# Model path doesn't exist
if not model_path.exists():
logger.warning(
f"Could not find model path {str(model_path)}. Skipping inline model load."
)
return
# Load the model
await model.load_model(model_path)
async def generate_completion(
data: CompletionRequest, request: Request, model_path: pathlib.Path
):