Embeddings: Add model management
Embedding models are managed on a separate backend but run in parallel with the main model. Therefore, manage them in a separate container with separate routes.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
parent
f13d0fb8b3
commit
bfa011e0ce
6 changed files with 135 additions and 19 deletions
|
|
@@ -57,8 +57,6 @@ async def load_model_gen(model_path: pathlib.Path, **kwargs):
|
|||
f'Model "{loaded_model_name}" is already loaded! Aborting.'
|
||||
)
|
||||
|
||||
# Unload the existing model
|
||||
if container and container.model:
|
||||
logger.info("Unloading existing model.")
|
||||
await unload_model()
|
||||
|
||||
|
|
@@ -109,24 +107,35 @@ async def unload_loras():
|
|||
await container.unload(loras_only=True)
|
||||
|
||||
|
||||
async def load_embeddings_model(model_path: pathlib.Path, **kwargs):
|
||||
async def load_embedding_model(model_path: pathlib.Path, **kwargs):
|
||||
global embeddings_container
|
||||
|
||||
# Break out if infinity isn't installed
|
||||
if not has_infinity_emb:
|
||||
logger.warning(
|
||||
raise ImportError(
|
||||
"Skipping embeddings because infinity-emb is not installed.\n"
|
||||
"Please run the following command in your environment "
|
||||
"to install extra packages:\n"
|
||||
"pip install -U .[extras]"
|
||||
)
|
||||
return
|
||||
|
||||
# Check if the model is already loaded
|
||||
if embeddings_container and embeddings_container.engine:
|
||||
loaded_model_name = embeddings_container.model_dir.name
|
||||
|
||||
if loaded_model_name == model_path.name and embeddings_container.model_loaded:
|
||||
raise ValueError(
|
||||
f'Embeddings model "{loaded_model_name}" is already loaded! Aborting.'
|
||||
)
|
||||
|
||||
logger.info("Unloading existing embeddings model.")
|
||||
await unload_embedding_model()
|
||||
|
||||
embeddings_container = InfinityContainer(model_path)
|
||||
await embeddings_container.load(**kwargs)
|
||||
|
||||
|
||||
async def unload_embeddings_model():
|
||||
async def unload_embedding_model():
|
||||
global embeddings_container
|
||||
|
||||
await embeddings_container.unload()
|
||||
|
|
@@ -172,7 +181,7 @@ async def check_embeddings_container():
|
|||
embeddings_container.model_is_loading or embeddings_container.model_loaded
|
||||
):
|
||||
error_message = handle_request_error(
|
||||
"No embeddings models are currently loaded.",
|
||||
"No embedding models are currently loaded.",
|
||||
exc_info=False,
|
||||
).error.message
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue