Model: Add proper jobs cleanup and fix var calls

Jobs should be started and immediately cleaned up when calling the
generation stream. Expose a stream_generate function and add it to
the base class, since it is more idiomatic than generate_gen.

The exl2 container's generate_gen function is now internal.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
kingbri 2025-04-24 21:30:55 -04:00
parent 7e007f0761
commit f070587e9f
6 changed files with 45 additions and 26 deletions

View file

@@ -92,7 +92,7 @@ async def load_model_gen(model_path: pathlib.Path, **kwargs):
if container and container.model:
loaded_model_name = container.model_dir.name
if loaded_model_name == model_path.name and container.model_loaded:
if loaded_model_name == model_path.name and container.loaded:
raise ValueError(
f'Model "{loaded_model_name}" is already loaded! Aborting.'
)
@@ -191,7 +191,7 @@ async def load_embedding_model(model_path: pathlib.Path, **kwargs):
if embeddings_container and embeddings_container.engine:
loaded_model_name = embeddings_container.model_dir.name
if loaded_model_name == model_path.name and embeddings_container.model_loaded:
if loaded_model_name == model_path.name and embeddings_container.loaded:
raise ValueError(
f'Embeddings model "{loaded_model_name}" is already loaded! Aborting.'
)