Model: Add proper jobs cleanup and fix var calls

Jobs should be started and immediately cleaned up when calling the generation stream. Expose a stream_generate function and add it to the base class, since it's more idiomatic than generate_gen. The exl2 container's generate_gen function is now internal.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
2025-04-24 21:30:55 -04:00 · 2025-04-24 21:30:55 -04:00 · f070587e9f
commit f070587e9f
parent 7e007f0761
6 changed files with 45 additions and 26 deletions
--- a/common/model.py
+++ b/common/model.py
@@ -92,7 +92,7 @@ async def load_model_gen(model_path: pathlib.Path, **kwargs):
    if container and container.model:
        loaded_model_name = container.model_dir.name

-        if loaded_model_name == model_path.name and container.model_loaded:
+        if loaded_model_name == model_path.name and container.loaded:
            raise ValueError(
                f'Model "{loaded_model_name}" is already loaded! Aborting.'
            )
@@ -191,7 +191,7 @@ async def load_embedding_model(model_path: pathlib.Path, **kwargs):
    if embeddings_container and embeddings_container.engine:
        loaded_model_name = embeddings_container.model_dir.name

-        if loaded_model_name == model_path.name and embeddings_container.model_loaded:
+        if loaded_model_name == model_path.name and embeddings_container.loaded:
            raise ValueError(
                f'Embeddings model "{loaded_model_name}" is already loaded! Aborting.'
            )