Model: Add option to select backend

Changing the backend switches the container that's used.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
2025-04-27 22:27:26 -04:00 · 2025-04-27 22:27:26 -04:00 · 7c6a053747
commit 7c6a053747
parent 242f6b7d2a
4 changed files with 38 additions and 6 deletions
--- a/common/config_models.py
+++ b/common/config_models.py
@@ -163,6 +163,13 @@ class ModelConfig(BaseConfigModel):
            "Example: ['max_seq_len', 'cache_mode']."
        ),
    )
+    backend: Optional[str] = Field(
+        "exllamav2",
+        description=(
+            "Backend to use for this model (default: exllamav2)\n"
+            "Options: exllamav2, exllamav3"
+        ),
+    )
    max_seq_len: Optional[int] = Field(
        None,
        description=(
--- a/common/model.py
+++ b/common/model.py
@@ -23,10 +23,14 @@ from common.utils import unwrap
 container: Optional[BaseModelContainer] = None
 embeddings_container = None

-# FIXME: Possibly use this solely when creating the model
+
+_BACKEND_REGISTRY = {}
+
 if dependencies.exllamav2:
    from backends.exllamav2.model import ExllamaV2Container

+    _BACKEND_REGISTRY["exllamav2"] = ExllamaV2Container
+

 if dependencies.extras:
    from backends.infinity.model import InfinityContainer
@@ -113,10 +117,24 @@ async def load_model_gen(model_path: pathlib.Path, **kwargs):
    kwargs = {**config.model_defaults, **kwargs}
    kwargs = await apply_inline_overrides(model_path, **kwargs)

-    # Create a new container
-    new_container = await ExllamaV2Container.create(
-        model_path.resolve(), False, **kwargs
-    )
+    # Create a new container for the requested backend.
+    # Backends are only registered when their dependencies are installed, so a
+    # missing entry means the name is either unknown or its dependencies are
+    # unavailable.
+    backend_name = unwrap(kwargs.get("backend"), "exllamav2").lower()
+    container_class = _BACKEND_REGISTRY.get(backend_name)
+
+    if container_class is None:
+        available_backends = list(_BACKEND_REGISTRY)
+        raise ValueError(
+            f"Backend '{backend_name}' is invalid or its required "
+            "dependencies are not installed. "
+            f"Available backends: {available_backends}"
+        )
+
+    new_container = await container_class.create(
+        model_path.resolve(), False, **kwargs
+    )

    # Add possible types of models that can be loaded
    model_type = [ModelType.MODEL]
--- a/config_sample.yml
+++ b/config_sample.yml
@@ -74,6 +74,10 @@ model:
  # Example: ['max_seq_len', 'cache_mode'].
  use_as_default: []

+  # Backend to use for the model (default: exllamav2)
+  # Options: exllamav2, exllamav3
+  backend: exllamav2
+
  # Max sequence length (default: Empty).
  # Fetched from the model's base sequence length in config.json by default.
  max_seq_len:
--- a/endpoints/core/types/model.py
+++ b/endpoints/core/types/model.py
@@ -81,7 +81,10 @@ class ModelLoadRequest(BaseModel):
    )

    # Config arguments
-
+    backend: Optional[str] = Field(
+        description="Backend to use",
+        default="exllamav2",
+    )
    max_seq_len: Optional[int] = Field(
        description="Leave this blank to use the model's base sequence length",
        default=None,