Model: Add option to select backend

Changing the backend switches the container that's used.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
kingbri 2025-04-27 22:27:26 -04:00
parent 242f6b7d2a
commit 7c6a053747
4 changed files with 38 additions and 6 deletions

View file

@ -163,6 +163,13 @@ class ModelConfig(BaseConfigModel):
"Example: ['max_seq_len', 'cache_mode']."
),
)
# Name of the backend whose container is used to load this model.
backend: Optional[str] = Field(
    "exllamav2",
    # The adjacent literals below concatenate into a single str. Do not add a
    # trailing comma after the last literal: that would make the
    # parenthesized expression a 1-tuple instead of a string.
    description=(
        "Backend to use for this model (default: exllamav2)\n"
        "Options: exllamav2, exllamav3"
    ),
)
max_seq_len: Optional[int] = Field(
None,
description=(

View file

@ -23,10 +23,14 @@ from common.utils import unwrap
container: Optional[BaseModelContainer] = None
embeddings_container = None
# FIXME: Possibly use this solely when creating the model
# Maps a backend name to the container class that implements it. An entry is
# only added when the matching dependency flag is set, so a name missing from
# this dict has no usable container in this process.
_BACKEND_REGISTRY = {}
if dependencies.exllamav2:
from backends.exllamav2.model import ExllamaV2Container
_BACKEND_REGISTRY["exllamav2"] = ExllamaV2Container
if dependencies.extras:
from backends.infinity.model import InfinityContainer
@ -113,10 +117,24 @@ async def load_model_gen(model_path: pathlib.Path, **kwargs):
kwargs = {**config.model_defaults, **kwargs}
kwargs = await apply_inline_overrides(model_path, **kwargs)
# Create a new container
new_container = await ExllamaV2Container.create(
model_path.resolve(), False, **kwargs
)
# Create a new container and check if the right dependencies are installed
backend_name = unwrap(kwargs.get("backend"), "exllamav2").lower()
container_class = _BACKEND_REGISTRY.get(backend_name)
if not container_class:
available_backends = list(_BACKEND_REGISTRY.keys())
if backend_name in available_backends:
raise ValueError(
f"Backend '{backend_name}' selected, but required dependencies "
"are not installed."
)
else:
raise ValueError(
f"Invalid backend '{backend_name}'. "
"Available backends: {available_backends}"
)
new_container = await container_class.create(model_path.resolve(), False, **kwargs)
# Add possible types of models that can be loaded
model_type = [ModelType.MODEL]

View file

@ -74,6 +74,10 @@ model:
# Example: ['max_seq_len', 'cache_mode'].
use_as_default: []
# Backend to use for the model (default: exllamav2)
# Options: exllamav2, exllamav3
backend: exllamav2
# Max sequence length (default: Empty).
# Fetched from the model's base sequence length in config.json by default.
max_seq_len:

View file

@ -81,7 +81,10 @@ class ModelLoadRequest(BaseModel):
)
# Config arguments
# Backend name for this load request; falls back to "exllamav2" when the
# client omits the field.
backend: Optional[str] = Field(
description="Backend to use",
default="exllamav2",
)
max_seq_len: Optional[int] = Field(
description="Leave this blank to use the model's base sequence length",
default=None,