Model: Add option to select backend
Changing the backend switches the container that's used.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
parent
242f6b7d2a
commit
7c6a053747
4 changed files with 38 additions and 6 deletions
|
|
@ -163,6 +163,13 @@ class ModelConfig(BaseConfigModel):
|
||||||
"Example: ['max_seq_len', 'cache_mode']."
|
"Example: ['max_seq_len', 'cache_mode']."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
backend: Optional[str] = Field(
|
||||||
|
"exllamav2",
|
||||||
|
description=(
|
||||||
|
"Backend to use for this model (default: exllamav2)\n"
|
||||||
|
"Options: exllamav2, exllamav3",
|
||||||
|
),
|
||||||
|
)
|
||||||
max_seq_len: Optional[int] = Field(
|
max_seq_len: Optional[int] = Field(
|
||||||
None,
|
None,
|
||||||
description=(
|
description=(
|
||||||
|
|
|
||||||
|
|
@ -23,10 +23,14 @@ from common.utils import unwrap
|
||||||
container: Optional[BaseModelContainer] = None
|
container: Optional[BaseModelContainer] = None
|
||||||
embeddings_container = None
|
embeddings_container = None
|
||||||
|
|
||||||
# FIXME: Possibly use this solely when creating the model
|
|
||||||
|
_BACKEND_REGISTRY = {}
|
||||||
|
|
||||||
if dependencies.exllamav2:
|
if dependencies.exllamav2:
|
||||||
from backends.exllamav2.model import ExllamaV2Container
|
from backends.exllamav2.model import ExllamaV2Container
|
||||||
|
|
||||||
|
_BACKEND_REGISTRY["exllamav2"] = ExllamaV2Container
|
||||||
|
|
||||||
|
|
||||||
if dependencies.extras:
|
if dependencies.extras:
|
||||||
from backends.infinity.model import InfinityContainer
|
from backends.infinity.model import InfinityContainer
|
||||||
|
|
@ -113,10 +117,24 @@ async def load_model_gen(model_path: pathlib.Path, **kwargs):
|
||||||
kwargs = {**config.model_defaults, **kwargs}
|
kwargs = {**config.model_defaults, **kwargs}
|
||||||
kwargs = await apply_inline_overrides(model_path, **kwargs)
|
kwargs = await apply_inline_overrides(model_path, **kwargs)
|
||||||
|
|
||||||
# Create a new container
|
# Create a new container and check if the right dependencies are installed
|
||||||
new_container = await ExllamaV2Container.create(
|
backend_name = unwrap(kwargs.get("backend"), "exllamav2").lower()
|
||||||
model_path.resolve(), False, **kwargs
|
container_class = _BACKEND_REGISTRY.get(backend_name)
|
||||||
)
|
|
||||||
|
if not container_class:
|
||||||
|
available_backends = list(_BACKEND_REGISTRY.keys())
|
||||||
|
if backend_name in available_backends:
|
||||||
|
raise ValueError(
|
||||||
|
f"Backend '{backend_name}' selected, but required dependencies "
|
||||||
|
"are not installed."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"Invalid backend '{backend_name}'. "
|
||||||
|
"Available backends: {available_backends}"
|
||||||
|
)
|
||||||
|
|
||||||
|
new_container = await container_class.create(model_path.resolve(), False, **kwargs)
|
||||||
|
|
||||||
# Add possible types of models that can be loaded
|
# Add possible types of models that can be loaded
|
||||||
model_type = [ModelType.MODEL]
|
model_type = [ModelType.MODEL]
|
||||||
|
|
|
||||||
|
|
@ -74,6 +74,10 @@ model:
|
||||||
# Example: ['max_seq_len', 'cache_mode'].
|
# Example: ['max_seq_len', 'cache_mode'].
|
||||||
use_as_default: []
|
use_as_default: []
|
||||||
|
|
||||||
|
# Backend to use for the model (default: exllamav2)
|
||||||
|
# Options: exllamav2, exllamav3
|
||||||
|
backend: exllamav2
|
||||||
|
|
||||||
# Max sequence length (default: Empty).
|
# Max sequence length (default: Empty).
|
||||||
# Fetched from the model's base sequence length in config.json by default.
|
# Fetched from the model's base sequence length in config.json by default.
|
||||||
max_seq_len:
|
max_seq_len:
|
||||||
|
|
|
||||||
|
|
@ -81,7 +81,10 @@ class ModelLoadRequest(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Config arguments
|
# Config arguments
|
||||||
|
backend: Optional[str] = Field(
|
||||||
|
description="Backend to use",
|
||||||
|
default="exllamav2",
|
||||||
|
)
|
||||||
max_seq_len: Optional[int] = Field(
|
max_seq_len: Optional[int] = Field(
|
||||||
description="Leave this blank to use the model's base sequence length",
|
description="Leave this blank to use the model's base sequence length",
|
||||||
default=None,
|
default=None,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue