Model: Auto-detect model backend from config

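* Use exllamav3 when the model's config.json reports the exl3 quant method, exllamav2 otherwise; an explicitly passed `backend` option still takes precedence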
This commit is contained in:
DocShotgun 2025-05-06 18:51:58 -07:00
parent bc0a84241a
commit f8070e7707
6 changed files with 35 additions and 10 deletions

View file

@ -17,6 +17,7 @@ from common.logger import get_loading_progress_bar
from common.networking import handle_request_error
from common.tabby_config import config
from common.optional_dependencies import dependencies
from common.transformers_utils import HuggingFaceConfig
from common.utils import unwrap
# Global variables for model container
@ -123,8 +124,24 @@ async def load_model_gen(model_path: pathlib.Path, **kwargs):
kwargs = {**config.model_defaults, **kwargs}
kwargs = await apply_inline_overrides(model_path, **kwargs)
# Read config.json and detect the quant method
hf_config_path = model_path / "config.json"
if hf_config_path.exists():
try:
hf_config = await HuggingFaceConfig.from_file(model_path)
except Exception as exc:
raise ValueError(
"Failed to read the model's config.json. "
f"Please check your model directory at {model_path}."
) from exc
quant_method = hf_config.quant_method()
if quant_method == "exl3":
backend_name = "exllamav3"
else:
backend_name = "exllamav2"
# Create a new container and check if the right dependencies are installed
backend_name = unwrap(kwargs.get("backend"), "exllamav2").lower()
backend_name = unwrap(kwargs.get("backend"), backend_name).lower()
container_class = _BACKEND_REGISTRY.get(backend_name)
if not container_class: