From 58e34ba4c5ca38cfba4bbced01e85c69c5c7c07f Mon Sep 17 00:00:00 2001 From: DocShotgun <126566557+DocShotgun@users.noreply.github.com> Date: Thu, 1 May 2025 23:05:41 -0700 Subject: [PATCH] Model: Exl3 cache quant settings lenient with whitespace --- backends/exllamav3/model.py | 2 +- common/config_models.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/exllamav3/model.py b/backends/exllamav3/model.py index 18e04e8..330c4e1 100644 --- a/backends/exllamav3/model.py +++ b/backends/exllamav3/model.py @@ -233,7 +233,7 @@ class ExllamaV3Container(BaseModelContainer): case "Q8": self.cache_mode = "8,8" - split_cache_mode = re.search(r"^([2-8]),([2-8])$", self.cache_mode) + split_cache_mode = re.search(r"^([2-8])\s*,\s*([2-8])$", self.cache_mode) if split_cache_mode: k_bits = int(split_cache_mode.group(1)) v_bits = int(split_cache_mode.group(2)) diff --git a/common/config_models.py b/common/config_models.py index 8ee4ff7..b4a245e 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -10,7 +10,7 @@ from typing import List, Literal, Optional, Union CACHE_SIZES = Literal["FP16", "Q8", "Q6", "Q4"] -CACHE_TYPE = Union[CACHE_SIZES, constr(pattern=r"^[2-8],[2-8]$")] +CACHE_TYPE = Union[CACHE_SIZES, constr(pattern=r"^[2-8]\s*,\s*[2-8]$")] class Metadata(BaseModel):