Model: Make Exl3 cache quant settings lenient with whitespace around the comma

2025-05-01 23:05:41 -07:00 · 2025-05-01 23:05:41 -07:00 · 58e34ba4c5
commit 58e34ba4c5
parent 68a660bdb3
2 changed files with 2 additions and 2 deletions
--- a/backends/exllamav3/model.py
+++ b/backends/exllamav3/model.py
@ -233,7 +233,7 @@ class ExllamaV3Container(BaseModelContainer):
            case "Q8":
                self.cache_mode = "8,8"

-        split_cache_mode = re.search(r"^([2-8]),([2-8])$", self.cache_mode)
+        split_cache_mode = re.search(r"^([2-8])\s*,\s*([2-8])$", self.cache_mode)
        if split_cache_mode:
            k_bits = int(split_cache_mode.group(1))
            v_bits = int(split_cache_mode.group(2))
--- a/common/config_models.py
+++ b/common/config_models.py
@ -10,7 +10,7 @@ from typing import List, Literal, Optional, Union


 CACHE_SIZES = Literal["FP16", "Q8", "Q6", "Q4"]
-CACHE_TYPE = Union[CACHE_SIZES, constr(pattern=r"^[2-8],[2-8]$")]
+CACHE_TYPE = Union[CACHE_SIZES, constr(pattern=r"^[2-8]\s*,\s*[2-8]$")]


 class Metadata(BaseModel):