Model: Exl3 cache quant settings lenient with whitespace

This commit is contained in:
DocShotgun 2025-05-01 23:05:41 -07:00
parent 68a660bdb3
commit 58e34ba4c5
2 changed files with 2 additions and 2 deletions

View file

@@ -233,7 +233,7 @@ class ExllamaV3Container(BaseModelContainer):
case "Q8":
self.cache_mode = "8,8"
-split_cache_mode = re.search(r"^([2-8]),([2-8])$", self.cache_mode)
+split_cache_mode = re.search(r"^([2-8])\s*,\s*([2-8])$", self.cache_mode)
if split_cache_mode:
k_bits = int(split_cache_mode.group(1))
v_bits = int(split_cache_mode.group(2))