Model: Exl3 cache quant settings lenient with whitespace
This commit is contained in:
parent
68a660bdb3
commit
58e34ba4c5
2 changed files with 2 additions and 2 deletions
|
|
@ -233,7 +233,7 @@ class ExllamaV3Container(BaseModelContainer):
|
||||||
case "Q8":
|
case "Q8":
|
||||||
self.cache_mode = "8,8"
|
self.cache_mode = "8,8"
|
||||||
|
|
||||||
split_cache_mode = re.search(r"^([2-8]),([2-8])$", self.cache_mode)
|
split_cache_mode = re.search(r"^([2-8])\s*,\s*([2-8])$", self.cache_mode)
|
||||||
if split_cache_mode:
|
if split_cache_mode:
|
||||||
k_bits = int(split_cache_mode.group(1))
|
k_bits = int(split_cache_mode.group(1))
|
||||||
v_bits = int(split_cache_mode.group(2))
|
v_bits = int(split_cache_mode.group(2))
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ from typing import List, Literal, Optional, Union
|
||||||
|
|
||||||
|
|
||||||
CACHE_SIZES = Literal["FP16", "Q8", "Q6", "Q4"]
|
CACHE_SIZES = Literal["FP16", "Q8", "Q6", "Q4"]
|
||||||
CACHE_TYPE = Union[CACHE_SIZES, constr(pattern=r"^[2-8],[2-8]$")]
|
CACHE_TYPE = Union[CACHE_SIZES, constr(pattern=r"^[2-8]\s*,\s*[2-8]$")]
|
||||||
|
|
||||||
|
|
||||||
class Metadata(BaseModel):
|
class Metadata(BaseModel):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue