Model: Check for unsupported cache mode in exllamav2
This commit is contained in:
parent
45b966363e
commit
9dcde59c57
2 changed files with 18 additions and 2 deletions
|
|
@@ -185,9 +185,17 @@ class ExllamaV2Container(BaseModelContainer):
|
||||||
# MARK: User configuration
|
# MARK: User configuration
|
||||||
|
|
||||||
# Get cache mode
|
# Get cache mode
|
||||||
# TODO: Separate validation for Exl2 and Exl3 q-cache options
|
|
||||||
self.cache_mode = unwrap(kwargs.get("cache_mode"), "FP16")
|
self.cache_mode = unwrap(kwargs.get("cache_mode"), "FP16")
|
||||||
|
|
||||||
|
# Catch exllamav3 cache_mode
|
||||||
|
if not self.cache_mode.startswith("Q"):
|
||||||
|
logger.warning(
|
||||||
|
f"Provided cache mode '{self.cache_mode}' is not a "
|
||||||
|
"valid choice for exllamav2, please check your settings. "
|
||||||
|
"Defaulting to FP16."
|
||||||
|
)
|
||||||
|
self.cache_mode = "FP16"
|
||||||
|
|
||||||
# Turn off GPU split if the user is using 1 GPU
|
# Turn off GPU split if the user is using 1 GPU
|
||||||
gpu_count = torch.cuda.device_count()
|
gpu_count = torch.cuda.device_count()
|
||||||
gpu_split_auto = unwrap(kwargs.get("gpu_split_auto"), True)
|
gpu_split_auto = unwrap(kwargs.get("gpu_split_auto"), True)
|
||||||
|
|
@@ -393,6 +401,15 @@ class ExllamaV2Container(BaseModelContainer):
|
||||||
# Set draft cache mode
|
# Set draft cache mode
|
||||||
self.draft_cache_mode = unwrap(draft_args.get("draft_cache_mode"), "FP16")
|
self.draft_cache_mode = unwrap(draft_args.get("draft_cache_mode"), "FP16")
|
||||||
|
|
||||||
|
# Catch exllamav3 draft_cache_mode
|
||||||
|
if not self.draft_cache_mode.startswith("Q"):
|
||||||
|
logger.warning(
|
||||||
|
f"Provided draft cache mode '{self.draft_cache_mode}' is not a "
|
||||||
|
"valid choice for exllamav2, please check your settings. "
|
||||||
|
"Defaulting to FP16."
|
||||||
|
)
|
||||||
|
self.draft_cache_mode = "FP16"
|
||||||
|
|
||||||
# Edit the draft config size
|
# Edit the draft config size
|
||||||
if chunk_size:
|
if chunk_size:
|
||||||
self.draft_config.max_input_len = chunk_size
|
self.draft_config.max_input_len = chunk_size
|
||||||
|
|
|
||||||
|
|
@@ -229,7 +229,6 @@ class ModelConfig(BaseConfigModel):
|
||||||
"or auto-calculate."
|
"or auto-calculate."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
# TODO: Separate validation for Exl2 and Exl3 q-cache options
|
|
||||||
cache_mode: Optional[CACHE_TYPE] = Field(
|
cache_mode: Optional[CACHE_TYPE] = Field(
|
||||||
"FP16",
|
"FP16",
|
||||||
description=(
|
description=(
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue