From d15eb55f20a7630025563584778345298b00370c Mon Sep 17 00:00:00 2001 From: kingbri <8082010+kingbri1@users.noreply.github.com> Date: Mon, 12 May 2025 09:47:49 -0400 Subject: [PATCH] Model: Fix exl2 cache mode check FP16 was not included in the validation step. Warn only when the provided mode is neither "FP16" nor a "Q"-prefixed quantized mode. Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com> --- backends/exllamav2/model.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index 65689f4..f7314a7 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -188,7 +188,7 @@ class ExllamaV2Container(BaseModelContainer): self.cache_mode = unwrap(kwargs.get("cache_mode"), "FP16") # Catch exllamav3 cache_mode - if not self.cache_mode.startswith("Q"): + if self.cache_mode != "FP16" and not self.cache_mode.startswith("Q"): logger.warning( f"Provided cache mode '{self.cache_mode}' is not a " "valid choice for exllamav2, please check your settings. " @@ -402,7 +402,10 @@ class ExllamaV2Container(BaseModelContainer): self.draft_cache_mode = unwrap(draft_args.get("draft_cache_mode"), "FP16") # Catch exllamav3 draft_cache_mode - if not self.draft_cache_mode.startswith("Q"): + if ( + self.draft_cache_mode != "FP16" + and not self.draft_cache_mode.startswith("Q") + ): logger.warning( f"Provided draft cache mode '{self.draft_cache_mode}' is not a " "valid choice for exllamav2, please check your settings. "