diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
index 65689f4..f7314a7 100644
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -188,7 +188,7 @@ class ExllamaV2Container(BaseModelContainer):
         self.cache_mode = unwrap(kwargs.get("cache_mode"), "FP16")
 
         # Catch exllamav3 cache_mode
-        if not self.cache_mode.startswith("Q"):
+        if self.cache_mode != "FP16" and not self.cache_mode.startswith("Q"):
             logger.warning(
                 f"Provided cache mode '{self.cache_mode}' is not a "
                 "valid choice for exllamav2, please check your settings. "
@@ -402,7 +402,10 @@ class ExllamaV2Container(BaseModelContainer):
         self.draft_cache_mode = unwrap(draft_args.get("draft_cache_mode"), "FP16")
 
         # Catch exllamav3 draft_cache_mode
-        if not self.draft_cache_mode.startswith("Q"):
+        if (
+            self.draft_cache_mode != "FP16"
+            and not self.draft_cache_mode.startswith("Q")
+        ):
             logger.warning(
                 f"Provided draft cache mode '{self.draft_cache_mode}' is not a "
                 "valid choice for exllamav2, please check your settings. "