From a46ee62d03b8be772ff1ecdfb231250f34cfe297 Mon Sep 17 00:00:00 2001 From: kingbri Date: Sat, 25 May 2024 20:56:07 -0400 Subject: [PATCH] Model: Clarify warning and device check on load FA2 v2.5.7 and up is not supported below ampere and on AMD GPUs. Clarify the error message and explain what happens as a result. Signed-off-by: kingbri --- backends/exllamav2/model.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index 30f8a2c..52bc890 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -400,21 +400,27 @@ class ExllamaV2Container: async for value in iterate_in_threadpool(model_load_generator): yield value - # Disable paged mode if the user's min GPU is supported (ampere and above) + # Disable paged mode if the user's min GPU isn't supported (ampere and up) + device_list = { + module.device_idx + for module in self.model.modules + if module.device_idx >= 0 + } min_compute_capability = min( - set( - [ - torch.cuda.get_device_capability(device=module.device_idx)[0] - for module in self.model.modules - if module.device_idx >= 0 - ] - ) + torch.cuda.get_device_capability(device=device)[0] + for device in device_list ) + # Compute capability < 8 is not supported by FA2 + # AMD is also unsupported until ROCm updates its FA2 fork if torch.version.hip or min_compute_capability < 8: logger.warning( "An unsupported GPU is found in this configuration. " - "Switching to compatibility mode. This disables parallel batching." + "Switching to compatibility mode. \n" + "This disables parallel batching " + "and features that rely on it (ex. CFG). \n" + "To disable compatibility mode, all GPUs must be ampere " + "(30 series) or newer. AMD GPUs are not supported." ) self.paged = False self.max_batch_size = 1