From a46ee62d03b8be772ff1ecdfb231250f34cfe297 Mon Sep 17 00:00:00 2001 From: kingbri Date: Sat, 25 May 2024 20:56:07 -0400 Subject: [PATCH] Model: Clarify warning and device check on load FA2 v2.5.7 and up is not supported below ampere and on AMD GPUs. Clarify the error message and explain what happens as a result. Signed-off-by: kingbri --- backends/exllamav2/model.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index 30f8a2c..52bc890 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -400,21 +400,27 @@ class ExllamaV2Container: async for value in iterate_in_threadpool(model_load_generator): yield value - # Disable paged mode if the user's min GPU is supported (ampere and above) + # Disable paged mode if the user's min GPU isn't supported (ampere and up) + device_list = { + module.device_idx + for module in self.model.modules + if module.device_idx >= 0 + } min_compute_capability = min( - set( - [ - torch.cuda.get_device_capability(device=module.device_idx)[0] - for module in self.model.modules - if module.device_idx >= 0 - ] - ) + torch.cuda.get_device_capability(device=device)[0] + for device in device_list ) + # Compute capability < 8 is not supported by FA2 + # AMD is also unsupported until ROCm updates its FA2 fork if torch.version.hip or min_compute_capability < 8: logger.warning( "An unsupported GPU is found in this configuration. " - "Switching to compatibility mode. This disables parallel batching." + "Switching to compatibility mode. \n" + "This disables parallel batching " + "and features that rely on it (ex. CFG). \n" + "To disable compatibility mode, all GPUs must be ampere " + "(30 series) or newer. AMD GPUs are not supported." ) self.paged = False self.max_batch_size = 1