Model: Fix autosplit reserve crash with GPU split

ExllamaV3 does not accept autosplit_reserve and gpu_split at the same
time.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
kingbri 2025-05-17 00:51:14 -04:00
parent 0858b6d4b2
commit 084916c04f

View file

@@ -69,9 +69,9 @@ class ExllamaV3Container(BaseModelContainer):
generator: Optional[AsyncGenerator] = None
# Class-specific vars
gpu_split: List[float] | None = None
gpu_split: Optional[List[float]] = None
gpu_split_auto: bool = True
autosplit_reserve: List[float] = [96 / 1024]
autosplit_reserve: Optional[List[float]] = [96 / 1024]
use_tp: bool = False
max_seq_len: int = 4096
cache_size: int = 4096
@@ -155,6 +155,10 @@ class ExllamaV3Container(BaseModelContainer):
if gpu_split:
self.gpu_split = gpu_split
# Causes crash if set with GPU split
# TODO: Remove when fixed in exllama upstream
self.autosplit_reserve = None
gpu_device_list = [
device_idx
for device_idx, memory in enumerate(self.gpu_split)