Model: Fix autosplit reserve crash with GPU split
ExllamaV3 does not accept autosplit_reserve and gpu_split at the same time, so clear the autosplit reserve whenever an explicit GPU split is supplied.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
parent 0858b6d4b2
commit 084916c04f
1 changed file with 6 additions and 2 deletions
@@ -69,9 +69,9 @@ class ExllamaV3Container(BaseModelContainer):
     generator: Optional[AsyncGenerator] = None

     # Class-specific vars
-    gpu_split: List[float] | None = None
+    gpu_split: Optional[List[float]] = None
     gpu_split_auto: bool = True
-    autosplit_reserve: List[float] = [96 / 1024]
+    autosplit_reserve: Optional[List[float]] = [96 / 1024]
     use_tp: bool = False
     max_seq_len: int = 4096
     cache_size: int = 4096
@@ -155,6 +155,10 @@ class ExllamaV3Container(BaseModelContainer):
         if gpu_split:
             self.gpu_split = gpu_split

+            # Causes crash if set with GPU split
+            # TODO: Remove when fixed in exllama upstream
+            self.autosplit_reserve = None
+
             gpu_device_list = [
                 device_idx
                 for device_idx, memory in enumerate(self.gpu_split)
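For context, a minimal runnable sketch of the invariant this commit enforces: an explicit gpu_split and an autosplit_reserve are treated as mutually exclusive, and the reserve is dropped whenever a manual split is supplied. The resolve_split helper and its signature are hypothetical illustrations, not part of the actual container.

from typing import List, Optional, Tuple

def resolve_split(
    gpu_split: Optional[List[float]] = None,
    autosplit_reserve_mb: Optional[List[int]] = None,
) -> Tuple[Optional[List[float]], Optional[List[float]]]:
    """Return (gpu_split, autosplit_reserve) with at most one of them active."""
    # Default reserve mirrors the class attribute: 96 MB expressed in GB (96 / 1024).
    reserve = [mb / 1024 for mb in (autosplit_reserve_mb or [96])]

    if gpu_split:
        # An explicit split plus a reserve crashes ExllamaV3, so the reserve
        # is cleared here, matching `self.autosplit_reserve = None` in the diff.
        return gpu_split, None

    # No explicit split: autosplit keeps the reserve.
    return None, reserve

# Usage: a manual split discards the reserve; autosplit keeps it.
print(resolve_split(gpu_split=[20.0, 24.0]))  # ([20.0, 24.0], None)
print(resolve_split())                        # (None, [0.09375])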