Model: Fix autosplit reserve crash with GPU split
ExllamaV3 does not accept autosplit_reserve and gpu_split at the same time.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
parent
0858b6d4b2
commit
084916c04f
1 changed file with 6 additions and 2 deletions
|
|
@@ -69,9 +69,9 @@ class ExllamaV3Container(BaseModelContainer):
|
||||||
generator: Optional[AsyncGenerator] = None
|
generator: Optional[AsyncGenerator] = None
|
||||||
|
|
||||||
# Class-specific vars
|
# Class-specific vars
|
||||||
gpu_split: List[float] | None = None
|
gpu_split: Optional[List[float]] = None
|
||||||
gpu_split_auto: bool = True
|
gpu_split_auto: bool = True
|
||||||
autosplit_reserve: List[float] = [96 / 1024]
|
autosplit_reserve: Optional[List[float]] = [96 / 1024]
|
||||||
use_tp: bool = False
|
use_tp: bool = False
|
||||||
max_seq_len: int = 4096
|
max_seq_len: int = 4096
|
||||||
cache_size: int = 4096
|
cache_size: int = 4096
|
||||||
|
|
@@ -155,6 +155,10 @@ class ExllamaV3Container(BaseModelContainer):
|
||||||
if gpu_split:
|
if gpu_split:
|
||||||
self.gpu_split = gpu_split
|
self.gpu_split = gpu_split
|
||||||
|
|
||||||
|
# Causes crash if set with GPU split
|
||||||
|
# TODO: Remove when fixed in exllama upstream
|
||||||
|
self.autosplit_reserve = None
|
||||||
|
|
||||||
gpu_device_list = [
|
gpu_device_list = [
|
||||||
device_idx
|
device_idx
|
||||||
for device_idx, memory in enumerate(self.gpu_split)
|
for device_idx, memory in enumerate(self.gpu_split)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue