Model.py TP changes
This commit is contained in:
parent
6623dbcd86
commit
60ae419746
1 changed file with 9 additions and 1 deletion
|
|
@ -163,13 +163,19 @@ class ExllamaV3Container(BaseModelContainer):
|
|||
gpu_split_auto = unwrap(kwargs.get("gpu_split_auto"), True)
|
||||
gpu_split = unwrap(kwargs.get("gpu_split"), None)
|
||||
gpu_device_list = list(range(0, gpu_count))
|
||||
use_tp = unwrap(kwargs.get("tensor_parallel"), False)
|
||||
|
||||
# Set GPU split options
|
||||
if gpu_count == 1:
|
||||
self.gpu_split_auto = False
|
||||
logger.info("Disabling GPU split because one GPU is in use.")
|
||||
else:
|
||||
# TODO: Set tensor parallel
|
||||
# Set tensor parallel
|
||||
if use_tp:
|
||||
self.use_tp = True
|
||||
|
||||
# TP has its own autosplit loader
|
||||
self.gpu_split_auto = False
|
||||
|
||||
# Set GPU split options
|
||||
# Enable manual GPU split if provided
|
||||
|
|
@ -451,6 +457,8 @@ class ExllamaV3Container(BaseModelContainer):
|
|||
yield value
|
||||
|
||||
for value in self.model.load_gen(
|
||||
tensor_p=self.use_tp,
|
||||
tp_backend="native",
|
||||
reserve_per_device=self.autosplit_reserve,
|
||||
use_per_device=self.gpu_split,
|
||||
callback=progress_callback,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue