Model.py TP changes
This commit is contained in:
parent
6623dbcd86
commit
60ae419746
1 changed file with 9 additions and 1 deletion
|
|
@@ -163,13 +163,19 @@ class ExllamaV3Container(BaseModelContainer):
|
||||||
gpu_split_auto = unwrap(kwargs.get("gpu_split_auto"), True)
|
gpu_split_auto = unwrap(kwargs.get("gpu_split_auto"), True)
|
||||||
gpu_split = unwrap(kwargs.get("gpu_split"), None)
|
gpu_split = unwrap(kwargs.get("gpu_split"), None)
|
||||||
gpu_device_list = list(range(0, gpu_count))
|
gpu_device_list = list(range(0, gpu_count))
|
||||||
|
use_tp = unwrap(kwargs.get("tensor_parallel"), False)
|
||||||
|
|
||||||
# Set GPU split options
|
# Set GPU split options
|
||||||
if gpu_count == 1:
|
if gpu_count == 1:
|
||||||
self.gpu_split_auto = False
|
self.gpu_split_auto = False
|
||||||
logger.info("Disabling GPU split because one GPU is in use.")
|
logger.info("Disabling GPU split because one GPU is in use.")
|
||||||
else:
|
else:
|
||||||
# TODO: Set tensor parallel
|
# Set tensor parallel
|
||||||
|
if use_tp:
|
||||||
|
self.use_tp = True
|
||||||
|
|
||||||
|
# TP has its own autosplit loader
|
||||||
|
self.gpu_split_auto = False
|
||||||
|
|
||||||
# Set GPU split options
|
# Set GPU split options
|
||||||
# Enable manual GPU split if provided
|
# Enable manual GPU split if provided
|
||||||
|
|
@@ -451,6 +457,8 @@ class ExllamaV3Container(BaseModelContainer):
|
||||||
yield value
|
yield value
|
||||||
|
|
||||||
for value in self.model.load_gen(
|
for value in self.model.load_gen(
|
||||||
|
tensor_p=self.use_tp,
|
||||||
|
tp_backend="native",
|
||||||
reserve_per_device=self.autosplit_reserve,
|
reserve_per_device=self.autosplit_reserve,
|
||||||
use_per_device=self.gpu_split,
|
use_per_device=self.gpu_split,
|
||||||
callback=progress_callback,
|
callback=progress_callback,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue