Model: Add log messages for model loading
It's useful to know which split method the model is being loaded with. Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
parent
a3a32c30a4
commit
a4d02c2b70
1 changed files with 10 additions and 0 deletions
|
|
@ -188,6 +188,7 @@ class ExllamaV3Container(BaseModelContainer):
|
||||||
# Set GPU split options
|
# Set GPU split options
|
||||||
# Enable manual GPU split if provided
|
# Enable manual GPU split if provided
|
||||||
if gpu_split:
|
if gpu_split:
|
||||||
|
self.gpu_split_auto = False
|
||||||
self.gpu_split = gpu_split
|
self.gpu_split = gpu_split
|
||||||
|
|
||||||
# Causes crash if set with GPU split
|
# Causes crash if set with GPU split
|
||||||
|
|
@ -464,6 +465,15 @@ class ExllamaV3Container(BaseModelContainer):
|
||||||
if value:
|
if value:
|
||||||
yield value
|
yield value
|
||||||
|
|
||||||
|
logger.info("Loading model: " + str(self.model_dir))
|
||||||
|
|
||||||
|
if self.use_tp:
|
||||||
|
logger.info("Loading with tensor parallel")
|
||||||
|
elif self.gpu_split_auto:
|
||||||
|
logger.info("Loading with autosplit")
|
||||||
|
else:
|
||||||
|
logger.info("Loading with a manual GPU split (or a one GPU setup)")
|
||||||
|
|
||||||
for value in self.model.load_gen(
|
for value in self.model.load_gen(
|
||||||
tensor_p=self.use_tp,
|
tensor_p=self.use_tp,
|
||||||
tp_backend=self.tp_backend,
|
tp_backend=self.tp_backend,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue