Model: Add log messages for model loading

It's useful to know which split method the model is being loaded
with.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
kingbri 2025-08-17 23:09:27 -04:00
parent a3a32c30a4
commit a4d02c2b70

View file

@ -188,6 +188,7 @@ class ExllamaV3Container(BaseModelContainer):
# Set GPU split options
# Enable manual GPU split if provided
if gpu_split:
    self.gpu_split_auto = False
    self.gpu_split = gpu_split
# Causes crash if set with GPU split
@ -464,6 +465,15 @@ class ExllamaV3Container(BaseModelContainer):
if value:
    yield value
logger.info("Loading model: " + str(self.model_dir))
if self.use_tp:
logger.info("Loading with tensor parallel")
elif self.gpu_split_auto:
logger.info("Loading with autosplit")
else:
logger.info("Loading with a manual GPU split (or a one GPU setup)")
for value in self.model.load_gen(
    tensor_p=self.use_tp,
    tp_backend=self.tp_backend,