From a4d02c2b7044be394607f5cf6dee6f22621c68a7 Mon Sep 17 00:00:00 2001 From: kingbri <8082010+kingbri1@users.noreply.github.com> Date: Sun, 17 Aug 2025 23:09:27 -0400 Subject: [PATCH] Model: Add log messages for model loading It's useful to know which split method the model is being loaded with. Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com> --- backends/exllamav3/model.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/backends/exllamav3/model.py b/backends/exllamav3/model.py index 53d2e91..217f5bf 100644 --- a/backends/exllamav3/model.py +++ b/backends/exllamav3/model.py @@ -188,6 +188,7 @@ class ExllamaV3Container(BaseModelContainer): # Set GPU split options # Enable manual GPU split if provided if gpu_split: + self.gpu_split_auto = False self.gpu_split = gpu_split # Causes crash if set with GPU split @@ -464,6 +465,15 @@ class ExllamaV3Container(BaseModelContainer): if value: yield value + logger.info("Loading model: " + str(self.model_dir)) + + if self.use_tp: + logger.info("Loading with tensor parallel") + elif self.gpu_split_auto: + logger.info("Loading with autosplit") + else: + logger.info("Loading with a manual GPU split (or a one GPU setup)") + for value in self.model.load_gen( tensor_p=self.use_tp, tp_backend=self.tp_backend,