Model: Add tensor_parallel_backend option

This allows users to choose either the NCCL or the native tensor-parallel backend depending on their GPU setup. NCCL is only available with Linux-built wheels. Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
2025-08-17 21:42:30 -04:00 · 2025-08-17 21:42:30 -04:00 · 43f9483bc4
commit 43f9483bc4
parent b9952f319e
4 changed files with 26 additions and 2 deletions
--- a/common/config_models.py
+++ b/common/config_models.py
@@ -183,11 +183,20 @@ class ModelConfig(BaseConfigModel):
    tensor_parallel: Optional[bool] = Field(
        False,
        description=(
-            "Load model with tensor parallelism.\n"
+            "Load model with tensor parallelism (default: False).\n"
            "Falls back to autosplit if GPU split isn't provided.\n"
            "This ignores the gpu_split_auto value."
        ),
    )
+    tensor_parallel_backend: Optional[str] = Field(
+        "native",
+        description=(
+            "Sets a backend type for tensor parallelism. (default: native).\n"
+            "Options: native, nccl\n"
+            "Native is recommended for PCIe GPUs\n"
+            "NCCL is recommended for NVLink."
+        ),
+    )
    gpu_split_auto: Optional[bool] = Field(
        True,
        description=(