From 102af306e50f5a9259c42b4352dfea00e52727e8 Mon Sep 17 00:00:00 2001
From: DocShotgun <126566557+DocShotgun@users.noreply.github.com>
Date: Fri, 1 Aug 2025 10:59:13 -0700
Subject: [PATCH] Config: Remove developer arg cuda_malloc_backend

cudaMallocAsync is now enabled by default on supported configurations.

---
 common/config_models.py | 3 ---
 config_sample.yml       | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/common/config_models.py b/common/config_models.py
index 46a7b5e..b2b36e4 100644
--- a/common/config_models.py
+++ b/common/config_models.py
@@ -419,9 +419,6 @@ class DeveloperConfig(BaseConfigModel):
     disable_request_streaming: Optional[bool] = Field(
         False, description=("Disable API request streaming (default: False).")
     )
-    cuda_malloc_backend: Optional[bool] = Field(
-        False, description=("Enable the torch CUDA malloc backend (default: False).")
-    )
     realtime_process_priority: Optional[bool] = Field(
         False,
         description=(
diff --git a/config_sample.yml b/config_sample.yml
index 1ee82a9..97cafa6 100644
--- a/config_sample.yml
+++ b/config_sample.yml
@@ -216,9 +216,6 @@ developer:
   # Disable API request streaming (default: False).
   disable_request_streaming: false
 
-  # Enable the torch CUDA malloc backend (default: False).
-  cuda_malloc_backend: false
-
   # Set process to use a higher priority.
   # For realtime process priority, run as administrator or sudo.
   # Otherwise, the priority will be set to high.