diff --git a/common/model.py b/common/model.py
index 105daa5..14d3cc0 100644
--- a/common/model.py
+++ b/common/model.py
@@ -9,7 +9,9 @@ from loguru import logger
 from typing import Optional
 
 from backends.exllamav2.model import ExllamaV2Container
+from common import config
 from common.logger import get_loading_progress_bar
+from common.utils import unwrap
 
 # Global model container
 container: Optional[ExllamaV2Container] = None
@@ -91,3 +93,19 @@ async def load_loras(lora_dir, **kwargs):
 async def unload_loras():
     """Wrapper to unload loras"""
     await container.unload(loras_only=True)
+
+
+def get_config_default(key, fallback=None, is_draft=False):
+    """Fetches a default value from model config if allowed by the user."""
+
+    model_config = config.model_config()
+    default_keys = unwrap(model_config.get("use_as_default"), [])
+    if key in default_keys:
+        # Is this a draft model load parameter?
+        if is_draft:
+            draft_config = config.draft_model_config()
+            return unwrap(draft_config.get(key), fallback)
+        else:
+            return unwrap(model_config.get(key), fallback)
+    else:
+        return fallback
diff --git a/config_sample.yml b/config_sample.yml
index 1c0304d..0e4b180 100644
--- a/config_sample.yml
+++ b/config_sample.yml
@@ -51,19 +51,29 @@ developer:
   #cuda_malloc_backend: False
 
 # Options for model overrides and loading
+# Please read the comments to understand how arguments are handled between initial and API loads
 model:
   # Overrides the directory to look for models (default: models)
   # Windows users, DO NOT put this path in quotes! This directory will be invalid otherwise.
   model_dir: models
 
+  # Sends dummy model names when the models endpoint is queried
+  # Enable this if the program is looking for a specific OAI model
+  #use_dummy_models: False
+
   # An initial model to load. Make sure the model is located in the model directory!
   # A model can be loaded later via the API.
   # REQUIRED: This must be filled out to load a model on startup!
   model_name:
 
-  # Sends dummy model names when the models endpoint is queried
-  # Enable this if the program is looking for a specific OAI model
-  #use_dummy_models: False
+  # The below parameters only apply for initial loads
+  # All API based loads do NOT inherit these settings unless specified in use_as_default
+
+  # Names of args to use as a default fallback for API load requests (default: [])
+  # For example, if you always want cache_mode to be Q4, not just on the initial model load,
+  # add "cache_mode" to this array
+  # Ex. ["max_seq_len", "cache_mode"]
+  #use_as_default: []
 
   # The below parameters apply only if model_name is set
 
@@ -143,6 +153,9 @@ model:
     # A draft model can be loaded later via the API.
     #draft_model_name: A model name
 
+    # The below parameters only apply for initial loads
+    # All API based loads do NOT inherit these settings unless specified in use_as_default
+
     # Rope scale for draft models (default: 1.0)
     # Same thing as compress_pos_emb
     # Only use if your draft model was trained on long context with rope (check config.json)
diff --git a/endpoints/OAI/types/model.py b/endpoints/OAI/types/model.py
index c549b49..30730b8 100644
--- a/endpoints/OAI/types/model.py
+++ b/endpoints/OAI/types/model.py
@@ -5,6 +5,7 @@ from time import time
 from typing import List, Optional
 
 from common.gen_logging import GenLogPreferences
+from common.model import get_config_default
 
 
 class ModelCardParameters(BaseModel):
@@ -46,60 +47,92 @@ class ModelList(BaseModel):
 class DraftModelLoadRequest(BaseModel):
     """Represents a draft model load request."""
 
+    # Required
     draft_model_name: str
-    draft_rope_scale: Optional[float] = 1.0
+
+    # Config arguments
+    draft_rope_scale: Optional[float] = Field(
+        default_factory=lambda: get_config_default(
+            "draft_rope_scale", 1.0, is_draft=True
+        )
+    )
     draft_rope_alpha: Optional[float] = Field(
         description="Automatically calculated if not present",
-        default=None,
+        default_factory=lambda: get_config_default(
+            "draft_rope_alpha", None, is_draft=True
+        ),
         examples=[1.0],
     )
-    draft_cache_mode: Optional[str] = "FP16"
+    draft_cache_mode: Optional[str] = Field(
+        default_factory=lambda: get_config_default(
+            "draft_cache_mode", "FP16", is_draft=True
+        )
+    )
 
 
 class ModelLoadRequest(BaseModel):
     """Represents a model load request."""
 
+    # Required
     name: str
 
+    # Config arguments
+    # Max seq len is fetched from config.json of the model by default
     max_seq_len: Optional[int] = Field(
         description="Leave this blank to use the model's base sequence length",
-        default=None,
+        default_factory=lambda: get_config_default("max_seq_len"),
         examples=[4096],
     )
     override_base_seq_len: Optional[int] = Field(
         description=(
             "Overrides the model's base sequence length. "
" "Leave blank if unsure" ), - default=None, + default_factory=lambda: get_config_default("override_base_seq_len"), examples=[4096], ) cache_size: Optional[int] = Field( description=("Number in tokens, must be greater than or equal to max_seq_len"), - default=None, + default_factory=lambda: get_config_default("cache_size"), examples=[4096], ) - gpu_split_auto: Optional[bool] = True - autosplit_reserve: Optional[List[float]] = [96] + gpu_split_auto: Optional[bool] = Field( + default_factory=lambda: get_config_default("gpu_split_auto", True) + ) + autosplit_reserve: Optional[List[float]] = Field( + default_factory=lambda: get_config_default("autosplit_reserve", [96]) + ) gpu_split: Optional[List[float]] = Field( - default_factory=list, examples=[[24.0, 20.0]] + default_factory=lambda: get_config_default("gpu_split", []), + examples=[[24.0, 20.0]], ) rope_scale: Optional[float] = Field( description="Automatically pulled from the model's config if not present", - default=None, + default_factory=lambda: get_config_default("rope_scale"), examples=[1.0], ) rope_alpha: Optional[float] = Field( description="Automatically calculated if not present", - default=None, + default_factory=lambda: get_config_default("rope_alpha"), examples=[1.0], ) - # low_mem: Optional[bool] = False - cache_mode: Optional[str] = "FP16" - chunk_size: Optional[int] = 2048 - prompt_template: Optional[str] = None - num_experts_per_token: Optional[int] = None - fasttensors: Optional[bool] = False + cache_mode: Optional[str] = Field( + default_factory=lambda: get_config_default("cache_mode", "FP16") + ) + chunk_size: Optional[int] = Field( + default_factory=lambda: get_config_default("chunk_size", 2048) + ) + prompt_template: Optional[str] = Field( + default_factory=lambda: get_config_default("prompt_template") + ) + num_experts_per_token: Optional[int] = Field( + default_factory=lambda: get_config_default("num_experts_per_token") + ) + fasttensors: Optional[bool] = Field( + default_factory=lambda: get_config_default("fasttensors", False) + ) + + # Non-config arguments draft: Optional[DraftModelLoadRequest] = None skip_queue: Optional[bool] = False