From 3340c3bf2f9456270390065a35c12b53a1c8739b Mon Sep 17 00:00:00 2001
From: kingbri
Date: Mon, 16 Sep 2024 00:01:30 -0400
Subject: [PATCH] Config: Rewrite descriptions

This makes both config.yml and args more descriptive than before.

Signed-off-by: kingbri
---
 common/config_models.py | 218 ++++++++++++++++++++++++++--------------
 1 file changed, 141 insertions(+), 77 deletions(-)

diff --git a/common/config_models.py b/common/config_models.py
index 637348c..e81b358 100644
--- a/common/config_models.py
+++ b/common/config_models.py
@@ -1,6 +1,6 @@
 from inspect import getdoc
 from pathlib import Path
-from pydantic import AliasChoices, BaseModel, ConfigDict, Field, PrivateAttr
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
 from textwrap import dedent
 from typing import List, Literal, Optional, Union
@@ -57,18 +57,37 @@ class UtilityActions(BaseConfigModel):
 class NetworkConfig(BaseConfigModel):
     """Options for networking"""
 
-    host: Optional[str] = Field("127.0.0.1", description=("The IP to host on"))
-    port: Optional[int] = Field(5000, description=("The port to host on"))
+    host: Optional[str] = Field(
+        "127.0.0.1",
+        description=(
+            "The IP to host on (default: 127.0.0.1).\n"
+            "Use 0.0.0.0 to expose on all network adapters."
+        ),
+    )
+    port: Optional[int] = Field(
+        5000, description=("The port to host on (default: 5000).")
+    )
     disable_auth: Optional[bool] = Field(
-        False, description=("Disable HTTP token authentication with requests")
+        False,
+        description=(
+            "Disable HTTP token authentication with requests.\n"
+            "WARNING: This will make your instance vulnerable!\n"
+            "Turn on this option if you are ONLY connecting from localhost."
+        ),
     )
     send_tracebacks: Optional[bool] = Field(
         False,
-        description=("Decide whether to send error tracebacks over the API"),
+        description=(
+            "Send tracebacks over the API (default: False).\n"
+            "NOTE: Only enable this for debug purposes."
+        ),
     )
     api_servers: Optional[List[Literal["OAI", "Kobold"]]] = Field(
         default_factory=list,
-        description=("API servers to enable. Options: (OAI, Kobold)"),
+        description=(
+            'Select API servers to enable (default: ["OAI"]).\n'
+            "Possible values: OAI, Kobold."
+        ),
     )
@@ -79,18 +98,18 @@ class LoggingConfig(BaseConfigModel):
 
     log_prompt: Optional[bool] = Field(
         False,
-        description=("Enable prompt logging"),
-        validation_alias=AliasChoices("log_prompt", "prompt"),
+        description=("Enable prompt logging (default: False)."),
     )
     log_generation_params: Optional[bool] = Field(
         False,
-        description=("Enable generation parameter logging"),
-        validation_alias=AliasChoices("log_generation_params", "generation_params"),
+        description=("Enable generation parameter logging (default: False)."),
     )
     log_requests: Optional[bool] = Field(
         False,
-        description=("Enable request logging"),
-        validation_alias=AliasChoices("log_requests", "requests"),
+        description=(
+            "Enable request logging (default: False).\n"
+            "NOTE: Only use this for debugging!"
+        ),
     )
@@ -105,101 +124,117 @@ class ModelConfig(BaseConfigModel):
     model_dir: str = Field(
         "models",
         description=(
-            "Overrides the directory to look for models (default: models). Windows "
-            "users, do NOT put this path in quotes."
+            "Directory to look for models (default: models).\n"
+            "Windows users, do NOT put this path in quotes!"
+        ),
+    )
+    inline_model_loading: Optional[bool] = Field(
+        False,
+        description=(
+            "Allow direct loading of models "
+            "from a completion or chat completion request (default: False)."
         ),
     )
     use_dummy_models: Optional[bool] = Field(
         False,
         description=(
-            "Sends dummy model names when the models endpoint is queried. Enable this "
-            "if looking for specific OAI models."
+            "Sends dummy model names when the models endpoint is queried.\n"
+            "Enable this if the client is looking for specific OAI models."
         ),
     )
     model_name: Optional[str] = Field(
         None,
         description=(
-            "An initial model to load. Make sure the model is located in the model "
-            "directory! REQUIRED: This must be filled out to load a model on startup."
+            "An initial model to load.\n"
+            "Make sure the model is located in the model directory!\n"
+            "REQUIRED: This must be filled out to load a model on startup."
         ),
     )
     use_as_default: List[str] = Field(
         default_factory=list,
         description=(
-            "Names of args to use as a default fallback for API load requests "
-            "(default: []). Example: ['max_seq_len', 'cache_mode']"
+            "Names of args to use as a fallback for API load requests (default: []).\n"
+            "For example, if you always want cache_mode to be Q4 "
+            'instead of the initial load value, add "cache_mode" to this array.\n'
+            "Example: ['max_seq_len', 'cache_mode']."
         ),
     )
     max_seq_len: Optional[int] = Field(
         None,
         description=(
-            "Max sequence length. Fetched from the model's base sequence length in "
-            "config.json by default."
+            "Max sequence length (default: Empty).\n"
+            "Fetched from the model's base sequence length in config.json by default."
         ),
         ge=0,
     )
     override_base_seq_len: Optional[int] = Field(
         None,
         description=(
-            "Overrides base model context length. WARNING: Only use this if the "
-            "model's base sequence length is incorrect."
+            "Overrides base model context length (default: Empty).\n"
+            "WARNING: Don't set this unless you know what you're doing!\n"
+            "Again, do NOT use this to configure context length; "
+            "use max_seq_len above."
         ),
         ge=0,
     )
     tensor_parallel: Optional[bool] = Field(
         False,
         description=(
-            "Load model with tensor parallelism. Fallback to autosplit if GPU split "
-            "isn't provided."
+            "Load model with tensor parallelism.\n"
+            "Falls back to autosplit if GPU split isn't provided.\n"
+            "This ignores the gpu_split_auto value."
         ),
     )
     gpu_split_auto: Optional[bool] = Field(
         True,
         description=(
-            "Automatically allocate resources to GPUs (default: True). Not parsed for "
-            "single GPU users."
+            "Automatically allocate resources to GPUs (default: True).\n"
+            "Not parsed for single GPU users."
         ),
     )
     autosplit_reserve: List[int] = Field(
         [96],
         description=(
-            "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). "
+            "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0).\n"
             "Represented as an array of MB per GPU."
         ),
     )
     gpu_split: List[float] = Field(
         default_factory=list,
         description=(
-            "An integer array of GBs of VRAM to split between GPUs (default: []). "
+            "An array of GBs of VRAM to split between GPUs (default: []).\n"
             "Used with tensor parallelism."
         ),
     )
     rope_scale: Optional[float] = Field(
         1.0,
         description=(
-            "Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the "
-            "model was trained on long context with rope."
+            "Rope scale (default: 1.0).\n"
+            "Same as compress_pos_emb.\n"
+            "Use if the model was trained on long context with rope.\n"
+            "Leave blank to pull the value from the model."
         ),
     )
     rope_alpha: Optional[Union[float, Literal["auto"]]] = Field(
         1.0,
         description=(
-            "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto- "
-            "calculate."
+            "Rope alpha (default: 1.0).\n"
+            'Same as alpha_value. Set to "auto" to auto-calculate.'
         ),
     )
     cache_mode: Optional[CACHE_SIZES] = Field(
         "FP16",
         description=(
-            "Enable different cache modes for VRAM savings (default: FP16). Possible "
-            f"values: {str(CACHE_SIZES)[15:-1]}"
+            "Enable different cache modes for VRAM savings (default: FP16).\n"
+            f"Possible values: {str(CACHE_SIZES)[15:-1]}."
         ),
     )
     cache_size: Optional[int] = Field(
         None,
         description=(
-            "Size of the prompt cache to allocate (default: max_seq_len). Must be a "
-            "multiple of 256."
+            "Size of the prompt cache to allocate (default: max_seq_len).\n"
+            "Must be a multiple of 256 and can't be less than max_seq_len.\n"
+            "For CFG, set this to 2 * max_seq_len."
        ),
         multiple_of=256,
         gt=0,
     )
@@ -207,39 +242,48 @@ class ModelConfig(BaseConfigModel):
     chunk_size: Optional[int] = Field(
         2048,
         description=(
-            "Chunk size for prompt ingestion (default: 2048). A lower value reduces "
-            "VRAM usage but decreases ingestion speed."
+            "Chunk size for prompt ingestion (default: 2048).\n"
+            "A lower value reduces VRAM usage but decreases ingestion speed.\n"
+            "NOTE: Effects vary depending on the model.\n"
+            "An ideal value is between 512 and 4096."
         ),
         gt=0,
     )
     max_batch_size: Optional[int] = Field(
         None,
         description=(
-            "Set the maximum number of prompts to process at one time (default: "
-            "None/Automatic). Automatically calculated if left blank."
+            "Set the maximum number of prompts to process at one time "
+            "(default: None/Automatic).\n"
+            "Automatically calculated if left blank.\n"
+            "NOTE: Only available for NVIDIA Ampere (30 series) and newer GPUs."
         ),
         ge=1,
     )
     prompt_template: Optional[str] = Field(
         None,
         description=(
-            "Set the prompt template for this model. If empty, attempts to look for "
-            "the model's chat template."
+            "Set the prompt template for this model (default: None).\n"
+            "If empty, attempts to look for the model's chat template.\n"
+            "If a model contains multiple templates in its tokenizer_config.json,\n"
+            "set prompt_template to the name of the template you want to use.\n"
+            "NOTE: Only works with chat completion message lists!"
         ),
     )
     num_experts_per_token: Optional[int] = Field(
         None,
         description=(
-            "Number of experts to use per token. Fetched from the model's "
-            "config.json. For MoE models only."
+            "Number of experts to use per token.\n"
+            "Fetched from the model's config.json if empty.\n"
+            "NOTE: For MoE models only.\n"
+            "WARNING: Don't set this unless you know what you're doing!"
         ),
         ge=1,
     )
     fasttensors: Optional[bool] = Field(
         False,
         description=(
-            "Enables fasttensors to possibly increase model loading speeds (default: "
-            "False)."
+            "Enables fasttensors to possibly increase model loading speeds "
+            "(default: False)."
         ),
     )
@@ -256,36 +300,35 @@ class DraftModelConfig(BaseConfigModel):
     # TODO: convert this to a pathlib.path?
     draft_model_dir: Optional[str] = Field(
         "models",
-        description=(
-            "Overrides the directory to look for draft models (default: models)"
-        ),
+        description=("Directory to look for draft models (default: models)."),
     )
     draft_model_name: Optional[str] = Field(
         None,
         description=(
-            "An initial draft model to load. Ensure the model is in the model"
-            "directory."
+            "An initial draft model to load.\n"
+            "Ensure the model is in the model directory."
         ),
     )
     draft_rope_scale: Optional[float] = Field(
         1.0,
         description=(
-            "Rope scale for draft models (default: 1.0). Same as compress_pos_emb. "
+            "Rope scale for draft models (default: 1.0).\n"
+            "Same as compress_pos_emb.\n"
             "Use if the draft model was trained on long context with rope."
         ),
     )
     draft_rope_alpha: Optional[float] = Field(
         None,
         description=(
-            "Rope alpha for draft models (default: None). Same as alpha_value. Leave "
-            "blank to auto-calculate the alpha value."
+            "Rope alpha for draft models (default: None).\n"
+            "Same as alpha_value. Leave blank to auto-calculate."
         ),
     )
     draft_cache_mode: Optional[CACHE_SIZES] = Field(
         "FP16",
         description=(
-            "Cache mode for draft models to save VRAM (default: FP16). Possible "
-            f"values: {str(CACHE_SIZES)[15:-1]}"
+            "Cache mode for draft models to save VRAM (default: FP16).\n"
+            f"Possible values: {str(CACHE_SIZES)[15:-1]}."
         ),
     )
@@ -293,10 +336,10 @@ class DraftModelConfig(BaseConfigModel):
 class LoraInstanceModel(BaseConfigModel):
     """Model representing an instance of a Lora."""
 
-    name: str = Field(..., description=("Name of the LoRA model"))
+    name: str = Field(..., description=("Name of the LoRA model."))
     scaling: float = Field(
         1.0,
-        description=("Scaling factor for the LoRA model (default: 1.0)"),
+        description=("Scaling factor for the LoRA model (default: 1.0)."),
         ge=0,
     )
@@ -306,13 +349,13 @@ class LoraConfig(BaseConfigModel):
 
     # TODO: convert this to a pathlib.path?
     lora_dir: Optional[str] = Field(
-        "loras", description=("Directory to look for LoRAs (default: 'loras')")
+        "loras", description=("Directory to look for LoRAs (default: loras).")
     )
     loras: Optional[List[LoraInstanceModel]] = Field(
         None,
         description=(
-            "List of LoRAs to load and associated scaling factors (default scaling: "
-            "1.0)"
+            "List of LoRAs to load and associated scaling factors "
+            "(default scale: 1.0)."
         ),
     )
@@ -327,19 +370,20 @@ class EmbeddingsConfig(BaseConfigModel):
     # TODO: convert this to a pathlib.path?
     embedding_model_dir: Optional[str] = Field(
         "models",
-        description=(
-            "Overrides directory to look for embedding models (default: models)"
-        ),
+        description=("Directory to look for embedding models (default: models)."),
     )
     embeddings_device: Optional[Literal["cpu", "auto", "cuda"]] = Field(
         "cpu",
         description=(
-            "Device to load embedding models on (default: cpu). Possible values: cpu, "
-            "auto, cuda. If using an AMD GPU, set this value to 'cuda'."
+            "Device to load embedding models on (default: cpu).\n"
+            "Possible values: cpu, auto, cuda.\n"
+            "NOTE: It's recommended to load embedding models on the CPU.\n"
+            "If using an AMD GPU, set this value to 'cuda'."
         ),
     )
     embedding_model_name: Optional[str] = Field(
-        None, description=("The embeddings model to load")
+        None,
+        description=("An initial embedding model to load on the infinity backend."),
     )
@@ -347,7 +391,13 @@ class SamplingConfig(BaseConfigModel):
     """Options for Sampling"""
 
     override_preset: Optional[str] = Field(
-        None, description=("Select a sampler override preset")
+        None,
+        description=(
+            "Select a sampler override preset (default: None).\n"
+            "Find this in the sampler-overrides folder.\n"
+            "This overrides default fallbacks for sampler values "
+            "that are passed to the API."
+        ),
     )
@@ -355,22 +405,33 @@ class DeveloperConfig(BaseConfigModel):
     """Options for development and experimentation"""
 
     unsafe_launch: Optional[bool] = Field(
-        False, description=("Skip Exllamav2 version check")
+        False,
+        description=(
+            "Skip Exllamav2 version check (default: False).\n"
+            "WARNING: It's highly recommended to update your dependencies rather "
+            "than enabling this flag."
+        ),
     )
     disable_request_streaming: Optional[bool] = Field(
-        False, description=("Disables API request streaming")
+        False, description=("Disable API request streaming (default: False).")
     )
     cuda_malloc_backend: Optional[bool] = Field(
-        False, description=("Runs with the pytorch CUDA malloc backend")
+        False, description=("Enable the torch CUDA malloc backend (default: False).")
    )
    uvloop: Optional[bool] = Field(
-        False, description=("Run asyncio using Uvloop or Winloop")
+        False,
+        description=(
+            "Run asyncio using Uvloop or Winloop, which can improve performance.\n"
+            "NOTE: It's recommended to enable this, but if something breaks, "
+            "turn this off."
+        ),
    )
    realtime_process_priority: Optional[bool] = Field(
        False,
        description=(
-            "Set process to use a higher priority For realtime process priority, run "
-            "as administrator or sudo Otherwise, the priority will be set to high"
+            "Set process to use a higher priority.\n"
+            "For realtime process priority, run as administrator or sudo.\n"
+            "Otherwise, the priority will be set to high."
        ),
    )
@@ -451,7 +512,10 @@ def generate_config_file(
             value = subfield_data.default
             value = value if value is not None else ""
             value = value if value is not PydanticUndefined else ""
-            yaml += f"{' ' * indentation}# {subfield_data.description}\n"
+
+            for line in subfield_data.description.splitlines():
+                yaml += f"{' ' * indentation}# {line}\n"
+
             yaml += f"{' ' * indentation}{subfield}: {value}\n"
 
     with open(filename, "w") as f:
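
Note: below is a minimal standalone sketch (not part of the patch) of what the new splitlines() loop in generate_config_file produces. The sample description and the two-space indentation are assumptions for illustration. The motivation: the old single f-string emitted the description's embedded newlines verbatim, so continuation lines lacked the "# " prefix and broke the generated config.yml comments.

    # Sketch only: mirrors the patched comment-rendering loop.
    description = (
        "The IP to host on (default: 127.0.0.1).\n"
        "Use 0.0.0.0 to expose on all network adapters."
    )
    indentation = 2  # assumed indent level for a nested config key
    yaml = ""

    # One "# " comment line per description line.
    for line in description.splitlines():
        yaml += f"{' ' * indentation}# {line}\n"
    yaml += f"{' ' * indentation}host: 127.0.0.1\n"

    print(yaml, end="")
    # Prints:
    #   # The IP to host on (default: 127.0.0.1).
    #   # Use 0.0.0.0 to expose on all network adapters.
    #   host: 127.0.0.1

One caveat: subfield_data.description.splitlines() assumes every field defines a description; a field without one would raise AttributeError here, though every field touched by this patch does set one.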