From 3340c3bf2f9456270390065a35c12b53a1c8739b Mon Sep 17 00:00:00 2001
From: kingbri
Date: Mon, 16 Sep 2024 00:01:30 -0400
Subject: [PATCH] Config: Rewrite descriptions

This makes both config.yml and args more descriptive than before.

Signed-off-by: kingbri
---
 common/config_models.py | 218 ++++++++++++++++++++++++++--------------
 1 file changed, 141 insertions(+), 77 deletions(-)

diff --git a/common/config_models.py b/common/config_models.py
index 637348c..e81b358 100644
--- a/common/config_models.py
+++ b/common/config_models.py
@@ -1,6 +1,6 @@
 from inspect import getdoc
 from pathlib import Path
-from pydantic import AliasChoices, BaseModel, ConfigDict, Field, PrivateAttr
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
 from textwrap import dedent
 from typing import List, Literal, Optional, Union
@@ -57,18 +57,37 @@ class UtilityActions(BaseConfigModel):
 class NetworkConfig(BaseConfigModel):
     """Options for networking"""
 
-    host: Optional[str] = Field("127.0.0.1", description=("The IP to host on"))
-    port: Optional[int] = Field(5000, description=("The port to host on"))
+    host: Optional[str] = Field(
+        "127.0.0.1",
+        description=(
+            "The IP to host on (default: 127.0.0.1).\n"
+            "Use 0.0.0.0 to expose on all network adapters."
+        ),
+    )
+    port: Optional[int] = Field(
+        5000, description=("The port to host on (default: 5000).")
+    )
     disable_auth: Optional[bool] = Field(
-        False, description=("Disable HTTP token authentication with requests")
+        False,
+        description=(
+            "Disable HTTP token authentication with requests.\n"
+            "WARNING: This will make your instance vulnerable!\n"
+            "Turn on this option if you are ONLY connecting from localhost."
+        ),
     )
     send_tracebacks: Optional[bool] = Field(
         False,
-        description=("Decide whether to send error tracebacks over the API"),
+        description=(
+            "Send tracebacks over the API (default: False).\n"
+            "NOTE: Only enable this for debug purposes."
+        ),
     )
     api_servers: Optional[List[Literal["OAI", "Kobold"]]] = Field(
         default_factory=list,
-        description=("API servers to enable. Options: (OAI, Kobold)"),
+        description=(
+            'Select API servers to enable (default: ["OAI"]).\n'
+            "Possible values: OAI, Kobold."
+        ),
     )
@@ -79,18 +98,18 @@ class LoggingConfig(BaseConfigModel):
 
     log_prompt: Optional[bool] = Field(
         False,
-        description=("Enable prompt logging"),
-        validation_alias=AliasChoices("log_prompt", "prompt"),
+        description=("Enable prompt logging (default: False)."),
     )
     log_generation_params: Optional[bool] = Field(
         False,
-        description=("Enable generation parameter logging"),
-        validation_alias=AliasChoices("log_generation_params", "generation_params"),
+        description=("Enable generation parameter logging (default: False)."),
     )
     log_requests: Optional[bool] = Field(
         False,
-        description=("Enable request logging"),
-        validation_alias=AliasChoices("log_requests", "requests"),
+        description=(
+            "Enable request logging (default: False).\n"
+            "NOTE: Only use this for debugging!"
+        ),
     )
@@ -105,101 +124,117 @@ class ModelConfig(BaseConfigModel):
     model_dir: str = Field(
         "models",
         description=(
-            "Overrides the directory to look for models (default: models). Windows "
-            "users, do NOT put this path in quotes."
+            "Directory to look for models (default: models).\n"
+            "Windows users, do NOT put this path in quotes!"
+        ),
+    )
+    inline_model_loading: Optional[bool] = Field(
+        False,
+        description=(
+            "Allow direct loading of models "
+            "from a completion or chat completion request (default: False)."
         ),
     )
     use_dummy_models: Optional[bool] = Field(
         False,
         description=(
-            "Sends dummy model names when the models endpoint is queried. Enable this "
-            "if looking for specific OAI models."
+            "Sends dummy model names when the models endpoint is queried.\n"
+            "Enable this if the client is looking for specific OAI models."
         ),
     )
     model_name: Optional[str] = Field(
         None,
         description=(
-            "An initial model to load. Make sure the model is located in the model "
-            "directory! REQUIRED: This must be filled out to load a model on startup."
+            "An initial model to load.\n"
+            "Make sure the model is located in the model directory!\n"
+            "REQUIRED: This must be filled out to load a model on startup."
         ),
     )
     use_as_default: List[str] = Field(
         default_factory=list,
         description=(
-            "Names of args to use as a default fallback for API load requests "
-            "(default: []). Example: ['max_seq_len', 'cache_mode']"
+            "Names of args to use as a fallback for API load requests (default: []).\n"
+            "For example, if you always want cache_mode to be Q4 "
+            'instead of the initial load value, add "cache_mode" to this array.\n'
+            "Example: ['max_seq_len', 'cache_mode']."
         ),
     )
     max_seq_len: Optional[int] = Field(
         None,
         description=(
-            "Max sequence length. Fetched from the model's base sequence length in "
-            "config.json by default."
+            "Max sequence length (default: Empty).\n"
+            "Fetched from the model's base sequence length in config.json by default."
         ),
         ge=0,
     )
     override_base_seq_len: Optional[int] = Field(
         None,
         description=(
-            "Overrides base model context length. WARNING: Only use this if the "
-            "model's base sequence length is incorrect."
+            "Overrides base model context length (default: Empty).\n"
+            "WARNING: Don't set this unless you know what you're doing!\n"
+            "Again, do NOT use this to configure context length; "
+            "use max_seq_len above."
         ),
         ge=0,
     )
     tensor_parallel: Optional[bool] = Field(
         False,
         description=(
-            "Load model with tensor parallelism. Fallback to autosplit if GPU split "
-            "isn't provided."
+            "Load model with tensor parallelism.\n"
+            "Falls back to autosplit if GPU split isn't provided.\n"
+            "This ignores the gpu_split_auto value."
         ),
     )
     gpu_split_auto: Optional[bool] = Field(
         True,
         description=(
-            "Automatically allocate resources to GPUs (default: True). Not parsed for "
-            "single GPU users."
+            "Automatically allocate resources to GPUs (default: True).\n"
+            "Not parsed for single GPU users."
         ),
     )
     autosplit_reserve: List[int] = Field(
         [96],
         description=(
-            "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). "
+            "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0).\n"
             "Represented as an array of MB per GPU."
         ),
     )
     gpu_split: List[float] = Field(
         default_factory=list,
         description=(
-            "An integer array of GBs of VRAM to split between GPUs (default: []). "
+            "An array of GBs of VRAM to split between GPUs (default: []).\n"
             "Used with tensor parallelism."
         ),
     )
     rope_scale: Optional[float] = Field(
         1.0,
         description=(
-            "Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the "
-            "model was trained on long context with rope."
+            "Rope scale (default: 1.0).\n"
+            "Same as compress_pos_emb.\n"
+            "Use if the model was trained on long context with rope.\n"
+            "Leave blank to pull the value from the model."
         ),
     )
     rope_alpha: Optional[Union[float, Literal["auto"]]] = Field(
         1.0,
         description=(
-            "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto- "
-            "calculate."
+            "Rope alpha (default: 1.0).\n"
+            'Same as alpha_value. Set to "auto" to auto-calculate.'
         ),
     )
     cache_mode: Optional[CACHE_SIZES] = Field(
         "FP16",
         description=(
-            "Enable different cache modes for VRAM savings (default: FP16). Possible "
-            f"values: {str(CACHE_SIZES)[15:-1]}"
+            "Enable different cache modes for VRAM savings (default: FP16).\n"
+            f"Possible values: {str(CACHE_SIZES)[15:-1]}."
         ),
     )
     cache_size: Optional[int] = Field(
         None,
         description=(
-            "Size of the prompt cache to allocate (default: max_seq_len). Must be a "
-            "multiple of 256."
+            "Size of the prompt cache to allocate (default: max_seq_len).\n"
+            "Must be a multiple of 256 and can't be less than max_seq_len.\n"
+            "For CFG, set this to 2 * max_seq_len."
        ),
         multiple_of=256,
         gt=0,
     )
@@ -207,39 +242,48 @@ class ModelConfig(BaseConfigModel):
     chunk_size: Optional[int] = Field(
         2048,
         description=(
-            "Chunk size for prompt ingestion (default: 2048). A lower value reduces "
-            "VRAM usage but decreases ingestion speed."
+            "Chunk size for prompt ingestion (default: 2048).\n"
+            "A lower value reduces VRAM usage but decreases ingestion speed.\n"
+            "NOTE: Effects vary depending on the model.\n"
+            "An ideal value is between 512 and 4096."
         ),
         gt=0,
     )
     max_batch_size: Optional[int] = Field(
         None,
         description=(
-            "Set the maximum number of prompts to process at one time (default: "
-            "None/Automatic). Automatically calculated if left blank."
+            "Set the maximum number of prompts to process at one time "
+            "(default: None/Automatic).\n"
+            "Automatically calculated if left blank.\n"
+            "NOTE: Only available for NVIDIA Ampere (30 series) and newer GPUs."
         ),
         ge=1,
     )
     prompt_template: Optional[str] = Field(
         None,
         description=(
-            "Set the prompt template for this model. If empty, attempts to look for "
-            "the model's chat template."
+            "Set the prompt template for this model (default: None).\n"
+            "If empty, attempts to look for the model's chat template.\n"
+            "If a model contains multiple templates in its tokenizer_config.json,\n"
+            "set prompt_template to the name of the template you want to use.\n"
+            "NOTE: Only works with chat completion message lists!"
         ),
     )
     num_experts_per_token: Optional[int] = Field(
         None,
         description=(
-            "Number of experts to use per token. Fetched from the model's "
-            "config.json. For MoE models only."
+            "Number of experts to use per token.\n"
+            "Fetched from the model's config.json if empty.\n"
+            "NOTE: For MoE models only.\n"
+            "WARNING: Don't set this unless you know what you're doing!"
         ),
         ge=1,
     )
     fasttensors: Optional[bool] = Field(
         False,
         description=(
-            "Enables fasttensors to possibly increase model loading speeds (default: "
-            "False)."
+            "Enables fasttensors to possibly increase model loading speeds "
+            "(default: False)."
         ),
     )
@@ -256,36 +300,35 @@ class DraftModelConfig(BaseConfigModel):
     # TODO: convert this to a pathlib.path?
     draft_model_dir: Optional[str] = Field(
         "models",
-        description=(
-            "Overrides the directory to look for draft models (default: models)"
-        ),
+        description=("Directory to look for draft models (default: models)."),
     )
     draft_model_name: Optional[str] = Field(
         None,
         description=(
-            "An initial draft model to load. Ensure the model is in the model"
-            "directory."
+            "An initial draft model to load.\n"
+            "Ensure the model is in the model directory."
         ),
     )
     draft_rope_scale: Optional[float] = Field(
         1.0,
         description=(
-            "Rope scale for draft models (default: 1.0). Same as compress_pos_emb. "
+            "Rope scale for draft models (default: 1.0).\n"
+            "Same as compress_pos_emb.\n"
             "Use if the draft model was trained on long context with rope."
         ),
     )
     draft_rope_alpha: Optional[float] = Field(
         None,
         description=(
-            "Rope alpha for draft models (default: None). Same as alpha_value. Leave "
-            "blank to auto-calculate the alpha value."
+            "Rope alpha for draft models (default: None).\n"
+            "Same as alpha_value. Leave blank to auto-calculate."
         ),
     )
     draft_cache_mode: Optional[CACHE_SIZES] = Field(
         "FP16",
         description=(
-            "Cache mode for draft models to save VRAM (default: FP16). Possible "
-            f"values: {str(CACHE_SIZES)[15:-1]}"
+            "Cache mode for draft models to save VRAM (default: FP16).\n"
+            f"Possible values: {str(CACHE_SIZES)[15:-1]}."
         ),
     )
@@ -293,10 +336,10 @@ class DraftModelConfig(BaseConfigModel):
 class LoraInstanceModel(BaseConfigModel):
     """Model representing an instance of a Lora."""
 
-    name: str = Field(..., description=("Name of the LoRA model"))
+    name: str = Field(..., description=("Name of the LoRA model."))
     scaling: float = Field(
         1.0,
-        description=("Scaling factor for the LoRA model (default: 1.0)"),
+        description=("Scaling factor for the LoRA model (default: 1.0)."),
         ge=0,
     )
@@ -306,13 +349,13 @@ class LoraConfig(BaseConfigModel):
 
     # TODO: convert this to a pathlib.path?
     lora_dir: Optional[str] = Field(
-        "loras", description=("Directory to look for LoRAs (default: 'loras')")
+        "loras", description=("Directory to look for LoRAs (default: loras).")
     )
     loras: Optional[List[LoraInstanceModel]] = Field(
         None,
         description=(
-            "List of LoRAs to load and associated scaling factors (default scaling: "
-            "1.0)"
+            "List of LoRAs to load and associated scaling factors "
+            "(default scale: 1.0)."
         ),
     )
@@ -327,19 +370,20 @@ class EmbeddingsConfig(BaseConfigModel):
     # TODO: convert this to a pathlib.path?
     embedding_model_dir: Optional[str] = Field(
         "models",
-        description=(
-            "Overrides directory to look for embedding models (default: models)"
-        ),
+        description=("Directory to look for embedding models (default: models)."),
     )
     embeddings_device: Optional[Literal["cpu", "auto", "cuda"]] = Field(
         "cpu",
         description=(
-            "Device to load embedding models on (default: cpu). Possible values: cpu, "
-            "auto, cuda. If using an AMD GPU, set this value to 'cuda'."
+            "Device to load embedding models on (default: cpu).\n"
+            "Possible values: cpu, auto, cuda.\n"
+            "NOTE: It's recommended to load embedding models on the CPU.\n"
+            "If using an AMD GPU, set this value to 'cuda'."
         ),
     )
     embedding_model_name: Optional[str] = Field(
-        None, description=("The embeddings model to load")
+        None,
+        description=("An initial embedding model to load on the infinity backend."),
     )
@@ -347,7 +391,13 @@ class SamplingConfig(BaseConfigModel):
     """Options for Sampling"""
 
     override_preset: Optional[str] = Field(
-        None, description=("Select a sampler override preset")
+        None,
+        description=(
+            "Select a sampler override preset (default: None).\n"
+            "Find this in the sampler-overrides folder.\n"
+            "This overrides default fallbacks for sampler values "
+            "that are passed to the API."
+        ),
     )
@@ -355,22 +405,33 @@ class DeveloperConfig(BaseConfigModel):
     """Options for development and experimentation"""
 
     unsafe_launch: Optional[bool] = Field(
-        False, description=("Skip Exllamav2 version check")
+        False,
+        description=(
+            "Skip Exllamav2 version check (default: False).\n"
+            "WARNING: It's highly recommended to update your dependencies rather "
+            "than enabling this flag."
+        ),
     )
     disable_request_streaming: Optional[bool] = Field(
-        False, description=("Disables API request streaming")
+        False, description=("Disable API request streaming (default: False).")
     )
     cuda_malloc_backend: Optional[bool] = Field(
-        False, description=("Runs with the pytorch CUDA malloc backend")
+        False, description=("Enable the torch CUDA malloc backend (default: False).")
    )
    uvloop: Optional[bool] = Field(
-        False, description=("Run asyncio using Uvloop or Winloop")
+        False,
+        description=(
+            "Run asyncio using Uvloop or Winloop, which can improve performance.\n"
+            "NOTE: It's recommended to enable this, but if something breaks, "
+            "turn this off."
+        ),
    )
    realtime_process_priority: Optional[bool] = Field(
        False,
        description=(
-            "Set process to use a higher priority For realtime process priority, run "
-            "as administrator or sudo Otherwise, the priority will be set to high"
+            "Set process to use a higher priority.\n"
+            "For realtime process priority, run as administrator or sudo.\n"
+            "Otherwise, the priority will be set to high."
        ),
    )
@@ -451,7 +512,10 @@ def generate_config_file(
             value = subfield_data.default
             value = value if value is not None else ""
             value = value if value is not PydanticUndefined else ""
-            yaml += f"{' ' * indentation}# {subfield_data.description}\n"
+
+            for line in subfield_data.description.splitlines():
+                yaml += f"{' ' * indentation}# {line}\n"
+
             yaml += f"{' ' * indentation}{subfield}: {value}\n"
 
     with open(filename, "w") as f:
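
Note: below is a minimal standalone sketch (not part of the patch) of what the new splitlines() loop in generate_config_file produces. The sample description and the two-space indentation are assumptions for illustration. The motivation: the old single f-string emitted the description's embedded newlines verbatim, so continuation lines lacked the "# " prefix and broke the generated config.yml comments.

    # Sketch only: mirrors the patched comment-rendering loop.
    description = (
        "The IP to host on (default: 127.0.0.1).\n"
        "Use 0.0.0.0 to expose on all network adapters."
    )
    indentation = 2  # assumed indent level for a nested config key
    yaml = ""

    # One "# " comment line per description line.
    for line in description.splitlines():
        yaml += f"{' ' * indentation}# {line}\n"
    yaml += f"{' ' * indentation}host: 127.0.0.1\n"

    print(yaml, end="")
    # Prints:
    #   # The IP to host on (default: 127.0.0.1).
    #   # Use 0.0.0.0 to expose on all network adapters.
    #   host: 127.0.0.1

One caveat: subfield_data.description.splitlines() assumes every field defines a description; a field without one would raise AttributeError here, though every field touched by this patch does set one.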