Config: Rewrite descriptions

This makes both config.yml and args more descriptive than before.

Signed-off-by: kingbri <bdashore3@proton.me>
kingbri 2024-09-16 00:01:30 -04:00
parent 4c8bb42ec1
commit 3340c3bf2f


@@ -1,6 +1,6 @@
from inspect import getdoc
from pathlib import Path
from pydantic import AliasChoices, BaseModel, ConfigDict, Field, PrivateAttr
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
from textwrap import dedent
from typing import List, Literal, Optional, Union
@@ -57,18 +57,37 @@ class UtilityActions(BaseConfigModel):
class NetworkConfig(BaseConfigModel):
"""Options for networking"""
host: Optional[str] = Field("127.0.0.1", description=("The IP to host on"))
port: Optional[int] = Field(5000, description=("The port to host on"))
host: Optional[str] = Field(
"127.0.0.1",
description=(
"The IP to host on (default: 127.0.0.1).\n"
"Use 0.0.0.0 to expose on all network adapters."
),
)
port: Optional[int] = Field(
5000, description=("The port to host on (default: 5000).")
)
disable_auth: Optional[bool] = Field(
False, description=("Disable HTTP token authentication with requests")
False,
description=(
"Disable HTTP token authentication with requests.\n"
"WARNING: This will make your instance vulnerable!\n"
"Turn on this option if you are ONLY connecting from localhost."
),
)
send_tracebacks: Optional[bool] = Field(
False,
description=("Decide whether to send error tracebacks over the API"),
description=(
"Send tracebacks over the API (default: False).\n"
"NOTE: Only enable this for debug purposes."
),
)
api_servers: Optional[List[Literal["OAI", "Kobold"]]] = Field(
default_factory=list,
description=("API servers to enable. Options: (OAI, Kobold)"),
description=(
'Select API servers to enable (default: ["OAI"]).\n'
"Possible values: OAI, Kobold."
),
)
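
To illustrate the api_servers constraint described above, here is a minimal standalone sketch (not part of this commit; the class is a trimmed stand-in for NetworkConfig) showing that pydantic only accepts the OAI and Kobold literals:

# Minimal standalone sketch (not part of this commit): the Literal constraint
# on api_servers only accepts "OAI" and "Kobold".
from typing import List, Literal, Optional

from pydantic import BaseModel, Field, ValidationError


class NetworkSketch(BaseModel):
    api_servers: Optional[List[Literal["OAI", "Kobold"]]] = Field(
        default_factory=list
    )


print(NetworkSketch(api_servers=["OAI"]).api_servers)  # ['OAI']

try:
    NetworkSketch(api_servers=["Ollama"])
except ValidationError as err:
    print(err.error_count(), "validation error")  # 1 validation error
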
@@ -79,18 +98,18 @@ class LoggingConfig(BaseConfigModel):
log_prompt: Optional[bool] = Field(
False,
description=("Enable prompt logging"),
validation_alias=AliasChoices("log_prompt", "prompt"),
description=("Enable prompt logging (default: False)."),
)
log_generation_params: Optional[bool] = Field(
False,
description=("Enable generation parameter logging"),
validation_alias=AliasChoices("log_generation_params", "generation_params"),
description=("Enable generation parameter logging (default: False)."),
)
log_requests: Optional[bool] = Field(
False,
description=("Enable request logging"),
validation_alias=AliasChoices("log_requests", "requests"),
description=(
"Enable request logging (default: False).\n"
"NOTE: Only use this for debugging!"
),
)
@@ -105,101 +124,117 @@ class ModelConfig(BaseConfigModel):
model_dir: str = Field(
"models",
description=(
"Overrides the directory to look for models (default: models). Windows "
"users, do NOT put this path in quotes."
"Directory to look for models (default: models).\n"
"Windows users, do NOT put this path in quotes!"
),
)
inline_model_loading: Optional[bool] = Field(
True,
description=(
"Allow direct loading of models "
"from a completion or chat completion request (default: False)."
),
)
use_dummy_models: Optional[bool] = Field(
False,
description=(
"Sends dummy model names when the models endpoint is queried. Enable this "
"if looking for specific OAI models."
"Sends dummy model names when the models endpoint is queried.\n"
"Enable this if the client is looking for specific OAI models."
),
)
model_name: Optional[str] = Field(
None,
description=(
"An initial model to load. Make sure the model is located in the model "
"directory! REQUIRED: This must be filled out to load a model on startup."
"An initial model to load.\n"
"Make sure the model is located in the model directory!\n"
"REQUIRED: This must be filled out to load a model on startup."
),
)
use_as_default: List[str] = Field(
default_factory=list,
description=(
"Names of args to use as a default fallback for API load requests "
"(default: []). Example: ['max_seq_len', 'cache_mode']"
"Names of args to use as a fallback for API load requests (default: []).\n"
"For example, if you always want cache_mode to be Q4 "
'instead of on the inital model load, add "cache_mode" to this array.\n'
"Example: ['max_seq_len', 'cache_mode']."
),
)
max_seq_len: Optional[int] = Field(
None,
description=(
"Max sequence length. Fetched from the model's base sequence length in "
"config.json by default."
"Max sequence length (default: Empty).\n"
"Fetched from the model's base sequence length in config.json by default."
),
ge=0,
)
override_base_seq_len: Optional[int] = Field(
None,
description=(
"Overrides base model context length. WARNING: Only use this if the "
"model's base sequence length is incorrect."
"Overrides base model context length (default: Empty).\n"
"WARNING: Don't set this unless you know what you're doing!\n"
"Again, do NOT use this for configuring context length, "
"use max_seq_len above ^"
),
ge=0,
)
tensor_parallel: Optional[bool] = Field(
False,
description=(
"Load model with tensor parallelism. Fallback to autosplit if GPU split "
"isn't provided."
"Load model with tensor parallelism.\n"
"Falls back to autosplit if GPU split isn't provided.\n"
"This ignores the gpu_split_auto value."
),
)
gpu_split_auto: Optional[bool] = Field(
True,
description=(
"Automatically allocate resources to GPUs (default: True). Not parsed for "
"single GPU users."
"Automatically allocate resources to GPUs (default: True).\n"
"Not parsed for single GPU users."
),
)
autosplit_reserve: List[int] = Field(
[96],
description=(
"Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). "
"Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0).\n"
"Represented as an array of MB per GPU."
),
)
gpu_split: List[float] = Field(
default_factory=list,
description=(
"An integer array of GBs of VRAM to split between GPUs (default: []). "
"An integer array of GBs of VRAM to split between GPUs (default: []).\n"
"Used with tensor parallelism."
),
)
rope_scale: Optional[float] = Field(
1.0,
description=(
"Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the "
"model was trained on long context with rope."
"Rope scale (default: 1.0).\n"
"Same as compress_pos_emb.\n"
"Use if the model was trained on long context with rope.\n"
"Leave blank to pull the value from the model."
),
)
rope_alpha: Optional[Union[float, Literal["auto"]]] = Field(
1.0,
description=(
"Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto- "
"calculate."
"Rope alpha (default: 1.0).\n"
'Same as alpha_value. Set to "auto" to auto-calculate.'
),
)
cache_mode: Optional[CACHE_SIZES] = Field(
"FP16",
description=(
"Enable different cache modes for VRAM savings (default: FP16). Possible "
f"values: {str(CACHE_SIZES)[15:-1]}"
"Enable different cache modes for VRAM savings (default: FP16).\n"
f"Possible values: {str(CACHE_SIZES)[15:-1]}."
),
)
cache_size: Optional[int] = Field(
None,
description=(
"Size of the prompt cache to allocate (default: max_seq_len). Must be a "
"multiple of 256."
"Size of the prompt cache to allocate (default: max_seq_len).\n"
"Must be a multiple of 256 and can't be less than max_seq_len.\n"
"For CFG, set this to 2 * max_seq_len."
),
multiple_of=256,
gt=0,
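
As a worked example of the cache_size rules above (the numbers are hypothetical and not taken from this commit):

# Hypothetical worked example (numbers not from this commit): sizing the cache
# for CFG, which needs room for two sequences.
max_seq_len = 8192                # assumed model context length
cache_size = 2 * max_seq_len      # 16384 tokens of cache for CFG
assert cache_size % 256 == 0      # satisfies the multiple_of=256 constraint
assert cache_size >= max_seq_len  # never smaller than max_seq_len
print(cache_size)                 # 16384
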
@@ -207,39 +242,48 @@ class ModelConfig(BaseConfigModel):
chunk_size: Optional[int] = Field(
2048,
description=(
"Chunk size for prompt ingestion (default: 2048). A lower value reduces "
"VRAM usage but decreases ingestion speed."
"Chunk size for prompt ingestion (default: 2048).\n"
"A lower value reduces VRAM usage but decreases ingestion speed.\n"
"NOTE: Effects vary depending on the model.\n"
"An ideal value is between 512 and 4096."
),
gt=0,
)
max_batch_size: Optional[int] = Field(
None,
description=(
"Set the maximum number of prompts to process at one time (default: "
"None/Automatic). Automatically calculated if left blank."
"Set the maximum number of prompts to process at one time "
"(default: None/Automatic).\n"
"Automatically calculated if left blank.\n"
"NOTE: Only available for Nvidia ampere (30 series) and above GPUs."
),
ge=1,
)
prompt_template: Optional[str] = Field(
None,
description=(
"Set the prompt template for this model. If empty, attempts to look for "
"the model's chat template."
"Set the prompt template for this model. (default: None)\n"
"If empty, attempts to look for the model's chat template.\n"
"If a model contains multiple templates in its tokenizer_config.json,\n"
"set prompt_template to the name of the template you want to use.\n"
"NOTE: Only works with chat completion message lists!"
),
)
num_experts_per_token: Optional[int] = Field(
None,
description=(
"Number of experts to use per token. Fetched from the model's "
"config.json. For MoE models only."
"Number of experts to use per token.\n"
"Fetched from the model's config.json if empty.\n"
"NOTE: For MoE models only.\n"
"WARNING: Don't set this unless you know what you're doing!"
),
ge=1,
)
fasttensors: Optional[bool] = Field(
False,
description=(
"Enables fasttensors to possibly increase model loading speeds (default: "
"False)."
"Enables fasttensors to possibly increase model loading speeds "
"(default: False)."
),
)
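
For the multi-template case mentioned under prompt_template, a rough sketch of what selecting a named template from a tokenizer_config.json could look like (the template names and lookup code are illustrative assumptions, not tabbyAPI's implementation):

# Illustrative sketch only (template names and lookup are assumptions, not
# tabbyAPI's code): tokenizer_config.json can ship several named chat
# templates, and prompt_template would pick one of them by name.
tokenizer_config = {
    "chat_template": [
        {"name": "default", "template": "{{ messages }}"},
        {"name": "tool_use", "template": "{{ messages }}{{ tools }}"},
    ]
}

prompt_template = "tool_use"  # the value a user would put in config.yml
selected = next(
    entry["template"]
    for entry in tokenizer_config["chat_template"]
    if entry["name"] == prompt_template
)
print(selected)  # {{ messages }}{{ tools }}
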
@@ -256,36 +300,35 @@ class DraftModelConfig(BaseConfigModel):
# TODO: convert this to a pathlib.path?
draft_model_dir: Optional[str] = Field(
"models",
description=(
"Overrides the directory to look for draft models (default: models)"
),
description=("Directory to look for draft models (default: models)"),
)
draft_model_name: Optional[str] = Field(
None,
description=(
"An initial draft model to load. Ensure the model is in the model"
"directory."
"An initial draft model to load.\n"
"Ensure the model is in the model directory."
),
)
draft_rope_scale: Optional[float] = Field(
1.0,
description=(
"Rope scale for draft models (default: 1.0). Same as compress_pos_emb. "
"Rope scale for draft models (default: 1.0).\n"
"Same as compress_pos_emb.\n"
"Use if the draft model was trained on long context with rope."
),
)
draft_rope_alpha: Optional[float] = Field(
None,
description=(
"Rope alpha for draft models (default: None). Same as alpha_value. Leave "
"blank to auto-calculate the alpha value."
"Rope alpha for draft models (default: None).\n"
'Same as alpha_value. Set to "auto" to auto-calculate.'
),
)
draft_cache_mode: Optional[CACHE_SIZES] = Field(
"FP16",
description=(
"Cache mode for draft models to save VRAM (default: FP16). Possible "
f"values: {str(CACHE_SIZES)[15:-1]}"
"Cache mode for draft models to save VRAM (default: FP16).\n"
f"Possible values: {str(CACHE_SIZES)[15:-1]}."
),
)
@@ -293,10 +336,10 @@ class DraftModelConfig(BaseConfigModel):
class LoraInstanceModel(BaseConfigModel):
"""Model representing an instance of a Lora."""
name: str = Field(..., description=("Name of the LoRA model"))
name: str = Field(..., description=("Name of the LoRA model."))
scaling: float = Field(
1.0,
description=("Scaling factor for the LoRA model (default: 1.0)"),
description=("Scaling factor for the LoRA model (default: 1.0)."),
ge=0,
)
@@ -306,13 +349,13 @@ class LoraConfig(BaseConfigModel):
# TODO: convert this to a pathlib.path?
lora_dir: Optional[str] = Field(
"loras", description=("Directory to look for LoRAs (default: 'loras')")
"loras", description=("Directory to look for LoRAs (default: loras).")
)
loras: Optional[List[LoraInstanceModel]] = Field(
None,
description=(
"List of LoRAs to load and associated scaling factors (default scaling: "
"1.0)"
"List of LoRAs to load and associated scaling factors "
"(default scale: 1.0)."
),
)
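
As a sketch of how the loras list is shaped (the LoRA name is made up and the classes are trimmed stand-ins for the models above), pydantic coerces plain dicts into LoraInstanceModel entries:

# Minimal sketch (LoRA name is made up; classes are trimmed stand-ins for the
# models above): pydantic coerces plain dicts into nested model instances.
from typing import List, Optional

from pydantic import BaseModel, Field


class LoraInstance(BaseModel):
    name: str
    scaling: float = Field(1.0, ge=0)


class Loras(BaseModel):
    lora_dir: Optional[str] = "loras"
    loras: Optional[List[LoraInstance]] = None


config = Loras(loras=[{"name": "my-style-lora", "scaling": 0.8}])
print(config.loras[0].scaling)  # 0.8
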
@@ -327,19 +370,20 @@ class EmbeddingsConfig(BaseConfigModel):
# TODO: convert this to a pathlib.path?
embedding_model_dir: Optional[str] = Field(
"models",
description=(
"Overrides directory to look for embedding models (default: models)"
),
description=("Directory to look for embedding models (default: models)."),
)
embeddings_device: Optional[Literal["cpu", "auto", "cuda"]] = Field(
"cpu",
description=(
"Device to load embedding models on (default: cpu). Possible values: cpu, "
"auto, cuda. If using an AMD GPU, set this value to 'cuda'."
"Device to load embedding models on (default: cpu).\n"
"Possible values: cpu, auto, cuda.\n"
"NOTE: It's recommended to load embedding models on the CPU.\n"
"If using an AMD GPU, set this value to 'cuda'."
),
)
embedding_model_name: Optional[str] = Field(
None, description=("The embeddings model to load")
None,
description=("An initial embedding model to load on the infinity backend."),
)
@@ -347,7 +391,13 @@ class SamplingConfig(BaseConfigModel):
"""Options for Sampling"""
override_preset: Optional[str] = Field(
None, description=("Select a sampler override preset")
None,
description=(
"Select a sampler override preset (default: None).\n"
"Find this in the sampler-overrides folder.\n"
"This overrides default fallbacks for sampler values "
"that are passed to the API."
),
)
@@ -355,22 +405,33 @@ class DeveloperConfig(BaseConfigModel):
"""Options for development and experimentation"""
unsafe_launch: Optional[bool] = Field(
False, description=("Skip Exllamav2 version check")
False,
description=(
"Skip Exllamav2 version check (default: False).\n"
"WARNING: It's highly recommended to update your dependencies rather "
"than enabling this flag."
),
)
disable_request_streaming: Optional[bool] = Field(
False, description=("Disables API request streaming")
False, description=("Disable API request streaming (default: False).")
)
cuda_malloc_backend: Optional[bool] = Field(
False, description=("Runs with the pytorch CUDA malloc backend")
False, description=("Enable the torch CUDA malloc backend (default: False).")
)
uvloop: Optional[bool] = Field(
False, description=("Run asyncio using Uvloop or Winloop")
False,
description=(
"Run asyncio using Uvloop or Winloop which can improve performance.\n"
"NOTE: It's recommended to enable this, but if something breaks "
"turn this off."
),
)
realtime_process_priority: Optional[bool] = Field(
False,
description=(
"Set process to use a higher priority For realtime process priority, run "
"as administrator or sudo Otherwise, the priority will be set to high"
"Set process to use a higher priority.\n"
"For realtime process priority, run as administrator or sudo.\n"
"Otherwise, the priority will be set to high."
),
)
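
For context on the uvloop option, a generic illustration of what opting into a uvloop-style event loop typically looks like (an assumption for illustration, not tabbyAPI's actual wiring):

# Generic illustration (an assumption, not tabbyAPI's actual wiring): opting
# into uvloop on Linux/macOS; Winloop exposes an equivalent install() on Windows.
import asyncio
import sys


async def main() -> None:
    await asyncio.sleep(0)


if sys.platform != "win32":
    import uvloop

    uvloop.install()  # swap in the faster event loop policy

asyncio.run(main())
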
@@ -451,7 +512,10 @@ def generate_config_file(
value = subfield_data.default
value = value if value is not None else ""
value = value if value is not PydanticUndefined else ""
yaml += f"{' ' * indentation}# {subfield_data.description}\n"
for line in subfield_data.description.splitlines():
yaml += f"{' ' * indentation}# {line}\n"
yaml += f"{' ' * indentation}{subfield}: {value}\n"
with open(filename, "w") as f:
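
To illustrate the new splitlines loop, here is a standalone sketch (not the project's actual generator; the field is a trimmed copy of host from above) showing how each line of a multi-line description becomes its own # comment in the emitted YAML:

# Standalone sketch of the splitlines change above (not the real generator):
# each line of a multi-line description becomes its own YAML comment.
from typing import Optional

from pydantic import BaseModel, Field


class Network(BaseModel):
    host: Optional[str] = Field(
        "127.0.0.1",
        description=(
            "The IP to host on (default: 127.0.0.1).\n"
            "Use 0.0.0.0 to expose on all network adapters."
        ),
    )


indentation = 2
yaml = ""
field = Network.model_fields["host"]
for line in field.description.splitlines():
    yaml += f"{' ' * indentation}# {line}\n"
yaml += f"{' ' * indentation}host: {field.default}\n"
print(yaml, end="")
#   # The IP to host on (default: 127.0.0.1).
#   # Use 0.0.0.0 to expose on all network adapters.
#   host: 127.0.0.1
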