Config: Rewrite descriptions

This makes both config.yml and args more descriptive than before.

Signed-off-by: kingbri <bdashore3@proton.me>
parent 4c8bb42ec1
commit 3340c3bf2f
1 changed file with 141 additions and 77 deletions
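As a quick illustration of the intent (not code from this commit): the rewritten Field descriptions can double as CLI help text, so --help output becomes more descriptive as well. The NetworkConfig subset, the argparse wiring, and the flag names below are assumptions made for the example, not TabbyAPI's actual argument-building code.

import argparse

from pydantic import BaseModel, Field


class NetworkConfig(BaseModel):
    host: str = Field(
        "127.0.0.1",
        description=(
            "The IP to host on (default: 127.0.0.1).\n"
            "Use 0.0.0.0 to expose on all network adapters."
        ),
    )
    port: int = Field(5000, description="The port to host on (default: 5000).")


parser = argparse.ArgumentParser(prog="main.py")
for name, field in NetworkConfig.model_fields.items():
    # The (now multi-line) description doubles as the argument's help string.
    parser.add_argument(f"--{name}", default=field.default, help=field.description)

print(parser.format_help())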
@@ -1,6 +1,6 @@
 from inspect import getdoc
 from pathlib import Path
-from pydantic import AliasChoices, BaseModel, ConfigDict, Field, PrivateAttr
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
 from textwrap import dedent
 from typing import List, Literal, Optional, Union

@@ -57,18 +57,37 @@ class UtilityActions(BaseConfigModel):
 class NetworkConfig(BaseConfigModel):
     """Options for networking"""

-    host: Optional[str] = Field("127.0.0.1", description=("The IP to host on"))
-    port: Optional[int] = Field(5000, description=("The port to host on"))
+    host: Optional[str] = Field(
+        "127.0.0.1",
+        description=(
+            "The IP to host on (default: 127.0.0.1).\n"
+            "Use 0.0.0.0 to expose on all network adapters."
+        ),
+    )
+    port: Optional[int] = Field(
+        5000, description=("The port to host on (default: 5000).")
+    )
     disable_auth: Optional[bool] = Field(
-        False, description=("Disable HTTP token authentication with requests")
+        False,
+        description=(
+            "Disable HTTP token authentication with requests.\n"
+            "WARNING: This will make your instance vulnerable!\n"
+            "Turn on this option if you are ONLY connecting from localhost."
+        ),
     )
     send_tracebacks: Optional[bool] = Field(
         False,
-        description=("Decide whether to send error tracebacks over the API"),
+        description=(
+            "Send tracebacks over the API (default: False).\n"
+            "NOTE: Only enable this for debug purposes."
+        ),
     )
     api_servers: Optional[List[Literal["OAI", "Kobold"]]] = Field(
         default_factory=list,
-        description=("API servers to enable. Options: (OAI, Kobold)"),
+        description=(
+            'Select API servers to enable (default: ["OAI"]).\n'
+            "Possible values: OAI, Kobold."
+        ),
     )

@@ -79,18 +98,18 @@ class LoggingConfig(BaseConfigModel):

     log_prompt: Optional[bool] = Field(
         False,
-        description=("Enable prompt logging"),
-        validation_alias=AliasChoices("log_prompt", "prompt"),
+        description=("Enable prompt logging (default: False)."),
     )
     log_generation_params: Optional[bool] = Field(
         False,
-        description=("Enable generation parameter logging"),
-        validation_alias=AliasChoices("log_generation_params", "generation_params"),
+        description=("Enable generation parameter logging (default: False)."),
     )
     log_requests: Optional[bool] = Field(
         False,
-        description=("Enable request logging"),
-        validation_alias=AliasChoices("log_requests", "requests"),
+        description=(
+            "Enable request logging (default: False).\n"
+            "NOTE: Only use this for debugging!"
+        ),
     )

@@ -105,101 +124,117 @@ class ModelConfig(BaseConfigModel):
     model_dir: str = Field(
         "models",
         description=(
-            "Overrides the directory to look for models (default: models). Windows "
-            "users, do NOT put this path in quotes."
+            "Directory to look for models (default: models).\n"
+            "Windows users, do NOT put this path in quotes!"
         ),
     )
+    inline_model_loading: Optional[bool] = Field(
+        True,
+        description=(
+            "Allow direct loading of models "
+            "from a completion or chat completion request (default: False)."
+        ),
+    )
     use_dummy_models: Optional[bool] = Field(
         False,
         description=(
-            "Sends dummy model names when the models endpoint is queried. Enable this "
-            "if looking for specific OAI models."
+            "Sends dummy model names when the models endpoint is queried.\n"
+            "Enable this if the client is looking for specific OAI models."
         ),
     )
     model_name: Optional[str] = Field(
         None,
         description=(
-            "An initial model to load. Make sure the model is located in the model "
-            "directory! REQUIRED: This must be filled out to load a model on startup."
+            "An initial model to load.\n"
+            "Make sure the model is located in the model directory!\n"
+            "REQUIRED: This must be filled out to load a model on startup."
         ),
     )
     use_as_default: List[str] = Field(
         default_factory=list,
         description=(
-            "Names of args to use as a default fallback for API load requests "
-            "(default: []). Example: ['max_seq_len', 'cache_mode']"
+            "Names of args to use as a fallback for API load requests (default: []).\n"
+            "For example, if you always want cache_mode to be Q4 "
+            'instead of on the initial model load, add "cache_mode" to this array.\n'
+            "Example: ['max_seq_len', 'cache_mode']."
         ),
     )
     max_seq_len: Optional[int] = Field(
         None,
         description=(
-            "Max sequence length. Fetched from the model's base sequence length in "
-            "config.json by default."
+            "Max sequence length (default: Empty).\n"
+            "Fetched from the model's base sequence length in config.json by default."
         ),
         ge=0,
     )
     override_base_seq_len: Optional[int] = Field(
         None,
         description=(
-            "Overrides base model context length. WARNING: Only use this if the "
-            "model's base sequence length is incorrect."
+            "Overrides base model context length (default: Empty).\n"
+            "WARNING: Don't set this unless you know what you're doing!\n"
+            "Again, do NOT use this for configuring context length, "
+            "use max_seq_len above ^"
         ),
         ge=0,
     )
     tensor_parallel: Optional[bool] = Field(
         False,
         description=(
-            "Load model with tensor parallelism. Fallback to autosplit if GPU split "
-            "isn't provided."
+            "Load model with tensor parallelism.\n"
+            "Falls back to autosplit if GPU split isn't provided.\n"
+            "This ignores the gpu_split_auto value."
         ),
     )
     gpu_split_auto: Optional[bool] = Field(
         True,
         description=(
-            "Automatically allocate resources to GPUs (default: True). Not parsed for "
-            "single GPU users."
+            "Automatically allocate resources to GPUs (default: True).\n"
+            "Not parsed for single GPU users."
         ),
     )
     autosplit_reserve: List[int] = Field(
         [96],
         description=(
-            "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). "
+            "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0).\n"
             "Represented as an array of MB per GPU."
         ),
     )
     gpu_split: List[float] = Field(
         default_factory=list,
         description=(
-            "An integer array of GBs of VRAM to split between GPUs (default: []). "
+            "An integer array of GBs of VRAM to split between GPUs (default: []).\n"
            "Used with tensor parallelism."
         ),
     )
     rope_scale: Optional[float] = Field(
         1.0,
         description=(
-            "Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the "
-            "model was trained on long context with rope."
+            "Rope scale (default: 1.0).\n"
+            "Same as compress_pos_emb.\n"
+            "Use if the model was trained on long context with rope.\n"
+            "Leave blank to pull the value from the model."
         ),
     )
     rope_alpha: Optional[Union[float, Literal["auto"]]] = Field(
         1.0,
         description=(
-            "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto- "
-            "calculate."
+            "Rope alpha (default: 1.0).\n"
+            'Same as alpha_value. Set to "auto" to auto-calculate.'
         ),
     )
     cache_mode: Optional[CACHE_SIZES] = Field(
         "FP16",
         description=(
-            "Enable different cache modes for VRAM savings (default: FP16). Possible "
-            f"values: {str(CACHE_SIZES)[15:-1]}"
+            "Enable different cache modes for VRAM savings (default: FP16).\n"
+            f"Possible values: {str(CACHE_SIZES)[15:-1]}."
         ),
     )
     cache_size: Optional[int] = Field(
         None,
         description=(
-            "Size of the prompt cache to allocate (default: max_seq_len). Must be a "
-            "multiple of 256."
+            "Size of the prompt cache to allocate (default: max_seq_len).\n"
+            "Must be a multiple of 256 and can't be less than max_seq_len.\n"
+            "For CFG, set this to 2 * max_seq_len."
         ),
         multiple_of=256,
         gt=0,
@@ -207,39 +242,48 @@ class ModelConfig(BaseConfigModel):
     chunk_size: Optional[int] = Field(
         2048,
         description=(
-            "Chunk size for prompt ingestion (default: 2048). A lower value reduces "
-            "VRAM usage but decreases ingestion speed."
+            "Chunk size for prompt ingestion (default: 2048).\n"
+            "A lower value reduces VRAM usage but decreases ingestion speed.\n"
+            "NOTE: Effects vary depending on the model.\n"
+            "An ideal value is between 512 and 4096."
         ),
         gt=0,
     )
     max_batch_size: Optional[int] = Field(
         None,
         description=(
-            "Set the maximum number of prompts to process at one time (default: "
-            "None/Automatic). Automatically calculated if left blank."
+            "Set the maximum number of prompts to process at one time "
+            "(default: None/Automatic).\n"
+            "Automatically calculated if left blank.\n"
+            "NOTE: Only available for Nvidia ampere (30 series) and above GPUs."
         ),
         ge=1,
     )
     prompt_template: Optional[str] = Field(
         None,
         description=(
-            "Set the prompt template for this model. If empty, attempts to look for "
-            "the model's chat template."
+            "Set the prompt template for this model. (default: None)\n"
+            "If empty, attempts to look for the model's chat template.\n"
+            "If a model contains multiple templates in its tokenizer_config.json,\n"
+            "set prompt_template to the name of the template you want to use.\n"
+            "NOTE: Only works with chat completion message lists!"
         ),
     )
     num_experts_per_token: Optional[int] = Field(
         None,
         description=(
-            "Number of experts to use per token. Fetched from the model's "
-            "config.json. For MoE models only."
+            "Number of experts to use per token.\n"
+            "Fetched from the model's config.json if empty.\n"
+            "NOTE: For MoE models only.\n"
+            "WARNING: Don't set this unless you know what you're doing!"
         ),
         ge=1,
     )
     fasttensors: Optional[bool] = Field(
         False,
         description=(
-            "Enables fasttensors to possibly increase model loading speeds (default: "
-            "False)."
+            "Enables fasttensors to possibly increase model loading speeds "
+            "(default: False)."
         ),
     )

@@ -256,36 +300,35 @@ class DraftModelConfig(BaseConfigModel):
     # TODO: convert this to a pathlib.path?
     draft_model_dir: Optional[str] = Field(
         "models",
-        description=(
-            "Overrides the directory to look for draft models (default: models)"
-        ),
+        description=("Directory to look for draft models (default: models)"),
     )
     draft_model_name: Optional[str] = Field(
         None,
         description=(
-            "An initial draft model to load. Ensure the model is in the model"
-            "directory."
+            "An initial draft model to load.\n"
+            "Ensure the model is in the model directory."
         ),
     )
     draft_rope_scale: Optional[float] = Field(
         1.0,
         description=(
-            "Rope scale for draft models (default: 1.0). Same as compress_pos_emb. "
+            "Rope scale for draft models (default: 1.0).\n"
+            "Same as compress_pos_emb.\n"
             "Use if the draft model was trained on long context with rope."
         ),
     )
     draft_rope_alpha: Optional[float] = Field(
         None,
         description=(
-            "Rope alpha for draft models (default: None). Same as alpha_value. Leave "
-            "blank to auto-calculate the alpha value."
+            "Rope alpha for draft models (default: None).\n"
+            'Same as alpha_value. Set to "auto" to auto-calculate.'
         ),
     )
     draft_cache_mode: Optional[CACHE_SIZES] = Field(
         "FP16",
         description=(
-            "Cache mode for draft models to save VRAM (default: FP16). Possible "
-            f"values: {str(CACHE_SIZES)[15:-1]}"
+            "Cache mode for draft models to save VRAM (default: FP16).\n"
+            f"Possible values: {str(CACHE_SIZES)[15:-1]}."
         ),
     )

@@ -293,10 +336,10 @@ class DraftModelConfig(BaseConfigModel):
 class LoraInstanceModel(BaseConfigModel):
     """Model representing an instance of a Lora."""

-    name: str = Field(..., description=("Name of the LoRA model"))
+    name: str = Field(..., description=("Name of the LoRA model."))
     scaling: float = Field(
         1.0,
-        description=("Scaling factor for the LoRA model (default: 1.0)"),
+        description=("Scaling factor for the LoRA model (default: 1.0)."),
         ge=0,
     )

@@ -306,13 +349,13 @@ class LoraConfig(BaseConfigModel):

     # TODO: convert this to a pathlib.path?
     lora_dir: Optional[str] = Field(
-        "loras", description=("Directory to look for LoRAs (default: 'loras')")
+        "loras", description=("Directory to look for LoRAs (default: loras).")
     )
     loras: Optional[List[LoraInstanceModel]] = Field(
         None,
         description=(
-            "List of LoRAs to load and associated scaling factors (default scaling: "
-            "1.0)"
+            "List of LoRAs to load and associated scaling factors "
+            "(default scale: 1.0)."
         ),
     )

@@ -327,19 +370,20 @@ class EmbeddingsConfig(BaseConfigModel):
     # TODO: convert this to a pathlib.path?
     embedding_model_dir: Optional[str] = Field(
         "models",
-        description=(
-            "Overrides directory to look for embedding models (default: models)"
-        ),
+        description=("Directory to look for embedding models (default: models)."),
     )
     embeddings_device: Optional[Literal["cpu", "auto", "cuda"]] = Field(
         "cpu",
         description=(
-            "Device to load embedding models on (default: cpu). Possible values: cpu, "
-            "auto, cuda. If using an AMD GPU, set this value to 'cuda'."
+            "Device to load embedding models on (default: cpu).\n"
+            "Possible values: cpu, auto, cuda.\n"
+            "NOTE: It's recommended to load embedding models on the CPU.\n"
+            "If using an AMD GPU, set this value to 'cuda'."
         ),
     )
     embedding_model_name: Optional[str] = Field(
-        None, description=("The embeddings model to load")
+        None,
+        description=("An initial embedding model to load on the infinity backend."),
     )

@@ -347,7 +391,13 @@ class SamplingConfig(BaseConfigModel):
     """Options for Sampling"""

     override_preset: Optional[str] = Field(
-        None, description=("Select a sampler override preset")
+        None,
+        description=(
+            "Select a sampler override preset (default: None).\n"
+            "Find this in the sampler-overrides folder.\n"
+            "This overrides default fallbacks for sampler values "
+            "that are passed to the API."
+        ),
     )

@@ -355,22 +405,33 @@ class DeveloperConfig(BaseConfigModel):
     """Options for development and experimentation"""

     unsafe_launch: Optional[bool] = Field(
-        False, description=("Skip Exllamav2 version check")
+        False,
+        description=(
+            "Skip Exllamav2 version check (default: False).\n"
+            "WARNING: It's highly recommended to update your dependencies rather "
+            "than enabling this flag."
+        ),
     )
     disable_request_streaming: Optional[bool] = Field(
-        False, description=("Disables API request streaming")
+        False, description=("Disable API request streaming (default: False).")
     )
     cuda_malloc_backend: Optional[bool] = Field(
-        False, description=("Runs with the pytorch CUDA malloc backend")
+        False, description=("Enable the torch CUDA malloc backend (default: False).")
     )
     uvloop: Optional[bool] = Field(
-        False, description=("Run asyncio using Uvloop or Winloop")
+        False,
+        description=(
+            "Run asyncio using Uvloop or Winloop which can improve performance.\n"
+            "NOTE: It's recommended to enable this, but if something breaks "
+            "turn this off."
+        ),
     )
     realtime_process_priority: Optional[bool] = Field(
         False,
         description=(
-            "Set process to use a higher priority For realtime process priority, run "
-            "as administrator or sudo Otherwise, the priority will be set to high"
+            "Set process to use a higher priority.\n"
+            "For realtime process priority, run as administrator or sudo.\n"
+            "Otherwise, the priority will be set to high."
         ),
     )

@@ -451,7 +512,10 @@ def generate_config_file(
             value = subfield_data.default
-            value = value if value is not None else ""
-            yaml += f"{' ' * indentation}# {subfield_data.description}\n"
+            value = value if value is not PydanticUndefined else ""
+
+            for line in subfield_data.description.splitlines():
+                yaml += f"{' ' * indentation}# {line}\n"
+
             yaml += f"{' ' * indentation}{subfield}: {value}\n"

     with open(filename, "w") as f:
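For reference, a minimal runnable sketch (not part of the diff) of what the new splitlines() loop produces: each "\n" inside a Field description becomes its own "# " comment line above the key in the generated config.yml. The Network model, the two-space indentation, and the top-level "network:" key below are assumptions made for illustration; PydanticUndefined is imported here from pydantic_core, which is where pydantic v2 exposes it.

from pydantic import BaseModel, Field
from pydantic_core import PydanticUndefined


class Network(BaseModel):
    host: str = Field(
        "127.0.0.1",
        description=(
            "The IP to host on (default: 127.0.0.1).\n"
            "Use 0.0.0.0 to expose on all network adapters."
        ),
    )


yaml = "network:\n"
indentation = 2
for subfield, subfield_data in Network.model_fields.items():
    value = subfield_data.default
    value = value if value is not PydanticUndefined else ""

    # Each line of the description becomes a separate YAML comment.
    for line in subfield_data.description.splitlines():
        yaml += f"{' ' * indentation}# {line}\n"

    yaml += f"{' ' * indentation}{subfield}: {value}\n"

print(yaml)
# network:
#   # The IP to host on (default: 127.0.0.1).
#   # Use 0.0.0.0 to expose on all network adapters.
#   host: 127.0.0.1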