from inspect import getdoc
from pathlib import Path
from textwrap import dedent
from typing import List, Literal, Optional, Union

from pydantic import AliasChoices, BaseModel, ConfigDict, Field, PrivateAttr
from pydantic_core import PydanticUndefined
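
# Supported K/V cache quantization levels, reused by several fields below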
CACHE_SIZES = Literal["FP16", "Q8", "Q6", "Q4"]


class Metadata(BaseModel):
    """Metadata model for config options"""

    include_in_config: Optional[bool] = Field(True)


class BaseConfigModel(BaseModel):
    """Base model for config models with added metadata"""
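
    # Private attributes skip Pydantic validation/serialization, so this
    # metadata rides along with each section without appearing in exported YAML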
    _metadata: Metadata = PrivateAttr(Metadata())


class ConfigOverrideConfig(BaseConfigModel):
    """Model for overriding a provided config file."""

    # TODO: convert this to a pathlib.Path?
    config: Optional[str] = Field(
        None, description=("Path to an overriding config.yml file")
    )

    _metadata: Metadata = PrivateAttr(Metadata(include_in_config=False))


class UtilityActions(BaseConfigModel):
    """Model used for arg actions."""

    # YAML export options
    export_config: Optional[str] = Field(
        None, description="Generate a template config file"
    )
    config_export_path: Optional[Path] = Field(
        "config_sample.yml", description="Path to export configuration file to"
    )

    # OpenAPI JSON export options
    export_openapi: Optional[bool] = Field(
        False, description="Export OpenAPI schema files"
    )
    openapi_export_path: Optional[Path] = Field(
        "openapi.json", description="Path to export OpenAPI schema to"
    )

    _metadata: Metadata = PrivateAttr(Metadata(include_in_config=False))


class NetworkConfig(BaseConfigModel):
    """Options for networking"""

    host: Optional[str] = Field("127.0.0.1", description=("The IP to host on"))
    port: Optional[int] = Field(5000, description=("The port to host on"))
    disable_auth: Optional[bool] = Field(
        False, description=("Disable HTTP token authentication with requests")
    )
    send_tracebacks: Optional[bool] = Field(
        False,
        description=("Decide whether to send error tracebacks over the API"),
    )
    api_servers: Optional[List[Literal["OAI", "Kobold"]]] = Field(
        default_factory=list,
        description=("API servers to enable. Options: (OAI, Kobold)"),
    )


# TODO: Migrate config.yml to have the log_ prefix
# This is a breaking change.
class LoggingConfig(BaseConfigModel):
    """Options for logging"""
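
    # The validation aliases accept both the new log_-prefixed keys and the
    # old bare keys, keeping legacy config.yml files loadable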
    log_prompt: Optional[bool] = Field(
        False,
        description=("Enable prompt logging"),
        validation_alias=AliasChoices("log_prompt", "prompt"),
    )
    log_generation_params: Optional[bool] = Field(
        False,
        description=("Enable generation parameter logging"),
        validation_alias=AliasChoices("log_generation_params", "generation_params"),
    )
    log_requests: Optional[bool] = Field(
        False,
        description=("Enable request logging"),
        validation_alias=AliasChoices("log_requests", "requests"),
    )


class ModelConfig(BaseConfigModel):
    """
    Options for model overrides and loading
    Please read the comments to understand how arguments are handled
    between initial and API loads
    """

    # TODO: convert this to a pathlib.Path?
    model_dir: str = Field(
        "models",
        description=(
            "Overrides the directory to look for models (default: models). Windows "
            "users, do NOT put this path in quotes."
        ),
    )
    use_dummy_models: Optional[bool] = Field(
        False,
        description=(
            "Sends dummy model names when the models endpoint is queried. Enable this "
            "if looking for specific OAI models."
        ),
    )
    model_name: Optional[str] = Field(
        None,
        description=(
            "An initial model to load. Make sure the model is located in the model "
            "directory! REQUIRED: This must be filled out to load a model on startup."
        ),
    )
    use_as_default: List[str] = Field(
        default_factory=list,
        description=(
            "Names of args to use as a default fallback for API load requests "
            "(default: []). Example: ['max_seq_len', 'cache_mode']"
        ),
    )
    max_seq_len: Optional[int] = Field(
        None,
        description=(
            "Max sequence length. Fetched from the model's base sequence length in "
            "config.json by default."
        ),
        ge=0,
    )
    override_base_seq_len: Optional[int] = Field(
        None,
        description=(
            "Overrides base model context length. WARNING: Only use this if the "
            "model's base sequence length is incorrect."
        ),
        ge=0,
    )
    tensor_parallel: Optional[bool] = Field(
        False,
        description=(
            "Load model with tensor parallelism. Falls back to autosplit if a GPU "
            "split isn't provided."
        ),
    )
    gpu_split_auto: Optional[bool] = Field(
        True,
        description=(
            "Automatically allocate resources to GPUs (default: True). Not parsed for "
            "single GPU users."
        ),
    )
    autosplit_reserve: List[int] = Field(
        [96],
        description=(
            "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). "
            "Represented as an array of MB per GPU."
        ),
    )
    gpu_split: List[float] = Field(
        default_factory=list,
        description=(
            "An array of GBs of VRAM to split between GPUs (default: []). "
            "Used with tensor parallelism."
        ),
    )
    rope_scale: Optional[float] = Field(
        1.0,
        description=(
            "Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the "
            "model was trained on long context with rope."
        ),
    )
    rope_alpha: Optional[Union[float, Literal["auto"]]] = Field(
        1.0,
        description=(
            "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to "
            "auto-calculate."
        ),
    )
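    # str(CACHE_SIZES) renders as "typing.Literal['FP16', 'Q8', 'Q6', 'Q4']";
    # slicing [15:-1] strips the "typing.Literal[" prefix and trailing "]" so
    # only the quoted value list appears in the description below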
    cache_mode: Optional[CACHE_SIZES] = Field(
        "FP16",
        description=(
            "Enable different cache modes for VRAM savings (default: FP16). Possible "
            f"values: {str(CACHE_SIZES)[15:-1]}"
        ),
    )
    cache_size: Optional[int] = Field(
        None,
        description=(
            "Size of the prompt cache to allocate (default: max_seq_len). Must be a "
            "multiple of 256."
        ),
        multiple_of=256,
        gt=0,
    )
    chunk_size: Optional[int] = Field(
        2048,
        description=(
            "Chunk size for prompt ingestion (default: 2048). A lower value reduces "
            "VRAM usage but decreases ingestion speed."
        ),
        gt=0,
    )
    max_batch_size: Optional[int] = Field(
        None,
        description=(
            "Set the maximum number of prompts to process at one time (default: "
            "None/Automatic). Automatically calculated if left blank."
        ),
        ge=1,
    )
    prompt_template: Optional[str] = Field(
        None,
        description=(
            "Set the prompt template for this model. If empty, attempts to look for "
            "the model's chat template."
        ),
    )
    num_experts_per_token: Optional[int] = Field(
        None,
        description=(
            "Number of experts to use per token. Fetched from the model's "
            "config.json. For MoE models only."
        ),
        ge=1,
    )
    fasttensors: Optional[bool] = Field(
        False,
        description=(
            "Enables fasttensors to possibly increase model loading speeds (default: "
            "False)."
        ),
    )

    _metadata: Metadata = PrivateAttr(Metadata())
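    # "model_" is a protected namespace in Pydantic v2; clearing it allows
    # field names like model_name and model_dir without warnings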
    model_config = ConfigDict(protected_namespaces=())


class DraftModelConfig(BaseConfigModel):
    """
    Options for draft models (speculative decoding)
    This will use more VRAM!
    """

    # TODO: convert this to a pathlib.Path?
    draft_model_dir: Optional[str] = Field(
        "models",
        description=(
            "Overrides the directory to look for draft models (default: models)"
        ),
    )
    draft_model_name: Optional[str] = Field(
        None,
        description=(
            "An initial draft model to load. Ensure the model is in the model "
            "directory."
        ),
    )
    draft_rope_scale: Optional[float] = Field(
        1.0,
        description=(
            "Rope scale for draft models (default: 1.0). Same as compress_pos_emb. "
            "Use if the draft model was trained on long context with rope."
        ),
    )
    draft_rope_alpha: Optional[float] = Field(
        None,
        description=(
            "Rope alpha for draft models (default: None). Same as alpha_value. Leave "
            "blank to auto-calculate the alpha value."
        ),
    )
    draft_cache_mode: Optional[CACHE_SIZES] = Field(
        "FP16",
        description=(
            "Cache mode for draft models to save VRAM (default: FP16). Possible "
            f"values: {str(CACHE_SIZES)[15:-1]}"
        ),
    )


class LoraInstanceModel(BaseConfigModel):
    """Model representing an instance of a Lora."""

    name: str = Field(..., description=("Name of the LoRA model"))
    scaling: float = Field(
        1.0,
        description=("Scaling factor for the LoRA model (default: 1.0)"),
        ge=0,
    )


class LoraConfig(BaseConfigModel):
    """Options for Loras"""

    # TODO: convert this to a pathlib.Path?
    lora_dir: Optional[str] = Field(
        "loras", description=("Directory to look for LoRAs (default: 'loras')")
    )
    loras: Optional[List[LoraInstanceModel]] = Field(
        None,
        description=(
            "List of LoRAs to load and associated scaling factors (default scaling: "
            "1.0)"
        ),
    )


class SamplingConfig(BaseConfigModel):
    """Options for Sampling"""

    override_preset: Optional[str] = Field(
        None, description=("Select a sampler override preset")
    )


class DeveloperConfig(BaseConfigModel):
    """Options for development and experimentation"""

    unsafe_launch: Optional[bool] = Field(
        False, description=("Skip Exllamav2 version check")
    )
    disable_request_streaming: Optional[bool] = Field(
        False, description=("Disables API request streaming")
    )
    cuda_malloc_backend: Optional[bool] = Field(
        False, description=("Runs with the pytorch CUDA malloc backend")
    )
    uvloop: Optional[bool] = Field(
        False, description=("Run asyncio using Uvloop or Winloop")
    )
    realtime_process_priority: Optional[bool] = Field(
        False,
        description=(
            "Set process to use a higher priority. For realtime process priority, "
            "run as administrator or sudo. Otherwise, the priority will be set to "
            "high."
        ),
    )


class EmbeddingsConfig(BaseConfigModel):
    """
    Options for embedding models and loading.
    NOTE: Embeddings requires the "extras" feature to be installed
    Install it via "pip install .[extras]"
    """

    # TODO: convert this to a pathlib.Path?
    embedding_model_dir: Optional[str] = Field(
        "models",
        description=(
            "Overrides directory to look for embedding models (default: models)"
        ),
    )
    embeddings_device: Optional[Literal["cpu", "auto", "cuda"]] = Field(
        "cpu",
        description=(
            "Device to load embedding models on (default: cpu). Possible values: cpu, "
            "auto, cuda. If using an AMD GPU, set this value to 'cuda'."
        ),
    )
    embedding_model_name: Optional[str] = Field(
        None, description=("The embeddings model to load")
    )


class TabbyConfigModel(BaseModel):
    """Base model for a TabbyConfig."""
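
    # model_construct builds each default section without running validation,
    # so a bare TabbyConfigModel can exist before any values are checked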
    config: ConfigOverrideConfig = Field(
        default_factory=ConfigOverrideConfig.model_construct
    )
    network: NetworkConfig = Field(default_factory=NetworkConfig.model_construct)
    logging: LoggingConfig = Field(default_factory=LoggingConfig.model_construct)
    model: ModelConfig = Field(default_factory=ModelConfig.model_construct)
    draft_model: DraftModelConfig = Field(
        default_factory=DraftModelConfig.model_construct
    )
    lora: LoraConfig = Field(default_factory=LoraConfig.model_construct)
    sampling: SamplingConfig = Field(default_factory=SamplingConfig.model_construct)
    developer: DeveloperConfig = Field(default_factory=DeveloperConfig.model_construct)
    embeddings: EmbeddingsConfig = Field(
        default_factory=EmbeddingsConfig.model_construct
    )
    actions: UtilityActions = Field(default_factory=UtilityActions.model_construct)

    model_config = ConfigDict(validate_assignment=True, protected_namespaces=())


# TODO: Possibly switch to ruamel.yaml for a more native implementation
def generate_config_file(
    model: Optional[BaseConfigModel] = None,
    filename: str = "config_sample.yml",
    indentation: int = 2,
) -> None:
    """Creates a config.yml file from Pydantic models."""

    # Add a preamble
    yaml = dedent("""
    # Sample YAML file for configuration.
    # Comment and uncomment values as needed.
    # Every value has a default within the application.
    # This file serves as a drop-in for config.yml

    # Unless specified in the comments, DO NOT put these options in quotes!
    # You can use https://www.yamllint.com/ if you want to check your YAML formatting.\n
    """)

    schema = model if model else TabbyConfigModel()

    # TODO: Make the disordered iteration look cleaner
    iter_once = False
    for field, field_data in schema.model_fields.items():
        subfield_model = field_data.default_factory()

        if not subfield_model._metadata.include_in_config:
            continue

        # Iteration order doesn't match the config layout, so add the
        # separating newline once the first iteration finishes.
        # This is a sanity check for formatting.
        if iter_once:
            yaml += "\n"
        else:
            iter_once = True

        for line in getdoc(subfield_model).splitlines():
            yaml += f"# {line}\n"

        yaml += f"{field}:\n"

        sub_iter_once = False
        for subfield, subfield_data in subfield_model.model_fields.items():
            # Same logic as iter_once
            if sub_iter_once:
                yaml += "\n"
            else:
                sub_iter_once = True
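
            # Required fields default to PydanticUndefined; render both that
            # sentinel and None as an empty YAML value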
            value = subfield_data.default
            value = value if value is not None else ""
            value = value if value is not PydanticUndefined else ""
            yaml += f"{' ' * indentation}# {subfield_data.description}\n"
            yaml += f"{' ' * indentation}{subfield}: {value}\n"

    with open(filename, "w") as f:
        f.write(yaml)
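

# Example usage: regenerate the sample config with all defaults
if __name__ == "__main__":
    generate_config_file()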