# tabbyAPI-ollama/common/config_models.py

from pydantic import BaseModel, ConfigDict, Field, model_validator
from typing import List, Optional, Union

from common.utils import unwrap


class ConfigOverrideConfig(BaseModel):
    """Model for overriding the active config file."""

config: Optional[str] = Field(
None, description=("Path to an overriding config.yml file")
)


class NetworkConfig(BaseModel):
    """Options for the network server."""

host: Optional[str] = Field("127.0.0.1", description=("The IP to host on"))
port: Optional[int] = Field(5000, description=("The port to host on"))
disable_auth: Optional[bool] = Field(
False, description=("Disable HTTP token authentication with requests")
)
send_tracebacks: Optional[bool] = Field(
False,
description=("Decide whether to send error tracebacks over the API"),
)
api_servers: Optional[List[str]] = Field(
[
"OAI",
],
description=("API servers to enable. Options: (OAI, Kobold)"),
)
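
# A hedged, illustrative example (not part of the original module): exposing
# both API server flavors on the local network. Values are examples only.
#
# lan_network = NetworkConfig(
#     host="0.0.0.0",              # listen on all interfaces
#     port=5000,
#     api_servers=["OAI", "Kobold"],
# )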


class LoggingConfig(BaseModel):
    """Options for request, prompt, and generation logging."""

log_prompt: Optional[bool] = Field(False, description=("Enable prompt logging"))
log_generation_params: Optional[bool] = Field(
False, description=("Enable generation parameter logging")
)
log_requests: Optional[bool] = Field(False, description=("Enable request logging"))


class ModelConfig(BaseModel):
    """Options for model loading."""

model_dir: str = Field(
"models",
description=(
"Overrides the directory to look for models (default: models). Windows "
"users, do NOT put this path in quotes."
),
)
use_dummy_models: Optional[bool] = Field(
False,
description=(
"Sends dummy model names when the models endpoint is queried. Enable this "
"if looking for specific OAI models."
),
)
model_name: Optional[str] = Field(
None,
description=(
"An initial model to load. Make sure the model is located in the model "
"directory! REQUIRED: This must be filled out to load a model on startup."
),
)
use_as_default: List[str] = Field(
default_factory=list,
description=(
"Names of args to use as a default fallback for API load requests "
"(default: []). Example: ['max_seq_len', 'cache_mode']"
),
)
max_seq_len: Optional[int] = Field(
None,
description=(
"Max sequence length. Fetched from the model's base sequence length in "
"config.json by default."
),
)
override_base_seq_len: Optional[int] = Field(
None,
description=(
"Overrides base model context length. WARNING: Only use this if the "
"model's base sequence length is incorrect."
),
)
tensor_parallel: Optional[bool] = Field(
False,
description=(
"Load model with tensor parallelism. Fallback to autosplit if GPU split "
"isn't provided."
),
)
gpu_split_auto: Optional[bool] = Field(
True,
description=(
"Automatically allocate resources to GPUs (default: True). Not parsed for "
"single GPU users."
),
)
autosplit_reserve: List[int] = Field(
[96],
description=(
"Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). "
"Represented as an array of MB per GPU."
),
)
    gpu_split: List[float] = Field(
        default_factory=list,
        description=(
            "An array of GBs of VRAM to split between GPUs (default: []). "
            "Used with tensor parallelism."
        ),
    )
rope_scale: Optional[float] = Field(
1.0,
description=(
"Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the "
"model was trained on long context with rope."
),
)
    rope_alpha: Optional[Union[float, str]] = Field(
        1.0,
        description=(
            "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to "
            "auto-calculate."
        ),
    )
cache_mode: Optional[str] = Field(
"FP16",
description=(
"Enable different cache modes for VRAM savings (default: FP16). Possible "
"values: FP16, Q8, Q6, Q4."
),
)
cache_size: Optional[int] = Field(
None,
description=(
"Size of the prompt cache to allocate (default: max_seq_len). Must be a "
"multiple of 256."
),
)
chunk_size: Optional[int] = Field(
2048,
description=(
"Chunk size for prompt ingestion (default: 2048). A lower value reduces "
"VRAM usage but decreases ingestion speed."
),
)
max_batch_size: Optional[int] = Field(
None,
description=(
"Set the maximum number of prompts to process at one time (default: "
"None/Automatic). Automatically calculated if left blank."
),
)
prompt_template: Optional[str] = Field(
None,
description=(
"Set the prompt template for this model. If empty, attempts to look for "
"the model's chat template."
),
)
num_experts_per_token: Optional[int] = Field(
None,
description=(
"Number of experts to use per token. Fetched from the model's "
"config.json. For MoE models only."
),
)
fasttensors: Optional[bool] = Field(
False,
description=(
"Enables fasttensors to possibly increase model loading speeds (default: "
"False)."
),
)

    model_config = ConfigDict(protected_namespaces=())
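
# A hedged sketch (not part of the original module) of how these fields are
# commonly combined: a manual GPU split only applies when autosplit is off.
# All names and values below are illustrative, not defaults.
#
# example_model = ModelConfig(
#     model_name="my-model",       # hypothetical folder under model_dir
#     max_seq_len=8192,
#     gpu_split_auto=False,        # disable autosplit to honor gpu_split
#     gpu_split=[20.0, 24.0],      # GBs of VRAM per GPU
#     cache_mode="Q4",             # quantized cache for VRAM savings
# )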


class DraftModelConfig(BaseModel):
    """Options for draft models used in speculative decoding."""

draft_model_dir: Optional[str] = Field(
"models",
description=(
"Overrides the directory to look for draft models (default: models)"
),
)
draft_model_name: Optional[str] = Field(
None,
description=(
"An initial draft model to load. Ensure the model is in the model"
"directory."
),
)
draft_rope_scale: Optional[float] = Field(
1.0,
description=(
"Rope scale for draft models (default: 1.0). Same as compress_pos_emb. "
"Use if the draft model was trained on long context with rope."
),
)
draft_rope_alpha: Optional[float] = Field(
None,
description=(
"Rope alpha for draft models (default: None). Same as alpha_value. Leave "
"blank to auto-calculate the alpha value."
),
)
draft_cache_mode: Optional[str] = Field(
"FP16",
description=(
"Cache mode for draft models to save VRAM (default: FP16). Possible "
"values: FP16, Q8, Q6, Q4."
),
)


class LoraInstanceModel(BaseModel):
    """Model for a single LoRA instance."""

name: str = Field(..., description=("Name of the LoRA model"))
scaling: float = Field(
1.0, description=("Scaling factor for the LoRA model (default: 1.0)")
)


class LoraConfig(BaseModel):
    """Options for LoRA loading."""

lora_dir: Optional[str] = Field(
"loras", description=("Directory to look for LoRAs (default: 'loras')")
)
loras: Optional[List[LoraInstanceModel]] = Field(
None,
description=(
"List of LoRAs to load and associated scaling factors (default scaling: "
"1.0)"
),
)
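
# A hedged, illustrative example: loading one LoRA at reduced strength.
# The LoRA name is hypothetical.
#
# example_lora = LoraConfig(
#     lora_dir="loras",
#     loras=[LoraInstanceModel(name="my-lora", scaling=0.8)],
# )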


class SamplingConfig(BaseModel):
    """Options for sampler overrides."""

override_preset: Optional[str] = Field(
None, description=("Select a sampler override preset")
)


class DeveloperConfig(BaseModel):
    """Options for developer and experimental flags."""

unsafe_launch: Optional[bool] = Field(
False, description=("Skip Exllamav2 version check")
)
disable_request_streaming: Optional[bool] = Field(
False, description=("Disables API request streaming")
)
cuda_malloc_backend: Optional[bool] = Field(
False, description=("Runs with the pytorch CUDA malloc backend")
)
uvloop: Optional[bool] = Field(
False, description=("Run asyncio using Uvloop or Winloop")
)
realtime_process_priority: Optional[bool] = Field(
False,
description=(
"Set process to use a higher priority For realtime process priority, run "
"as administrator or sudo Otherwise, the priority will be set to high"
),
)


class EmbeddingsConfig(BaseModel):
    """Options for embedding models."""

embedding_model_dir: Optional[str] = Field(
"models",
description=(
"Overrides directory to look for embedding models (default: models)"
),
)
embeddings_device: Optional[str] = Field(
"cpu",
description=(
"Device to load embedding models on (default: cpu). Possible values: cpu, "
"auto, cuda. If using an AMD GPU, set this value to 'cuda'."
),
)
embedding_model_name: Optional[str] = Field(
None, description=("The embeddings model to load")
)


class TabbyConfigModel(BaseModel):
    """Root config model that aggregates every section above."""

config: ConfigOverrideConfig = Field(
default_factory=ConfigOverrideConfig.model_construct
)
network: NetworkConfig = Field(default_factory=NetworkConfig.model_construct)
logging: LoggingConfig = Field(default_factory=LoggingConfig.model_construct)
model: ModelConfig = Field(default_factory=ModelConfig.model_construct)
draft_model: DraftModelConfig = Field(
default_factory=DraftModelConfig.model_construct
)
lora: LoraConfig = Field(default_factory=LoraConfig.model_construct)
sampling: SamplingConfig = Field(default_factory=SamplingConfig.model_construct)
developer: DeveloperConfig = Field(default_factory=DeveloperConfig.model_construct)
embeddings: EmbeddingsConfig = Field(
default_factory=EmbeddingsConfig.model_construct
)
@model_validator(mode="before")
def set_defaults(cls, values):
for field_name, field_value in values.items():
if field_value is None:
default_instance = cls.__annotations__[field_name]().dict()
values[field_name] = cls.__annotations__[field_name](**default_instance)
return values
model_config = ConfigDict(validate_assignment=True, protected_namespaces=())
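
# A minimal sketch of what set_defaults guards against: a config.yml section
# that is present but empty parses to None, and the validator swaps it for a
# default-constructed section model instead of failing validation.
#
# TabbyConfigModel(**{"network": None}).network.port  # -> 5000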


def generate_config_file(filename="config_sample.yml", indentation=2):
    """Dumps a commented sample YAML config built from the Pydantic schema."""

    schema = TabbyConfigModel.model_json_schema()

    def dump_def(def_id: str, indent_level=2):
        yaml = ""
        indent = " " * indentation * indent_level

        # Resolve "#/$defs/SectionName" to its properties in the schema
        def_id = def_id.split("/")[-1]
        section = schema["$defs"][def_id]["properties"]

        for field_name, field_schema in section.items():
            # Emit the field's description as a comment above the key
            comment = field_schema["description"]
            yaml += f"{indent}# {comment}\n"

            # Fall back to an empty YAML value when no default exists
            value = unwrap(field_schema.get("default"), "")
            yaml += f"{indent}{field_name}: {value}\n\n"

        return yaml + "\n"

    yaml = ""
    for section in schema["properties"].keys():
        yaml += f"{section}:\n"
        yaml += dump_def(schema["properties"][section]["$ref"])
        yaml += "\n"

    with open(filename, "w") as f:
        f.write(yaml)


# generate_config_file("test.yml")
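

# A minimal usage sketch, not part of the original module. It assumes a
# config.yml shaped like the sample emitted by generate_config_file and that
# PyYAML is installed; both are assumptions rather than requirements here.
if __name__ == "__main__":
    import yaml

    with open("config.yml") as f:
        raw_config = yaml.safe_load(f)  # plain dict, or None for an empty file

    # set_defaults fills in any sections that parsed to None
    config = TabbyConfigModel(**unwrap(raw_config, {}))
    print(f"Serving on {config.network.host}:{config.network.port}")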