diff --git a/common/config_models.py b/common/config_models.py
index da983b8..286057e 100644
--- a/common/config_models.py
+++ b/common/config_models.py
@@ -1,248 +1,331 @@
-from pydantic import BaseModel, ConfigDict, Field, model_validator
-from typing import List, Optional, Union
-
-from common.utils import unwrap
-
-
-class config_config_model(BaseModel):
-    config: Optional[str] = Field(
-        None, description="Path to an overriding config.yml file"
-    )
-
-
-class network_config_model(BaseModel):
-    host: Optional[str] = Field("127.0.0.1", description="The IP to host on")
-    port: Optional[int] = Field(5000, description="The port to host on")
-    disable_auth: Optional[bool] = Field(
-        False, description="Disable HTTP token authentication with requests"
-    )
-    send_tracebacks: Optional[bool] = Field(
-        False, description="Decide whether to send error tracebacks over the API"
-    )
-    api_servers: Optional[List[str]] = Field(
-        [
-            "OAI",
-        ],
-        description="API servers to enable. Options: (OAI, Kobold)",
-    )
-
-
-class logging_config_model(BaseModel):
-    log_prompt: Optional[bool] = Field(False, description="Enable prompt logging")
-    log_generation_params: Optional[bool] = Field(
-        False, description="Enable generation parameter logging"
-    )
-    log_requests: Optional[bool] = Field(False, description="Enable request logging")
-
-
-class model_config_model(BaseModel):
-    model_dir: str = Field(
-        "models",
-        description="Overrides the directory to look for models (default: models). Windows users, do NOT put this path in quotes.",
-    )
-    use_dummy_models: Optional[bool] = Field(
-        False,
-        description="Sends dummy model names when the models endpoint is queried. Enable this if looking for specific OAI models.",
-    )
-    model_name: Optional[str] = Field(
-        None,
-        description="An initial model to load. Make sure the model is located in the model directory! REQUIRED: This must be filled out to load a model on startup.",
-    )
-    use_as_default: List[str] = Field(
-        default_factory=list,
-        description="Names of args to use as a default fallback for API load requests (default: []). Example: ['max_seq_len', 'cache_mode']",
-    )
-    max_seq_len: Optional[int] = Field(
-        None,
-        description="Max sequence length. Fetched from the model's base sequence length in config.json by default.",
-    )
-    override_base_seq_len: Optional[int] = Field(
-        None,
-        description="Overrides base model context length. WARNING: Only use this if the model's base sequence length is incorrect.",
-    )
-    tensor_parallel: Optional[bool] = Field(
-        False,
-        description="Load model with tensor parallelism. Fallback to autosplit if GPU split isn't provided.",
-    )
-    gpu_split_auto: Optional[bool] = Field(
-        True,
-        description="Automatically allocate resources to GPUs (default: True). Not parsed for single GPU users.",
-    )
-    autosplit_reserve: List[int] = Field(
-        [96],
-        description="Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). Represented as an array of MB per GPU.",
-    )
-    gpu_split: List[float] = Field(
-        default_factory=list,
-        description="An integer array of GBs of VRAM to split between GPUs (default: []). Used with tensor parallelism.",
-    )
-    rope_scale: Optional[float] = Field(
-        1.0,
-        description="Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the model was trained on long context with rope.",
-    )
-    rope_alpha: Optional[Union[float, str]] = Field(
-        1.0,
-        description="Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto-calculate.",
-    )
-    cache_mode: Optional[str] = Field(
-        "FP16",
-        description="Enable different cache modes for VRAM savings (default: FP16). Possible values: FP16, Q8, Q6, Q4.",
-    )
-    cache_size: Optional[int] = Field(
-        None,
-        description="Size of the prompt cache to allocate (default: max_seq_len). Must be a multiple of 256.",
-    )
-    chunk_size: Optional[int] = Field(
-        2048,
-        description="Chunk size for prompt ingestion (default: 2048). A lower value reduces VRAM usage but decreases ingestion speed.",
-    )
-    max_batch_size: Optional[int] = Field(
-        None,
-        description="Set the maximum number of prompts to process at one time (default: None/Automatic). Automatically calculated if left blank.",
-    )
-    prompt_template: Optional[str] = Field(
-        None,
-        description="Set the prompt template for this model. If empty, attempts to look for the model's chat template.",
-    )
-    num_experts_per_token: Optional[int] = Field(
-        None,
-        description="Number of experts to use per token. Fetched from the model's config.json. For MoE models only.",
-    )
-    fasttensors: Optional[bool] = Field(
-        False,
-        description="Enables fasttensors to possibly increase model loading speeds (default: False).",
-    )
-
-
-class draft_model_config_model(BaseModel):
-    draft_model_dir: Optional[str] = Field(
-        "models",
-        description="Overrides the directory to look for draft models (default: models)",
-    )
-    draft_model_name: Optional[str] = Field(
-        None,
-        description="An initial draft model to load. Ensure the model is in the model directory.",
-    )
-    draft_rope_scale: Optional[float] = Field(
-        1.0,
-        description="Rope scale for draft models (default: 1.0). Same as compress_pos_emb. Use if the draft model was trained on long context with rope.",
-    )
-    draft_rope_alpha: Optional[float] = Field(
-        None,
-        description="Rope alpha for draft models (default: None). Same as alpha_value. Leave blank to auto-calculate the alpha value.",
-    )
-    draft_cache_mode: Optional[str] = Field(
-        "FP16",
-        description="Cache mode for draft models to save VRAM (default: FP16). Possible values: FP16, Q8, Q6, Q4.",
-    )
-
-
-class lora_instance_model(BaseModel):
-    name: str = Field(..., description="Name of the LoRA model")
-    scaling: float = Field(
-        1.0, description="Scaling factor for the LoRA model (default: 1.0)"
-    )
-
-
-class lora_config_model(BaseModel):
-    lora_dir: Optional[str] = Field(
-        "loras", description="Directory to look for LoRAs (default: 'loras')"
-    )
-    loras: Optional[List[lora_instance_model]] = Field(
-        None,
-        description="List of LoRAs to load and associated scaling factors (default scaling: 1.0)",
-    )
-
-
-class sampling_config_model(BaseModel):
-    override_preset: Optional[str] = Field(
-        None, description="Select a sampler override preset"
-    )
-
-
-class developer_config_model(BaseModel):
-    unsafe_launch: Optional[bool] = Field(
-        False, description="Skip Exllamav2 version check"
-    )
-    disable_request_streaming: Optional[bool] = Field(
-        False, description="Disables API request streaming"
-    )
-    cuda_malloc_backend: Optional[bool] = Field(
-        False, description="Runs with the pytorch CUDA malloc backend"
-    )
-    uvloop: Optional[bool] = Field(
-        False, description="Run asyncio using Uvloop or Winloop"
-    )
-    realtime_process_priority: Optional[bool] = Field(
-        False,
-        description="Set process to use a higher priority For realtime process priority, run as administrator or sudo Otherwise, the priority will be set to high",
-    )
-
-
-class embeddings_config_model(BaseModel):
-    embedding_model_dir: Optional[str] = Field(
-        "models",
-        description="Overrides directory to look for embedding models (default: models)",
-    )
-    embeddings_device: Optional[str] = Field(
-        "cpu",
-        description="Device to load embedding models on (default: cpu). Possible values: cpu, auto, cuda. If using an AMD GPU, set this value to 'cuda'.",
-    )
-    embedding_model_name: Optional[str] = Field(
-        None, description="The embeddings model to load"
-    )
-
-
-class tabby_config_model(BaseModel):
-    config: config_config_model = Field(default_factory=config_config_model)
-    network: network_config_model = Field(default_factory=network_config_model)
-    logging: logging_config_model = Field(default_factory=logging_config_model)
-    model: model_config_model = Field(default_factory=model_config_model)
-    draft_model: draft_model_config_model = Field(
-        default_factory=draft_model_config_model
-    )
-    lora: lora_config_model = Field(default_factory=lora_config_model)
-    sampling: sampling_config_model = Field(default_factory=sampling_config_model)
-    developer: developer_config_model = Field(default_factory=developer_config_model)
-    embeddings: embeddings_config_model = Field(default_factory=embeddings_config_model)
-
-    @model_validator(mode="before")
-    def set_defaults(cls, values):
-        for field_name, field_value in values.items():
-            if field_value is None:
-                default_instance = cls.__annotations__[field_name]().dict()
-                values[field_name] = cls.__annotations__[field_name](**default_instance)
-        return values
-
-    model_config = ConfigDict(validate_assignment=True)
-
-
-def generate_config_file(filename="config_sample.yml", indentation=2):
-    schema = tabby_config_model.model_json_schema()
-
-    def dump_def(id: str, indent=2):
-        yaml = ""
-        indent = " " * indentation * indent
-        id = id.split("/")[-1]
-
-        section = schema["$defs"][id]["properties"]
-        for property in section.keys():  # get type
-            comment = section[property]["description"]
-            yaml += f"{indent}# {comment}\n"
-
-            value = unwrap(section[property].get("default"), "")
-            yaml += f"{indent}{property}: {value}\n\n"
-
-        return yaml + "\n"
-
yaml = "" - for section in schema["properties"].keys(): - yaml += f"{section}:\n" - yaml += dump_def(schema["properties"][section]["$ref"]) - yaml += "\n" - - with open(filename, "w") as f: - f.write(yaml) - - -# generate_config_file("test.yml") +from pydantic import BaseModel, ConfigDict, Field, model_validator +from typing import List, Optional, Union + +from common.utils import unwrap + + +class config_config_model(BaseModel): + config: Optional[str] = Field( + None, description=("Path to an overriding config.yml file") + ) + + +class network_config_model(BaseModel): + host: Optional[str] = Field("127.0.0.1", description=("The IP to host on")) + port: Optional[int] = Field(5000, description=("The port to host on")) + disable_auth: Optional[bool] = Field( + False, description=("Disable HTTP token authentication with requests") + ) + send_tracebacks: Optional[bool] = Field( + False, + description=("Decide whether to send error tracebacks over the API"), + ) + api_servers: Optional[List[str]] = Field( + [ + "OAI", + ], + description=("API servers to enable. Options: (OAI, Kobold)"), + ) + + +class logging_config_model(BaseModel): + log_prompt: Optional[bool] = Field(False, description=("Enable prompt logging")) + log_generation_params: Optional[bool] = Field( + False, description=("Enable generation parameter logging") + ) + log_requests: Optional[bool] = Field(False, description=("Enable request logging")) + + +class model_config_model(BaseModel): + model_dir: str = Field( + "models", + description=( + "Overrides the directory to look for models (default: models). Windows" + "users, do NOT put this path in quotes." + ), + ) + use_dummy_models: Optional[bool] = Field( + False, + description=( + "Sends dummy model names when the models endpoint is queried. Enable this" + "if looking for specific OAI models." + ), + ) + model_name: Optional[str] = Field( + None, + description=( + "An initial model to load. Make sure the model is located in the model" + "directory! REQUIRED: This must be filled out to load a model on startup." + ), + ) + use_as_default: List[str] = Field( + default_factory=list, + description=( + "Names of args to use as a default fallback for API load requests" + "(default: []). Example: ['max_seq_len', 'cache_mode']" + ), + ) + max_seq_len: Optional[int] = Field( + None, + description=( + "Max sequence length. Fetched from the model's base sequence length in" + "config.json by default." + ), + ) + override_base_seq_len: Optional[int] = Field( + None, + description=( + "Overrides base model context length. WARNING: Only use this if the" + "model's base sequence length is incorrect." + ), + ) + tensor_parallel: Optional[bool] = Field( + False, + description=( + "Load model with tensor parallelism. Fallback to autosplit if GPU split" + "isn't provided." + ), + ) + gpu_split_auto: Optional[bool] = Field( + True, + description=( + "Automatically allocate resources to GPUs (default: True). Not parsed for" + "single GPU users." + ), + ) + autosplit_reserve: List[int] = Field( + [96], + description=( + "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0)." + "Represented as an array of MB per GPU." + ), + ) + gpu_split: List[float] = Field( + default_factory=list, + description=( + "An integer array of GBs of VRAM to split between GPUs (default: [])." + "Used with tensor parallelism." + ), + ) + rope_scale: Optional[float] = Field( + 1.0, + description=( + "Rope scale (default: 1.0). Same as compress_pos_emb. 
Only use if the" + "model was trained on long context with rope." + ), + ) + rope_alpha: Optional[Union[float, str]] = Field( + 1.0, + description=( + "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto-" + "calculate." + ), + ) + cache_mode: Optional[str] = Field( + "FP16", + description=( + "Enable different cache modes for VRAM savings (default: FP16). Possible" + "values: FP16, Q8, Q6, Q4." + ), + ) + cache_size: Optional[int] = Field( + None, + description=( + "Size of the prompt cache to allocate (default: max_seq_len). Must be a" + "multiple of 256." + ), + ) + chunk_size: Optional[int] = Field( + 2048, + description=( + "Chunk size for prompt ingestion (default: 2048). A lower value reduces" + "VRAM usage but decreases ingestion speed." + ), + ) + max_batch_size: Optional[int] = Field( + None, + description=( + "Set the maximum number of prompts to process at one time (default:" + "None/Automatic). Automatically calculated if left blank." + ), + ) + prompt_template: Optional[str] = Field( + None, + description=( + "Set the prompt template for this model. If empty, attempts to look for" + "the model's chat template." + ), + ) + num_experts_per_token: Optional[int] = Field( + None, + description=( + "Number of experts to use per token. Fetched from the model's" + "config.json. For MoE models only." + ), + ) + fasttensors: Optional[bool] = Field( + False, + description=( + "Enables fasttensors to possibly increase model loading speeds (default:" + "False)." + ), + ) + + +class draft_model_config_model(BaseModel): + draft_model_dir: Optional[str] = Field( + "models", + description=( + "Overrides the directory to look for draft models (default: models)" + ), + ) + draft_model_name: Optional[str] = Field( + None, + description=( + "An initial draft model to load. Ensure the model is in the model" + "directory." + ), + ) + draft_rope_scale: Optional[float] = Field( + 1.0, + description=( + "Rope scale for draft models (default: 1.0). Same as compress_pos_emb." + "Use if the draft model was trained on long context with rope." + ), + ) + draft_rope_alpha: Optional[float] = Field( + None, + description=( + "Rope alpha for draft models (default: None). Same as alpha_value. Leave" + "blank to auto-calculate the alpha value." + ), + ) + draft_cache_mode: Optional[str] = Field( + "FP16", + description=( + "Cache mode for draft models to save VRAM (default: FP16). Possible" + "values: FP16, Q8, Q6, Q4." 
+ ), + ) + + +class lora_instance_model(BaseModel): + name: str = Field(..., description=("Name of the LoRA model")) + scaling: float = Field( + 1.0, description=("Scaling factor for the LoRA model (default: 1.0)") + ) + + +class lora_config_model(BaseModel): + lora_dir: Optional[str] = Field( + "loras", description=("Directory to look for LoRAs (default: 'loras')") + ) + loras: Optional[List[lora_instance_model]] = Field( + None, + description=( + "List of LoRAs to load and associated scaling factors (default scaling:" + "1.0)" + ), + ) + + +class sampling_config_model(BaseModel): + override_preset: Optional[str] = Field( + None, description=("Select a sampler override preset") + ) + + +class developer_config_model(BaseModel): + unsafe_launch: Optional[bool] = Field( + False, description=("Skip Exllamav2 version check") + ) + disable_request_streaming: Optional[bool] = Field( + False, description=("Disables API request streaming") + ) + cuda_malloc_backend: Optional[bool] = Field( + False, description=("Runs with the pytorch CUDA malloc backend") + ) + uvloop: Optional[bool] = Field( + False, description=("Run asyncio using Uvloop or Winloop") + ) + realtime_process_priority: Optional[bool] = Field( + False, + description=( + "Set process to use a higher priority For realtime process priority, run" + "as administrator or sudo Otherwise, the priority will be set to high" + ), + ) + + +class embeddings_config_model(BaseModel): + embedding_model_dir: Optional[str] = Field( + "models", + description=( + "Overrides directory to look for embedding models (default: models)" + ), + ) + embeddings_device: Optional[str] = Field( + "cpu", + description=( + "Device to load embedding models on (default: cpu). Possible values: cpu," + "auto, cuda. If using an AMD GPU, set this value to 'cuda'." 
+ ), + ) + embedding_model_name: Optional[str] = Field( + None, description=("The embeddings model to load") + ) + + +class tabby_config_model(BaseModel): + config: config_config_model = Field(default_factory=config_config_model) + network: network_config_model = Field(default_factory=network_config_model) + logging: logging_config_model = Field(default_factory=logging_config_model) + model: model_config_model = Field(default_factory=model_config_model) + draft_model: draft_model_config_model = Field( + default_factory=draft_model_config_model + ) + lora: lora_config_model = Field(default_factory=lora_config_model) + sampling: sampling_config_model = Field(default_factory=sampling_config_model) + developer: developer_config_model = Field(default_factory=developer_config_model) + embeddings: embeddings_config_model = Field(default_factory=embeddings_config_model) + + @model_validator(mode="before") + def set_defaults(cls, values): + for field_name, field_value in values.items(): + if field_value is None: + default_instance = cls.__annotations__[field_name]().dict() + values[field_name] = cls.__annotations__[field_name](**default_instance) + return values + + model_config = ConfigDict(validate_assignment=True) + + +def generate_config_file(filename="config_sample.yml", indentation=2): + schema = tabby_config_model.model_json_schema() + + def dump_def(id: str, indent=2): + yaml = "" + indent = " " * indentation * indent + id = id.split("/")[-1] + + section = schema["$defs"][id]["properties"] + for property in section.keys(): # get type + comment = section[property]["description"] + yaml += f"{indent}# {comment}\n" + + value = unwrap(section[property].get("default"), "") + yaml += f"{indent}{property}: {value}\n\n" + + return yaml + "\n" + + yaml = "" + for section in schema["properties"].keys(): + yaml += f"{section}:\n" + yaml += dump_def(schema["properties"][section]["$ref"]) + yaml += "\n" + + with open(filename, "w") as f: + f.write(yaml) + + +# generate_config_file("test.yml")
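For context, a minimal sketch (not part of this diff) of how the rewritten module might be exercised, assuming it is importable as common.config_models and PyYAML is available; the config.yml path is illustrative:

    import yaml

    from common.config_models import generate_config_file, tabby_config_model

    # Regenerate the commented sample config from the pydantic JSON schema.
    generate_config_file("config_sample.yml")

    # Validate a user config against the same models; sections set to null
    # fall back to their defaults via the set_defaults validator, and missing
    # sections use each field's default_factory.
    with open("config.yml") as f:
        raw = yaml.safe_load(f) or {}

    config = tabby_config_model(**raw)
    print(config.network.host, config.network.port)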