Config: Alter YAML generation script for formatting adherence
Properly add comments and newlines where they need to go.

Signed-off-by: kingbri <bdashore3@proton.me>

parent 948fcb7f5b
commit a34bd9a684
3 changed files with 92 additions and 71 deletions
@@ -333,12 +333,8 @@ class DraftModelConfig(BaseConfigModel):
 class LoraInstanceModel(BaseConfigModel):
     """Model representing an instance of a Lora."""

-    name: Optional[str] = Field(None, description=("Name of the LoRA model."))
-    scaling: float = Field(
-        1.0,
-        description=("Scaling factor for the LoRA model (default: 1.0)."),
-        ge=0,
-    )
+    name: Optional[str] = None
+    scaling: float = Field(1.0, ge=0)


 class LoraConfig(BaseConfigModel):
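An aside, not part of the diff: per-field descriptions are safe to drop here because list items such as these LoRA entries are emitted without comments, while nested config sections get their YAML comment from the class docstring, which the generator reads with inspect.getdoc (see the generator hunks below). A minimal, illustrative sketch of that docstring lookup, using a hypothetical model name:

from inspect import getdoc
from typing import Optional

from pydantic import BaseModel, Field


class ExampleLoraInstance(BaseModel):
    """Illustrative stand-in for LoraInstanceModel."""

    name: Optional[str] = None
    scaling: float = Field(1.0, ge=0)


# getdoc() falls back to the class docstring and cleans its indentation,
# so this prints the one-line description above.
print(getdoc(ExampleLoraInstance()))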
@@ -2,15 +2,15 @@ import pathlib
 from inspect import getdoc
 from os import getenv
 from textwrap import dedent
-from typing import Any, Optional
+from typing import Optional

 from loguru import logger
 from pydantic import BaseModel
 from pydantic_core import PydanticUndefined
 from ruamel.yaml import YAML
 from ruamel.yaml.comments import CommentedMap, CommentedSeq
+from ruamel.yaml.scalarstring import PreservedScalarString

-from common.config_models import TabbyConfigModel
+from common.config_models import BaseConfigModel, TabbyConfigModel
 from common.utils import merge_dicts, unwrap

 yaml = YAML()
@@ -174,22 +174,10 @@ config: TabbyConfig = TabbyConfig()
 def generate_config_file(
     model: BaseModel = None,
     filename: str = "config_sample.yml",
-    indentation: int = 2,
 ) -> None:
     """Creates a config.yml file from Pydantic models."""

     schema = unwrap(model, TabbyConfigModel())
-    preamble = get_preamble()
-
-    yaml_content = pydantic_model_to_yaml(schema)
-
-    with open(filename, "w") as f:
-        f.write(preamble)
-        yaml.dump(yaml_content, f)
-
-
-def get_preamble() -> str:
-    """Returns the cleaned up preamble for the config file."""
     preamble = """
     # Sample YAML file for configuration.
     # Comment and uncomment values as needed.
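A small aside, not from the diff: the inlined preamble stays indented inside the function for readability, and dedent(...).lstrip() strips the shared indentation plus the leading blank line before the text is written to the file. A tiny sketch of that behavior:

from textwrap import dedent

preamble = """
    # Sample YAML file for configuration.
    # Comment and uncomment values as needed.
"""

# dedent() removes the common 4-space prefix; lstrip() drops the leading newline.
print(dedent(preamble).lstrip())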
@@ -199,43 +187,80 @@ def get_preamble() -> str:
     # Unless specified in the comments, DO NOT put these options in quotes!
     # You can use https://www.yamllint.com/ if you want to check your YAML formatting.\n
     """
-    return dedent(preamble).lstrip()
+
+    yaml_content = pydantic_model_to_yaml(schema)
+
+    with open(filename, "w") as f:
+        f.write(dedent(preamble).lstrip())
+        yaml.dump(yaml_content, f)


-# Function to convert pydantic model to dict with field descriptions as comments
-def pydantic_model_to_yaml(model: BaseModel) -> CommentedMap:
+def pydantic_model_to_yaml(model: BaseModel, indentation: int = 0) -> CommentedMap:
     """
     Recursively converts a Pydantic model into a CommentedMap,
     with descriptions as comments in YAML.
     """

     # Create a CommentedMap to hold the output data
     yaml_data = CommentedMap()

     # Loop through all fields in the model
+    iteration = 1
     for field_name, field_info in model.model_fields.items():
         # Get the inner pydantic model
         value = getattr(model, field_name)

-        # If the field is another Pydantic model
-        if isinstance(value, BaseModel):
-            yaml_data[field_name] = pydantic_model_to_yaml(value)
-        # If the field is a list of Pydantic models
-        elif (
-            isinstance(value, list)
-            and len(value) > 0
-            and isinstance(value[0], BaseModel)
-        ):
-            yaml_list = CommentedSeq()
-            for item in value:
-                yaml_list.append(pydantic_model_to_yaml(item))
-            yaml_data[field_name] = yaml_list
-        # Otherwise, just assign the value
-        else:
-            yaml_data[field_name] = value
+        if isinstance(value, BaseConfigModel):
+            # If the field is another Pydantic model
+
+            if not value._metadata.include_in_config:
+                continue
+
+            yaml_data[field_name] = pydantic_model_to_yaml(
+                value, indentation=indentation + 2
+            )
+            comment = getdoc(value)
+        elif isinstance(value, list) and len(value) > 0:
+            # If the field is a list
+
+            yaml_list = CommentedSeq()
+            if isinstance(value[0], BaseModel):
+                # If the field is a list of Pydantic models
+                # Do not add comments for these items
+
+                for item in value:
+                    yaml_list.append(
+                        pydantic_model_to_yaml(item, indentation=indentation + 2)
+                    )
+            else:
+                # If the field is a normal list, prefer the YAML flow style
+
+                yaml_list.fa.set_flow_style()
+                yaml_list += [
+                    PreservedScalarString(element)
+                    if isinstance(element, str)
+                    else element
+                    for element in value
+                ]
+
+            yaml_data[field_name] = yaml_list
+            comment = field_info.description
+        else:
+            # Otherwise, just assign the value
+
+            yaml_data[field_name] = value
+            comment = field_info.description

-        # Add field description as a comment if available
-        if field_info.description:
+        if comment:
+            # Add a newline to every comment but the first one
+            if iteration != 1:
+                comment = f"\n{comment}"
+
             yaml_data.yaml_set_comment_before_after_key(
-                field_name, before=field_info.description
+                field_name, before=comment, indent=indentation
             )

+        # Increment the iteration counter
+        iteration += 1
+
     return yaml_data
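For reference, a standalone sketch of the two ruamel.yaml behaviors the rewritten generator leans on: prefixing a comment with "\n" so a blank line separates sections (the commit's comment = f"\n{comment}"), and passing indent= so nested comments line up with their keys. Everything below is illustrative and assumes only that ruamel.yaml is installed; none of the names come from the diff.

import sys

from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap

yaml = YAML()

network = CommentedMap()
network["host"] = "127.0.0.1"
network["port"] = 5000

logging_opts = CommentedMap()
logging_opts["log_prompt"] = False

root = CommentedMap()
root["network"] = network
root["logging"] = logging_opts

# First section: plain comment, no leading newline.
root.yaml_set_comment_before_after_key("network", before="Options for networking")
# Later sections: a leading "\n" renders as a blank line before the comment.
root.yaml_set_comment_before_after_key("logging", before="\nOptions for logging")
# Nested keys pass indent= so the "#" lines up with the indented key.
network.yaml_set_comment_before_after_key("port", before="Port to listen on", indent=2)

yaml.dump(root, sys.stdout)

Dumping this should emit lowercase booleans and a blank line before the logging comment, which is the shape the regenerated config_sample.yml hunks below follow.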
@@ -18,27 +18,27 @@ network:
   # Disable HTTP token authentication with requests.
   # WARNING: This will make your instance vulnerable!
   # Turn on this option if you are ONLY connecting from localhost.
-  disable_auth: False
+  disable_auth: false

   # Send tracebacks over the API (default: False).
   # NOTE: Only enable this for debug purposes.
-  send_tracebacks: False
+  send_tracebacks: false

   # Select API servers to enable (default: ["OAI"]).
   # Possible values: OAI, Kobold.
-  api_servers: ['OAI']
+  api_servers: ["OAI"]

 # Options for logging
 logging:
   # Enable prompt logging (default: False).
-  log_prompt: False
+  log_prompt: false

   # Enable generation parameter logging (default: False).
-  log_generation_params: False
+  log_generation_params: false

   # Enable request logging (default: False).
   # NOTE: Only use this for debugging!
-  log_requests: False
+  log_requests: false

 # Options for model overrides and loading
 # Please read the comments to understand how arguments are handled
@@ -49,16 +49,16 @@ model:
   model_dir: models

   # Allow direct loading of models from a completion or chat completion request (default: False).
-  inline_model_loading: False
+  inline_model_loading: false

   # Sends dummy model names when the models endpoint is queried.
   # Enable this if the client is looking for specific OAI models.
-  use_dummy_models: False
+  use_dummy_models: false

   # An initial model to load.
   # Make sure the model is located in the model directory!
   # REQUIRED: This must be filled out to load a model on startup.
-  model_name:
+  model_name:

   # Names of args to use as a fallback for API load requests (default: []).
   # For example, if you always want cache_mode to be Q4 instead of on the inital model load, add "cache_mode" to this array.
@@ -67,21 +67,21 @@ model:

   # Max sequence length (default: Empty).
   # Fetched from the model's base sequence length in config.json by default.
-  max_seq_len:
+  max_seq_len:

   # Overrides base model context length (default: Empty).
   # WARNING: Don't set this unless you know what you're doing!
   # Again, do NOT use this for configuring context length, use max_seq_len above ^
-  override_base_seq_len:
+  override_base_seq_len:

   # Load model with tensor parallelism.
   # Falls back to autosplit if GPU split isn't provided.
   # This ignores the gpu_split_auto value.
-  tensor_parallel: False
+  tensor_parallel: false

   # Automatically allocate resources to GPUs (default: True).
   # Not parsed for single GPU users.
-  gpu_split_auto: True
+  gpu_split_auto: true

   # Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0).
   # Represented as an array of MB per GPU.
@@ -108,7 +108,7 @@ model:
   # Size of the prompt cache to allocate (default: max_seq_len).
   # Must be a multiple of 256 and can't be less than max_seq_len.
   # For CFG, set this to 2 * max_seq_len.
-  cache_size:
+  cache_size:

   # Chunk size for prompt ingestion (default: 2048).
   # A lower value reduces VRAM usage but decreases ingestion speed.
@@ -119,23 +119,23 @@ model:
   # Set the maximum number of prompts to process at one time (default: None/Automatic).
   # Automatically calculated if left blank.
   # NOTE: Only available for Nvidia ampere (30 series) and above GPUs.
-  max_batch_size:
+  max_batch_size:

   # Set the prompt template for this model. (default: None)
   # If empty, attempts to look for the model's chat template.
   # If a model contains multiple templates in its tokenizer_config.json,
   # set prompt_template to the name of the template you want to use.
   # NOTE: Only works with chat completion message lists!
-  prompt_template:
+  prompt_template:

   # Number of experts to use per token.
   # Fetched from the model's config.json if empty.
   # NOTE: For MoE models only.
   # WARNING: Don't set this unless you know what you're doing!
-  num_experts_per_token:
+  num_experts_per_token:

   # Enables fasttensors to possibly increase model loading speeds (default: False).
-  fasttensors: False
+  fasttensors: false

 # Options for draft models (speculative decoding)
 # This will use more VRAM!
@@ -145,7 +145,7 @@ draft_model:

   # An initial draft model to load.
   # Ensure the model is in the model directory.
-  draft_model_name:
+  draft_model_name:

   # Rope scale for draft models (default: 1.0).
   # Same as compress_pos_emb.
@@ -154,7 +154,7 @@ draft_model:

   # Rope alpha for draft models (default: None).
   # Same as alpha_value. Set to "auto" to auto-calculate.
-  draft_rope_alpha:
+  draft_rope_alpha:

   # Cache mode for draft models to save VRAM (default: FP16).
   # Possible values: 'FP16', 'Q8', 'Q6', 'Q4'.
@@ -169,7 +169,7 @@ lora:
   # For the YAML file, add each entry as a YAML list:
   # - name: lora1
   #   scaling: 1.0
-  loras:
+  loras:

 # Options for embedding models and loading.
 # NOTE: Embeddings requires the "extras" feature to be installed
@@ -185,32 +185,32 @@ embeddings:
   embeddings_device: cpu

   # An initial embedding model to load on the infinity backend.
-  embedding_model_name:
+  embedding_model_name:

 # Options for Sampling
 sampling:
   # Select a sampler override preset (default: None).
   # Find this in the sampler-overrides folder.
   # This overrides default fallbacks for sampler values that are passed to the API.
-  override_preset:
+  override_preset:

 # Options for development and experimentation
 developer:
   # Skip Exllamav2 version check (default: False).
   # WARNING: It's highly recommended to update your dependencies rather than enabling this flag.
-  unsafe_launch: False
+  unsafe_launch: false

   # Disable API request streaming (default: False).
-  disable_request_streaming: False
+  disable_request_streaming: false

   # Enable the torch CUDA malloc backend (default: False).
-  cuda_malloc_backend: False
+  cuda_malloc_backend: false

   # Run asyncio using Uvloop or Winloop which can improve performance.
   # NOTE: It's recommended to enable this, but if something breaks turn this off.
-  uvloop: False
+  uvloop: false

   # Set process to use a higher priority.
   # For realtime process priority, run as administrator or sudo.
   # Otherwise, the priority will be set to high.
-  realtime_process_priority: False
+  realtime_process_priority: false
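The regenerated sample above would come from running the generator with its defaults; a hypothetical invocation (the module path is assumed, since the diff does not show file names):

# Hypothetical module path; the diff only shows file contents, not their names.
from common.tabby_config import generate_config_file

# Writes config_sample.yml from the default TabbyConfigModel schema,
# using the inlined preamble and the new comment/newline formatting.
generate_config_file()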