Config: Alter YAML generation script for formatting adherence

Properly add comments and newlines where they need to go.

Signed-off-by: kingbri <bdashore3@proton.me>
kingbri 2024-09-17 22:44:09 -04:00
parent 948fcb7f5b
commit a34bd9a684
3 changed files with 92 additions and 71 deletions
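
For context, a minimal standalone sketch (not part of this commit) of the ruamel.yaml technique the reworked script leans on: a field description becomes a comment above its key via yaml_set_comment_before_after_key, indented to match nesting, and every comment after the first is prefixed with "\n" so a blank line separates entries. The keys and descriptions below are illustrative only.

import sys

from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap

yaml = YAML()

network = CommentedMap()
network["disable_auth"] = False
network["send_tracebacks"] = False

# First key: plain comment, indented two spaces to sit under "network"
network.yaml_set_comment_before_after_key(
    "disable_auth", before="Disable HTTP token authentication.", indent=2
)
# Later keys: a leading "\n" forces a blank line before the comment
network.yaml_set_comment_before_after_key(
    "send_tracebacks", before="\nSend tracebacks over the API.", indent=2
)

root = CommentedMap()
root["network"] = network
yaml.dump(root, sys.stdout)

# Expected output (roughly):
# network:
#   # Disable HTTP token authentication.
#   disable_auth: false
#
#   # Send tracebacks over the API.
#   send_tracebacks: false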


@@ -333,12 +333,8 @@ class DraftModelConfig(BaseConfigModel):
class LoraInstanceModel(BaseConfigModel):
"""Model representing an instance of a Lora."""
name: Optional[str] = Field(None, description=("Name of the LoRA model."))
scaling: float = Field(
1.0,
description=("Scaling factor for the LoRA model (default: 1.0)."),
ge=0,
)
name: Optional[str] = None
scaling: float = Field(1.0, ge=0)
class LoraConfig(BaseConfigModel):


@@ -2,15 +2,15 @@ import pathlib
from inspect import getdoc
from os import getenv
from textwrap import dedent
from typing import Any, Optional
from typing import Optional
from loguru import logger
from pydantic import BaseModel
from pydantic_core import PydanticUndefined
from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap, CommentedSeq
from ruamel.yaml.scalarstring import PreservedScalarString
from common.config_models import TabbyConfigModel
from common.config_models import BaseConfigModel, TabbyConfigModel
from common.utils import merge_dicts, unwrap
yaml = YAML()
@@ -174,22 +174,10 @@ config: TabbyConfig = TabbyConfig()
def generate_config_file(
model: BaseModel = None,
filename: str = "config_sample.yml",
indentation: int = 2,
) -> None:
"""Creates a config.yml file from Pydantic models."""
schema = unwrap(model, TabbyConfigModel())
preamble = get_preamble()
yaml_content = pydantic_model_to_yaml(schema)
with open(filename, "w") as f:
f.write(preamble)
yaml.dump(yaml_content, f)
def get_preamble() -> str:
"""Returns the cleaned up preamble for the config file."""
preamble = """
# Sample YAML file for configuration.
# Comment and uncomment values as needed.
@@ -199,43 +187,80 @@ def get_preamble() -> str:
# Unless specified in the comments, DO NOT put these options in quotes!
# You can use https://www.yamllint.com/ if you want to check your YAML formatting.\n
"""
return dedent(preamble).lstrip()
yaml_content = pydantic_model_to_yaml(schema)
with open(filename, "w") as f:
f.write(dedent(preamble).lstrip())
yaml.dump(yaml_content, f)
# Function to convert pydantic model to dict with field descriptions as comments
def pydantic_model_to_yaml(model: BaseModel) -> CommentedMap:
def pydantic_model_to_yaml(model: BaseModel, indentation: int = 0) -> CommentedMap:
"""
Recursively converts a Pydantic model into a CommentedMap,
with descriptions as comments in YAML.
"""
# Create a CommentedMap to hold the output data
yaml_data = CommentedMap()
# Loop through all fields in the model
iteration = 1
for field_name, field_info in model.model_fields.items():
# Get the inner pydantic model
value = getattr(model, field_name)
# If the field is another Pydantic model
if isinstance(value, BaseModel):
yaml_data[field_name] = pydantic_model_to_yaml(value)
# If the field is a list of Pydantic models
elif (
isinstance(value, list)
and len(value) > 0
and isinstance(value[0], BaseModel)
):
yaml_list = CommentedSeq()
for item in value:
yaml_list.append(pydantic_model_to_yaml(item))
yaml_data[field_name] = yaml_list
# Otherwise, just assign the value
else:
yaml_data[field_name] = value
if isinstance(value, BaseConfigModel):
# If the field is another Pydantic model
if not value._metadata.include_in_config:
continue
yaml_data[field_name] = pydantic_model_to_yaml(
value, indentation=indentation + 2
)
comment = getdoc(value)
elif isinstance(value, list) and len(value) > 0:
# If the field is a list
yaml_list = CommentedSeq()
if isinstance(value[0], BaseModel):
# If the field is a list of Pydantic models
# Do not add comments for these items
for item in value:
yaml_list.append(
pydantic_model_to_yaml(item, indentation=indentation + 2)
)
else:
# If the field is a normal list, prefer the YAML flow style
yaml_list.fa.set_flow_style()
yaml_list += [
PreservedScalarString(element)
if isinstance(element, str)
else element
for element in value
]
yaml_data[field_name] = yaml_list
comment = field_info.description
else:
# Otherwise, just assign the value
yaml_data[field_name] = value
comment = field_info.description
if comment:
# Add a newline to every comment but the first one
if iteration != 1:
comment = f"\n{comment}"
# Add field description as a comment if available
if field_info.description:
yaml_data.yaml_set_comment_before_after_key(
field_name, before=field_info.description
field_name, before=comment, indent=indentation
)
# Increment the iteration counter
iteration += 1
return yaml_data
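
Side note: the flow-style branch above is what yields the bracketed, double-quoted lists visible in the regenerated sample below (api_servers: ["OAI"]). A standalone reproduction of just that branch, outside the project:

import sys

from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap, CommentedSeq
from ruamel.yaml.scalarstring import PreservedScalarString

yaml = YAML()

# Build the list the same way the patched helper does: flow style, strings wrapped
servers = CommentedSeq()
servers.fa.set_flow_style()  # render as [...] instead of a block list
servers += [PreservedScalarString(s) for s in ["OAI"]]

doc = CommentedMap()
doc["api_servers"] = servers
yaml.dump(doc, sys.stdout)  # api_servers: ["OAI"], as in the sample below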


@@ -18,27 +18,27 @@ network:
# Disable HTTP token authentication with requests.
# WARNING: This will make your instance vulnerable!
# Turn on this option if you are ONLY connecting from localhost.
disable_auth: False
disable_auth: false
# Send tracebacks over the API (default: False).
# NOTE: Only enable this for debug purposes.
send_tracebacks: False
send_tracebacks: false
# Select API servers to enable (default: ["OAI"]).
# Possible values: OAI, Kobold.
api_servers: ['OAI']
api_servers: ["OAI"]
# Options for logging
logging:
# Enable prompt logging (default: False).
log_prompt: False
log_prompt: false
# Enable generation parameter logging (default: False).
log_generation_params: False
log_generation_params: false
# Enable request logging (default: False).
# NOTE: Only use this for debugging!
log_requests: False
log_requests: false
# Options for model overrides and loading
# Please read the comments to understand how arguments are handled
@@ -49,16 +49,16 @@ model:
model_dir: models
# Allow direct loading of models from a completion or chat completion request (default: False).
inline_model_loading: False
inline_model_loading: false
# Sends dummy model names when the models endpoint is queried.
# Enable this if the client is looking for specific OAI models.
use_dummy_models: False
use_dummy_models: false
# An initial model to load.
# Make sure the model is located in the model directory!
# REQUIRED: This must be filled out to load a model on startup.
model_name:
model_name:
# Names of args to use as a fallback for API load requests (default: []).
# For example, if you always want cache_mode to be Q4 instead of on the initial model load, add "cache_mode" to this array.
@@ -67,21 +67,21 @@ model:
# Max sequence length (default: Empty).
# Fetched from the model's base sequence length in config.json by default.
max_seq_len:
max_seq_len:
# Overrides base model context length (default: Empty).
# WARNING: Don't set this unless you know what you're doing!
# Again, do NOT use this for configuring context length, use max_seq_len above ^
override_base_seq_len:
override_base_seq_len:
# Load model with tensor parallelism.
# Falls back to autosplit if GPU split isn't provided.
# This ignores the gpu_split_auto value.
tensor_parallel: False
tensor_parallel: false
# Automatically allocate resources to GPUs (default: True).
# Not parsed for single GPU users.
gpu_split_auto: True
gpu_split_auto: true
# Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0).
# Represented as an array of MB per GPU.
@@ -108,7 +108,7 @@ model:
# Size of the prompt cache to allocate (default: max_seq_len).
# Must be a multiple of 256 and can't be less than max_seq_len.
# For CFG, set this to 2 * max_seq_len.
cache_size:
cache_size:
# Chunk size for prompt ingestion (default: 2048).
# A lower value reduces VRAM usage but decreases ingestion speed.
@@ -119,23 +119,23 @@ model:
# Set the maximum number of prompts to process at one time (default: None/Automatic).
# Automatically calculated if left blank.
# NOTE: Only available for Nvidia Ampere (30 series) and above GPUs.
max_batch_size:
max_batch_size:
# Set the prompt template for this model. (default: None)
# If empty, attempts to look for the model's chat template.
# If a model contains multiple templates in its tokenizer_config.json,
# set prompt_template to the name of the template you want to use.
# NOTE: Only works with chat completion message lists!
prompt_template:
prompt_template:
# Number of experts to use per token.
# Fetched from the model's config.json if empty.
# NOTE: For MoE models only.
# WARNING: Don't set this unless you know what you're doing!
num_experts_per_token:
num_experts_per_token:
# Enables fasttensors to possibly increase model loading speeds (default: False).
fasttensors: False
fasttensors: false
# Options for draft models (speculative decoding)
# This will use more VRAM!
@@ -145,7 +145,7 @@ draft_model:
# An initial draft model to load.
# Ensure the model is in the model directory.
draft_model_name:
draft_model_name:
# Rope scale for draft models (default: 1.0).
# Same as compress_pos_emb.
@@ -154,7 +154,7 @@ draft_model:
# Rope alpha for draft models (default: None).
# Same as alpha_value. Set to "auto" to auto-calculate.
draft_rope_alpha:
draft_rope_alpha:
# Cache mode for draft models to save VRAM (default: FP16).
# Possible values: 'FP16', 'Q8', 'Q6', 'Q4'.
@@ -169,7 +169,7 @@ lora:
# For the YAML file, add each entry as a YAML list:
# - name: lora1
# scaling: 1.0
loras:
loras:
# Options for embedding models and loading.
# NOTE: Embeddings requires the "extras" feature to be installed
@@ -185,32 +185,32 @@ embeddings:
embeddings_device: cpu
# An initial embedding model to load on the infinity backend.
embedding_model_name:
embedding_model_name:
# Options for Sampling
sampling:
# Select a sampler override preset (default: None).
# Find this in the sampler-overrides folder.
# This overrides default fallbacks for sampler values that are passed to the API.
override_preset:
override_preset:
# Options for development and experimentation
developer:
# Skip Exllamav2 version check (default: False).
# WARNING: It's highly recommended to update your dependencies rather than enabling this flag.
unsafe_launch: False
unsafe_launch: false
# Disable API request streaming (default: False).
disable_request_streaming: False
disable_request_streaming: false
# Enable the torch CUDA malloc backend (default: False).
cuda_malloc_backend: False
cuda_malloc_backend: false
# Run asyncio using Uvloop or Winloop which can improve performance.
# NOTE: It's recommended to enable this, but if something breaks turn this off.
uvloop: False
uvloop: false
# Set process to use a higher priority.
# For realtime process priority, run as administrator or sudo.
# Otherwise, the priority will be set to high.
realtime_process_priority: False
realtime_process_priority: false
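
The many False -> false flips above are presumably just regenerated output: ruamel.yaml renders Python booleans in lowercase. A quick standalone check, not project code:

import sys

from ruamel.yaml import YAML

YAML().dump({"tensor_parallel": False, "gpu_split_auto": True}, sys.stdout)
# tensor_parallel: false
# gpu_split_auto: true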