Config: Use an explicit "auto" value for rope_alpha

Using "auto" for rope alpha removes ambiguity about how to explicitly
enable automatic rope calculation. The same None -> auto-calculate
behavior still exists, but it can be overridden if a model's
tabby_config.yml includes `rope_alpha`.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri 2024-08-30 12:45:09 -04:00 committed by Brian Dashore
parent a96fa5f138
commit 4aebe8a2a5
5 changed files with 50 additions and 17 deletions

View file

@@ -249,10 +249,13 @@ class ExllamaV2Container:
kwargs.get("rope_scale"), self.config.scale_pos_emb
)
# Automatically calculate rope alpha
self.config.scale_alpha_value = unwrap(
kwargs.get("rope_alpha"), self.calculate_rope_alpha(base_seq_len)
)
# Sets rope alpha value.
# Automatically calculate if unset or defined as an "auto" literal.
rope_alpha = unwrap(kwargs.get("rope_alpha"), "auto")
if rope_alpha == "auto":
self.config.scale_alpha_value = self.calculate_rope_alpha(base_seq_len)
else:
self.config.scale_alpha_value = rope_alpha
# Enable fasttensors loading if present
self.config.fasttensors = unwrap(kwargs.get("fasttensors"), False)
@@ -344,16 +347,22 @@ class ExllamaV2Container:
# Set user-configured draft model values
if enable_draft:
self.draft_config.max_seq_len = self.config.max_seq_len
self.draft_config.scale_pos_emb = unwrap(
draft_args.get("draft_rope_scale"), 1.0
)
# Automatically calculate draft rope alpha
self.draft_config.scale_alpha_value = unwrap(
draft_args.get("draft_rope_alpha"),
self.calculate_rope_alpha(self.draft_config.max_seq_len),
)
self.draft_config.max_seq_len = self.config.max_seq_len
# Set draft rope alpha. Follows same behavior as model rope alpha.
draft_rope_alpha = unwrap(draft_args.get("draft_rope_alpha"), "auto")
if draft_rope_alpha == "auto":
self.draft_config.scale_alpha_value = self.calculate_rope_alpha(
self.draft_config.max_seq_len
)
else:
self.draft_config.scale_alpha_value = draft_rope_alpha
# Set draft cache mode
self.draft_cache_mode = unwrap(draft_args.get("draft_cache_mode"), "FP16")
if chunk_size:

View file

@@ -13,6 +13,24 @@ def str_to_bool(value):
raise ValueError(f"{value} is not a valid boolean value")
def argument_with_auto(value):
    """
    Argparse type wrapper for any argument that has an automatic option.

    Passes the literal string "auto" through unchanged; anything else is
    coerced to float. Ex. rope_alpha

    Raises:
        argparse.ArgumentTypeError: if the value is neither "auto" nor a
            parseable float.
    """

    # "auto" is the only accepted non-numeric value
    if value != "auto":
        try:
            return float(value)
        except ValueError as ex:
            raise argparse.ArgumentTypeError(
                'This argument only takes a type of float or "auto"'
            ) from ex

    return value
def init_argparser():
"""Creates an argument parser that any function can use"""
@@ -133,7 +151,11 @@ def add_model_args(parser: argparse.ArgumentParser):
model_group.add_argument(
"--rope-scale", type=float, help="Sets rope_scale or compress_pos_emb"
)
model_group.add_argument("--rope-alpha", type=float, help="Sets rope_alpha for NTK")
model_group.add_argument(
"--rope-alpha",
type=argument_with_auto,
help="Sets rope_alpha for NTK",
)
model_group.add_argument(
"--cache-mode",
type=str,

View file

@@ -149,6 +149,7 @@ async def unload_embedding_model():
embeddings_container = None
# FIXME: Maybe make this a one-time function instead of a dynamic default
def get_config_default(key: str, model_type: str = "model"):
"""Fetches a default value from model config if allowed by the user."""

View file

@@ -135,7 +135,8 @@ model:
# Rope alpha (default: 1.0)
# Same thing as alpha_value
# Leave blank to automatically calculate alpha
# Set to "auto" to automatically calculate
# Leave blank to pull the value from the model
#rope_alpha: 1.0
# Enable different cache modes for VRAM savings (slight performance hit).

View file

@@ -2,7 +2,7 @@
from pydantic import BaseModel, Field, ConfigDict
from time import time
from typing import List, Optional
from typing import List, Literal, Optional, Union
from common.gen_logging import GenLogPreferences
from common.model import get_config_default
@@ -56,8 +56,8 @@ class DraftModelLoadRequest(BaseModel):
"draft_rope_scale", model_type="draft"
)
)
draft_rope_alpha: Optional[float] = Field(
description="Automatically calculated if not present",
draft_rope_alpha: Optional[Union[float, Literal["auto"]]] = Field(
description='Automatically calculated if set to "auto"',
default_factory=lambda: get_config_default(
"draft_rope_alpha", model_type="draft"
),
@@ -114,8 +114,8 @@ class ModelLoadRequest(BaseModel):
default_factory=lambda: get_config_default("rope_scale"),
examples=[1.0],
)
rope_alpha: Optional[float] = Field(
description="Automatically calculated if not present",
rope_alpha: Optional[Union[float, Literal["auto"]]] = Field(
description='Automatically calculated if set to "auto"',
default_factory=lambda: get_config_default("rope_alpha"),
examples=[1.0],
)