Config: Use an explicit "auto" value for rope_alpha
Using "auto" for rope alpha removes ambiguity on how to explicitly enable automatic rope calculation. The same behavior of None -> auto calculate still exists, but can be overwritten if a model's tabby_config.yml includes `rope_alpha`. Signed-off-by: kingbri <bdashore3@proton.me>
Parent: a96fa5f138
Commit: 4aebe8a2a5
5 changed files with 50 additions and 17 deletions
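
The hunks below lean heavily on TabbyAPI's `unwrap` helper. For reference, it is assumed here to behave like a simple None-coalescing default; a minimal sketch, not necessarily the project's exact implementation:

```python
def unwrap(wrapped, default=None):
    """Assumed behavior: return the value unless it is None, else the default."""
    return wrapped if wrapped is not None else default
```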
```diff
@@ -249,10 +249,13 @@ class ExllamaV2Container:
             kwargs.get("rope_scale"), self.config.scale_pos_emb
         )
 
-        # Automatically calculate rope alpha
-        self.config.scale_alpha_value = unwrap(
-            kwargs.get("rope_alpha"), self.calculate_rope_alpha(base_seq_len)
-        )
+        # Sets rope alpha value.
+        # Automatically calculate if unset or defined as an "auto" literal.
+        rope_alpha = unwrap(kwargs.get("rope_alpha"), "auto")
+        if rope_alpha == "auto":
+            self.config.scale_alpha_value = self.calculate_rope_alpha(base_seq_len)
+        else:
+            self.config.scale_alpha_value = rope_alpha
 
         # Enable fasttensors loading if present
         self.config.fasttensors = unwrap(kwargs.get("fasttensors"), False)
```
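
With `"auto"` as the unwrap default, the new code yields three outcomes: an omitted value auto-calculates (preserving the old None behavior), an explicit `"auto"` auto-calculates, and a concrete float passes through. A minimal standalone sketch of that resolution, with `calculate_rope_alpha` stubbed out (the real method lives on `ExllamaV2Container` and the function name here is hypothetical):

```python
def resolve_rope_alpha(requested, base_seq_len):
    """Hypothetical free-function version of the logic in the hunk above."""

    def calculate_rope_alpha(seq_len):
        # Stub only: the real method derives an NTK alpha from the
        # target/base sequence-length ratio; 1.0 means no scaling.
        return 1.0

    rope_alpha = requested if requested is not None else "auto"  # unwrap(...)
    if rope_alpha == "auto":
        return calculate_rope_alpha(base_seq_len)
    return rope_alpha


print(resolve_rope_alpha(None, 4096))    # auto-calculated: None still means "auto"
print(resolve_rope_alpha("auto", 4096))  # auto-calculated, now explicit
print(resolve_rope_alpha(2.5, 4096))     # 2.5: a concrete value always wins
```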
```diff
@@ -344,16 +347,22 @@ class ExllamaV2Container:
         # Set user-configured draft model values
         if enable_draft:
-            self.draft_config.max_seq_len = self.config.max_seq_len
-
             self.draft_config.scale_pos_emb = unwrap(
                 draft_args.get("draft_rope_scale"), 1.0
             )
 
-            # Automatically calculate draft rope alpha
-            self.draft_config.scale_alpha_value = unwrap(
-                draft_args.get("draft_rope_alpha"),
-                self.calculate_rope_alpha(self.draft_config.max_seq_len),
-            )
+            self.draft_config.max_seq_len = self.config.max_seq_len
+
+            # Set draft rope alpha. Follows same behavior as model rope alpha.
+            draft_rope_alpha = unwrap(draft_args.get("draft_rope_alpha"), "auto")
+            if draft_rope_alpha == "auto":
+                self.draft_config.scale_alpha_value = self.calculate_rope_alpha(
+                    self.draft_config.max_seq_len
+                )
+            else:
+                self.draft_config.scale_alpha_value = draft_rope_alpha
 
         # Set draft cache mode
         self.draft_cache_mode = unwrap(draft_args.get("draft_cache_mode"), "FP16")
 
         if chunk_size:
```
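Note that the `draft_config.max_seq_len` assignment also moves next to the alpha resolution, presumably to group it with the calculation that consumes it; in both the old and new versions it is assigned before `calculate_rope_alpha` runs.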
```diff
@@ -13,6 +13,24 @@ def str_to_bool(value):
     raise ValueError(f"{value} is not a valid boolean value")
 
 
+def argument_with_auto(value):
+    """
+    Argparse type wrapper for any argument that has an automatic option.
+
+    Ex. rope_alpha
+    """
+
+    if value == "auto":
+        return "auto"
+
+    try:
+        return float(value)
+    except ValueError as ex:
+        raise argparse.ArgumentTypeError(
+            'This argument only takes a type of float or "auto"'
+        ) from ex
+
+
 def init_argparser():
     """Creates an argument parser that any function can use"""
```
```diff
@@ -133,7 +151,11 @@ def add_model_args(parser: argparse.ArgumentParser):
     model_group.add_argument(
         "--rope-scale", type=float, help="Sets rope_scale or compress_pos_emb"
     )
-    model_group.add_argument("--rope-alpha", type=float, help="Sets rope_alpha for NTK")
+    model_group.add_argument(
+        "--rope-alpha",
+        type=argument_with_auto,
+        help="Sets rope_alpha for NTK",
+    )
     model_group.add_argument(
         "--cache-mode",
         type=str,
```
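
With `type=argument_with_auto`, the flag accepts either spelling and rejects everything else at parse time. A usage sketch, inlining a condensed copy of the wrapper so the snippet runs on its own:

```python
import argparse


def argument_with_auto(value):
    # Condensed copy of the wrapper from the hunk above.
    if value == "auto":
        return "auto"
    try:
        return float(value)
    except ValueError as ex:
        raise argparse.ArgumentTypeError(
            'This argument only takes a type of float or "auto"'
        ) from ex


parser = argparse.ArgumentParser()
parser.add_argument("--rope-alpha", type=argument_with_auto, help="Sets rope_alpha for NTK")

print(parser.parse_args(["--rope-alpha", "auto"]).rope_alpha)  # 'auto'
print(parser.parse_args(["--rope-alpha", "2.5"]).rope_alpha)   # 2.5
# "--rope-alpha fast" would exit with:
#   error: argument --rope-alpha: This argument only takes a type of float or "auto"
```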
```diff
@@ -149,6 +149,7 @@ async def unload_embedding_model():
     embeddings_container = None
 
 
+# FIXME: Maybe make this a one-time function instead of a dynamic default
 def get_config_default(key: str, model_type: str = "model"):
     """Fetches a default value from model config if allowed by the user."""
```
```diff
@@ -135,7 +135,8 @@ model:
 
   # Rope alpha (default: 1.0)
   # Same thing as alpha_value
-  # Leave blank to automatically calculate alpha
+  # Set to "auto" to automatically calculate
+  # Leave blank to pull the value from the model
   #rope_alpha: 1.0
 
   # Enable different cache modes for VRAM savings (slight performance hit).
```
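
In practice the updated sample config now distinguishes three spellings. An illustrative snippet mirroring the comments above (values are examples only):

```yaml
# Auto-calculate alpha from the configured sequence length:
rope_alpha: auto

# Or pin an explicit NTK alpha:
#rope_alpha: 2.5

# Or leave the key out entirely to pull the value from the model's own config.
```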
```diff
@@ -2,7 +2,7 @@
 
 from pydantic import BaseModel, Field, ConfigDict
 from time import time
-from typing import List, Optional
+from typing import List, Literal, Optional, Union
 
 from common.gen_logging import GenLogPreferences
 from common.model import get_config_default
```
```diff
@@ -56,8 +56,8 @@ class DraftModelLoadRequest(BaseModel):
             "draft_rope_scale", model_type="draft"
         )
     )
-    draft_rope_alpha: Optional[float] = Field(
-        description="Automatically calculated if not present",
+    draft_rope_alpha: Optional[Union[float, Literal["auto"]]] = Field(
+        description='Automatically calculated if set to "auto"',
         default_factory=lambda: get_config_default(
             "draft_rope_alpha", model_type="draft"
         ),
```
```diff
@@ -114,8 +114,8 @@ class ModelLoadRequest(BaseModel):
         default_factory=lambda: get_config_default("rope_scale"),
         examples=[1.0],
     )
-    rope_alpha: Optional[float] = Field(
-        description="Automatically calculated if not present",
+    rope_alpha: Optional[Union[float, Literal["auto"]]] = Field(
+        description='Automatically calculated if set to "auto"',
         default_factory=lambda: get_config_default("rope_alpha"),
         examples=[1.0],
     )
```
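
The widened annotation means both load-request fields now validate floats and the `"auto"` literal while rejecting anything else. A minimal sketch with a toy model (`RopeAlphaDemo` is a hypothetical name, not the API's actual request schema):

```python
from typing import Literal, Optional, Union

from pydantic import BaseModel, ValidationError


class RopeAlphaDemo(BaseModel):
    # Mirrors the annotation used by rope_alpha / draft_rope_alpha above.
    rope_alpha: Optional[Union[float, Literal["auto"]]] = None


print(RopeAlphaDemo(rope_alpha=2.5).rope_alpha)     # 2.5
print(RopeAlphaDemo(rope_alpha="auto").rope_alpha)  # 'auto'
print(RopeAlphaDemo().rope_alpha)                   # None (field stays optional)

try:
    RopeAlphaDemo(rope_alpha="fast")
except ValidationError:
    print("rejected: neither a float nor the 'auto' literal")
```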