Config: Use an explicit "auto" value for rope_alpha
Using "auto" for rope alpha removes ambiguity on how to explicitly enable automatic rope calculation. The same behavior of None -> auto calculate still exists, but can be overwritten if a model's tabby_config.yml includes `rope_alpha`. Signed-off-by: kingbri <bdashore3@proton.me>
Parent: a96fa5f138
Commit: 4aebe8a2a5
5 changed files with 50 additions and 17 deletions
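
The hunks below lean heavily on TabbyAPI's `unwrap` helper. For reference, it is assumed here to behave like a simple None-coalescing default; a minimal sketch, not necessarily the project's exact implementation:

```python
def unwrap(wrapped, default=None):
    """Assumed behavior: return the value unless it is None, else the default."""
    return wrapped if wrapped is not None else default
```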
```diff
@@ -249,10 +249,13 @@ class ExllamaV2Container:
             kwargs.get("rope_scale"), self.config.scale_pos_emb
         )
 
-        # Automatically calculate rope alpha
-        self.config.scale_alpha_value = unwrap(
-            kwargs.get("rope_alpha"), self.calculate_rope_alpha(base_seq_len)
-        )
+        # Sets rope alpha value.
+        # Automatically calculate if unset or defined as an "auto" literal.
+        rope_alpha = unwrap(kwargs.get("rope_alpha"), "auto")
+        if rope_alpha == "auto":
+            self.config.scale_alpha_value = self.calculate_rope_alpha(base_seq_len)
+        else:
+            self.config.scale_alpha_value = rope_alpha
 
         # Enable fasttensors loading if present
         self.config.fasttensors = unwrap(kwargs.get("fasttensors"), False)
```
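
With `"auto"` as the unwrap default, the new code yields three outcomes: an omitted value auto-calculates (preserving the old None behavior), an explicit `"auto"` auto-calculates, and a concrete float passes through. A minimal standalone sketch of that resolution, with `calculate_rope_alpha` stubbed out (the real method lives on `ExllamaV2Container` and the function name here is hypothetical):

```python
def resolve_rope_alpha(requested, base_seq_len):
    """Hypothetical free-function version of the logic in the hunk above."""

    def calculate_rope_alpha(seq_len):
        # Stub only: the real method derives an NTK alpha from the
        # target/base sequence-length ratio; 1.0 means no scaling.
        return 1.0

    rope_alpha = requested if requested is not None else "auto"  # unwrap(...)
    if rope_alpha == "auto":
        return calculate_rope_alpha(base_seq_len)
    return rope_alpha


print(resolve_rope_alpha(None, 4096))    # auto-calculated: None still means "auto"
print(resolve_rope_alpha("auto", 4096))  # auto-calculated, now explicit
print(resolve_rope_alpha(2.5, 4096))     # 2.5: a concrete value always wins
```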
```diff
@@ -344,16 +347,22 @@ class ExllamaV2Container:
         # Set user-configured draft model values
         if enable_draft:
-            self.draft_config.max_seq_len = self.config.max_seq_len
-
             self.draft_config.scale_pos_emb = unwrap(
                 draft_args.get("draft_rope_scale"), 1.0
             )
 
-            # Automatically calculate draft rope alpha
-            self.draft_config.scale_alpha_value = unwrap(
-                draft_args.get("draft_rope_alpha"),
-                self.calculate_rope_alpha(self.draft_config.max_seq_len),
-            )
+            self.draft_config.max_seq_len = self.config.max_seq_len
+
+            # Set draft rope alpha. Follows same behavior as model rope alpha.
+            draft_rope_alpha = unwrap(draft_args.get("draft_rope_alpha"), "auto")
+            if draft_rope_alpha == "auto":
+                self.draft_config.scale_alpha_value = self.calculate_rope_alpha(
+                    self.draft_config.max_seq_len
+                )
+            else:
+                self.draft_config.scale_alpha_value = draft_rope_alpha
 
         # Set draft cache mode
         self.draft_cache_mode = unwrap(draft_args.get("draft_cache_mode"), "FP16")
 
         if chunk_size:
```
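Note that the `draft_config.max_seq_len` assignment also moves next to the alpha resolution, presumably to group it with the calculation that consumes it; in both the old and new versions it is assigned before `calculate_rope_alpha` runs.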
```diff
@@ -13,6 +13,24 @@ def str_to_bool(value):
     raise ValueError(f"{value} is not a valid boolean value")
 
 
+def argument_with_auto(value):
+    """
+    Argparse type wrapper for any argument that has an automatic option.
+
+    Ex. rope_alpha
+    """
+
+    if value == "auto":
+        return "auto"
+
+    try:
+        return float(value)
+    except ValueError as ex:
+        raise argparse.ArgumentTypeError(
+            'This argument only takes a type of float or "auto"'
+        ) from ex
+
+
 def init_argparser():
     """Creates an argument parser that any function can use"""
```
```diff
@@ -133,7 +151,11 @@ def add_model_args(parser: argparse.ArgumentParser):
     model_group.add_argument(
         "--rope-scale", type=float, help="Sets rope_scale or compress_pos_emb"
     )
-    model_group.add_argument("--rope-alpha", type=float, help="Sets rope_alpha for NTK")
+    model_group.add_argument(
+        "--rope-alpha",
+        type=argument_with_auto,
+        help="Sets rope_alpha for NTK",
+    )
     model_group.add_argument(
         "--cache-mode",
         type=str,
```
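
With `type=argument_with_auto`, the flag accepts either spelling and rejects everything else at parse time. A usage sketch, inlining a condensed copy of the wrapper so the snippet runs on its own:

```python
import argparse


def argument_with_auto(value):
    # Condensed copy of the wrapper from the hunk above.
    if value == "auto":
        return "auto"
    try:
        return float(value)
    except ValueError as ex:
        raise argparse.ArgumentTypeError(
            'This argument only takes a type of float or "auto"'
        ) from ex


parser = argparse.ArgumentParser()
parser.add_argument("--rope-alpha", type=argument_with_auto, help="Sets rope_alpha for NTK")

print(parser.parse_args(["--rope-alpha", "auto"]).rope_alpha)  # 'auto'
print(parser.parse_args(["--rope-alpha", "2.5"]).rope_alpha)   # 2.5
# "--rope-alpha fast" would exit with:
#   error: argument --rope-alpha: This argument only takes a type of float or "auto"
```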
```diff
@@ -149,6 +149,7 @@ async def unload_embedding_model():
     embeddings_container = None
 
 
+# FIXME: Maybe make this a one-time function instead of a dynamic default
 def get_config_default(key: str, model_type: str = "model"):
     """Fetches a default value from model config if allowed by the user."""
```
```diff
@@ -135,7 +135,8 @@ model:
 
   # Rope alpha (default: 1.0)
   # Same thing as alpha_value
-  # Leave blank to automatically calculate alpha
+  # Set to "auto" to automatically calculate
+  # Leave blank to pull the value from the model
   #rope_alpha: 1.0
 
   # Enable different cache modes for VRAM savings (slight performance hit).
```
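
In practice the updated sample config now distinguishes three spellings. An illustrative snippet mirroring the comments above (values are examples only):

```yaml
# Auto-calculate alpha from the configured sequence length:
rope_alpha: auto

# Or pin an explicit NTK alpha:
#rope_alpha: 2.5

# Or leave the key out entirely to pull the value from the model's own config.
```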
```diff
@@ -2,7 +2,7 @@
 
 from pydantic import BaseModel, Field, ConfigDict
 from time import time
-from typing import List, Optional
+from typing import List, Literal, Optional, Union
 
 from common.gen_logging import GenLogPreferences
 from common.model import get_config_default
```
```diff
@@ -56,8 +56,8 @@ class DraftModelLoadRequest(BaseModel):
             "draft_rope_scale", model_type="draft"
         )
     )
-    draft_rope_alpha: Optional[float] = Field(
-        description="Automatically calculated if not present",
+    draft_rope_alpha: Optional[Union[float, Literal["auto"]]] = Field(
+        description='Automatically calculated if set to "auto"',
         default_factory=lambda: get_config_default(
             "draft_rope_alpha", model_type="draft"
         ),
```
```diff
@@ -114,8 +114,8 @@ class ModelLoadRequest(BaseModel):
         default_factory=lambda: get_config_default("rope_scale"),
         examples=[1.0],
     )
-    rope_alpha: Optional[float] = Field(
-        description="Automatically calculated if not present",
+    rope_alpha: Optional[Union[float, Literal["auto"]]] = Field(
+        description='Automatically calculated if set to "auto"',
         default_factory=lambda: get_config_default("rope_alpha"),
         examples=[1.0],
     )
```
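
The widened annotation means both load-request fields now validate floats and the `"auto"` literal while rejecting anything else. A minimal sketch with a toy model (`RopeAlphaDemo` is a hypothetical name, not the API's actual request schema):

```python
from typing import Literal, Optional, Union

from pydantic import BaseModel, ValidationError


class RopeAlphaDemo(BaseModel):
    # Mirrors the annotation used by rope_alpha / draft_rope_alpha above.
    rope_alpha: Optional[Union[float, Literal["auto"]]] = None


print(RopeAlphaDemo(rope_alpha=2.5).rope_alpha)     # 2.5
print(RopeAlphaDemo(rope_alpha="auto").rope_alpha)  # 'auto'
print(RopeAlphaDemo().rope_alpha)                   # None (field stays optional)

try:
    RopeAlphaDemo(rope_alpha="fast")
except ValidationError:
    print("rejected: neither a float nor the 'auto' literal")
```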