Model: Move calculate_rope_alpha from backend

Makes more sense to use as a utility function. Also clarify how the vars are set. Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
2025-04-20 18:20:19 -04:00 · 2025-04-20 18:20:19 -04:00 · 8e238fa8f6
commit 8e238fa8f6
parent 027ffce05d
3 changed files with 33 additions and 27 deletions
--- a/common/model.py
+++ b/common/model.py
@ -112,8 +112,6 @@ async def load_model_gen(model_path: pathlib.Path, **kwargs):
    kwargs = {**config.model_defaults, **kwargs}
    kwargs = await apply_inline_overrides(model_path, **kwargs)

-    print(kwargs)
-
    # Create a new container
    new_container = await ExllamaV2Container.create(
        model_path.resolve(), False, **kwargs
--- a/common/utils.py
+++ b/common/utils.py
@ -87,3 +87,23 @@ def unwrap_optional_type(type_hint) -> Type:

    return type_hint

+
def calculate_rope_alpha(base_seq_len: int, target_seq_len: int) -> float:
    """
    Converts a given max sequence length to a rope alpha value.

    The alpha is derived from how far the requested context length
    extends past the model's configured length. A target at or below
    the base length needs no scaling, so alpha is 1.

    Args:
        base_seq_len: The model's configured sequence length.
        target_seq_len: The user-specified max sequence length.

    Returns:
        1 when the target does not exceed the base length, otherwise
        the value of a quadratic fit evaluated at target / base.

    Raises:
        ZeroDivisionError: If base_seq_len is 0.
    """

    # Ratio of the requested length to the model's configured length.
    # This must be target / base: only a target that exceeds the base
    # (ratio > 1) should produce an alpha above 1.
    ratio = target_seq_len / base_seq_len

    # Default to a 1 alpha if the ratio is ever less than or equal to 1
    # (the target fits within the model's configured length)
    if ratio <= 1.0:
        return 1

    # Quadratic fit mapping the length ratio to an alpha value
    return -0.13436 + 0.80541 * ratio + 0.28833 * ratio**2