Model: Remove override_base_seq_len

DocShotgun 2024-10-30 10:03:08 +08:00
parent 7d18d2e2ca
commit 603760cecb
5 changed files with 0 additions and 33 deletions


@@ -220,11 +220,6 @@ class ExllamaV2Container:
         # Hardcode max output length to 16
         self.config.max_output_len = 16
 
-        # Then override the base_seq_len if present
-        override_base_seq_len = kwargs.get("override_base_seq_len")
-        if override_base_seq_len:
-            self.config.max_seq_len = override_base_seq_len
-
         # Grab the base model's sequence length before overrides for
         # rope calculations
         base_seq_len = self.config.max_seq_len
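
With the override gone, the base sequence length comes straight from the value loaded out of config.json. A minimal runnable sketch of the resulting flow, assuming a max_seq_len kwarg carries the user's context size (the function shape and kwarg name are assumptions for illustration, not the repo's exact source):

from types import SimpleNamespace

def resolve_seq_lens(config, **kwargs):
    # Base length now comes straight from config.json; there is no
    # override_base_seq_len step in between anymore.
    base_seq_len = config.max_seq_len  # kept for rope calculations
    user_seq_len = kwargs.get("max_seq_len")  # assumed user-facing kwarg
    if user_seq_len:
        config.max_seq_len = user_seq_len
    return base_seq_len

cfg = SimpleNamespace(max_seq_len=32768)  # stand-in for the loaded config
print(resolve_seq_lens(cfg, max_seq_len=4096), cfg.max_seq_len)  # 32768 4096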


@@ -92,7 +92,6 @@
 "# @markdown ---\n",
 "# @markdown Model parameters:\n",
 "ContextSize = 4096 # @param {type:\"integer\"}\n",
-"OverrideBaseSeqLen = 4096 # @param {type:\"integer\"}\n",
 "RopeScale = 1.0 # @param {type:\"number\"}\n",
 "RopeAlpha = 1.0 # @param {type:\"number\"}\n",
 "NumExpertsPerToken = 2 # @param {type:\"integer\"}\n",
@@ -169,11 +168,6 @@
 "  # Fetched from the model's base sequence length in config.json by default\n",
 "  max_seq_len: {ContextSize}\n",
 "\n",
-"  # Overrides base model context length (default: None)\n",
-"  # WARNING: Don't set this unless you know what you're doing!\n",
-"  # Only use this if the model's base sequence length in config.json is incorrect (ex. Mistral/Mixtral models)\n",
-"  override_base_seq_len: {OverrideBaseSeqLen}\n",
-"\n",
 "  # Automatically allocate resources to GPUs (default: True)\n",
 "  gpu_split_auto: True\n",
 "\n",


@@ -176,16 +176,6 @@ class ModelConfig(BaseConfigModel):
         ),
         ge=0,
     )
-    override_base_seq_len: Optional[int] = Field(
-        None,
-        description=(
-            "Overrides base model context length (default: Empty).\n"
-            "WARNING: Don't set this unless you know what you're doing!\n"
-            "Again, do NOT use this for configuring context length, "
-            "use max_seq_len above ^"
-        ),
-        ge=0,
-    )
     tensor_parallel: Optional[bool] = Field(
         False,
         description=(
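
For reference, the removed field followed the same Pydantic pattern as its neighbors: optional integer, None default, a description, and a ge=0 bound. A self-contained sketch of that pattern (class and field names here are illustrative, not from the repo):

from typing import Optional
from pydantic import BaseModel, Field

class ExampleConfig(BaseModel):
    # Same shape as the fields above: optional, defaults to None,
    # and validated as >= 0 when a value is supplied.
    some_len: Optional[int] = Field(
        None,
        description="An optional non-negative length.",
        ge=0,
    )

print(ExampleConfig(some_len=4096))  # ok
# ExampleConfig(some_len=-1) would raise a ValidationError because of ge=0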


@@ -69,11 +69,6 @@ model:
   # Fetched from the model's base sequence length in config.json by default.
   max_seq_len:
 
-  # Overrides base model context length (default: Empty).
-  # WARNING: Don't set this unless you know what you're doing!
-  # Again, do NOT use this for configuring context length, use max_seq_len above ^
-  override_base_seq_len:
-
   # Load model with tensor parallelism.
   # Falls back to autosplit if GPU split isn't provided.
   # This ignores the gpu_split_auto value.
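
A quick way to confirm what the trimmed model block looks like after this change; the snippet below is an illustrative sketch (values are made up) and assumes PyYAML is installed:

import yaml  # PyYAML

# The model block with the removed key absent; values are illustrative.
sample = """
model:
  max_seq_len: 4096
  tensor_parallel: false
"""
config = yaml.safe_load(sample)
assert "override_base_seq_len" not in config["model"]
print(config["model"]["max_seq_len"])  # 4096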


@@ -82,13 +82,6 @@ class ModelLoadRequest(BaseModel):
         default=None,
         examples=[4096],
     )
-    override_base_seq_len: Optional[int] = Field(
-        description=(
-            "Overrides the model's base sequence length. " "Leave blank if unsure"
-        ),
-        default=None,
-        examples=[4096],
-    )
     cache_size: Optional[int] = Field(
         description=("Number in tokens, must be greater than or equal to max_seq_len"),
         default=None,
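
On the API side, a load request now carries only max_seq_len (plus cache_size, which per the description above must be at least max_seq_len). A hedged example of such a request; the endpoint path, header name, port, and model name are assumptions for illustration, not confirmed by this diff:

import requests

payload = {
    "name": "my-model",    # hypothetical model directory name
    "max_seq_len": 4096,   # matches the field's example above
    "cache_size": 4096,    # in tokens, >= max_seq_len
}
# Assumed local endpoint and admin-key header; adjust for your deployment.
resp = requests.post(
    "http://127.0.0.1:5000/v1/model/load",
    json=payload,
    headers={"x-admin-key": "YOUR_ADMIN_KEY"},
)
resp.raise_for_status()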