Model: Remove override_base_seq_len

Drops the override_base_seq_len option everywhere it appeared: the ExllamaV2 container, the Colab notebook, the config models, the sample config, and the model load API. max_seq_len is now the only way to configure context length.

parent 7d18d2e2ca
commit 603760cecb

5 changed files with 0 additions and 33 deletions
@@ -220,11 +220,6 @@ class ExllamaV2Container:
         # Hardcode max output length to 16
         self.config.max_output_len = 16
 
-        # Then override the base_seq_len if present
-        override_base_seq_len = kwargs.get("override_base_seq_len")
-        if override_base_seq_len:
-            self.config.max_seq_len = override_base_seq_len
-
         # Grab the base model's sequence length before overrides for
         # rope calculations
         base_seq_len = self.config.max_seq_len
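With the override gone, base_seq_len always comes straight from the model's config.json, which is what the rope calculation below the removed block relies on. A minimal sketch of how such a calculation typically uses it, assuming NTK-aware alpha scaling; the function name and the quadratic coefficients are a commonly used approximation, not necessarily this project's exact code:

# Illustrative only: estimate rope alpha from the ratio of requested
# context to the model's native context. The curve fit below is a
# widely circulated approximation, not confirmed to match this repo.
def calculate_rope_alpha(base_seq_len: int, max_seq_len: int) -> float:
    ratio = max_seq_len / base_seq_len
    if ratio <= 1.0:
        return 1.0  # no extension needed at or below native context
    return -0.13436 + 0.80541 * ratio + 0.28833 * ratio**2

# Extending a 4096-token base model to 16384 tokens (4x) gives alpha ~7.7
print(calculate_rope_alpha(4096, 16384))

Overriding base_seq_len would have skewed this ratio, which is one reason the option was easy to misuse.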
@@ -92,7 +92,6 @@
     "# @markdown ---\n",
     "# @markdown Model parameters:\n",
     "ContextSize = 4096 # @param {type:\"integer\"}\n",
-    "OverrideBaseSeqLen = 4096 # @param {type:\"integer\"}\n",
     "RopeScale = 1.0 # @param {type:\"number\"}\n",
     "RopeAlpha = 1.0 # @param {type:\"number\"}\n",
     "NumExpertsPerToken = 2 # @param {type:\"integer\"}\n",
@@ -169,11 +168,6 @@
     "  # Fetched from the model's base sequence length in config.json by default\n",
     "  max_seq_len: {ContextSize}\n",
     "\n",
-    "  # Overrides base model context length (default: None)\n",
-    "  # WARNING: Don't set this unless you know what you're doing!\n",
-    "  # Only use this if the model's base sequence length in config.json is incorrect (ex. Mistral/Mixtral models)\n",
-    "  override_base_seq_len: {OverrideBaseSeqLen}\n",
-    "\n",
     "  # Automatically allocate resources to GPUs (default: True)\n",
     "  gpu_split_auto: True\n",
     "\n",
@@ -176,16 +176,6 @@ class ModelConfig(BaseConfigModel):
         ),
         ge=0,
     )
-    override_base_seq_len: Optional[int] = Field(
-        None,
-        description=(
-            "Overrides base model context length (default: Empty).\n"
-            "WARNING: Don't set this unless you know what you're doing!\n"
-            "Again, do NOT use this for configuring context length, "
-            "use max_seq_len above ^"
-        ),
-        ge=0,
-    )
     tensor_parallel: Optional[bool] = Field(
         False,
         description=(
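For users whose config file still carries the removed key: assuming the project's BaseConfigModel keeps Pydantic's default extra="ignore" behavior, the stale key is silently dropped rather than raising an error. A minimal sketch (the field description is paraphrased for brevity):

from typing import Optional
from pydantic import BaseModel, Field

class ModelConfig(BaseModel):
    # max_seq_len keeps the same Field pattern shown in the hunk above
    max_seq_len: Optional[int] = Field(
        None,
        description="Max sequence length. Fetched from config.json by default.",
        ge=0,
    )

# A stale config that still sets the removed key: Pydantic's default
# extra="ignore" discards unknown fields instead of erroring.
cfg = ModelConfig(max_seq_len=4096, override_base_seq_len=2048)
print(cfg.max_seq_len)                        # 4096
print(hasattr(cfg, "override_base_seq_len"))  # False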
@@ -69,11 +69,6 @@ model:
   # Fetched from the model's base sequence length in config.json by default.
   max_seq_len:
 
-  # Overrides base model context length (default: Empty).
-  # WARNING: Don't set this unless you know what you're doing!
-  # Again, do NOT use this for configuring context length, use max_seq_len above ^
-  override_base_seq_len:
-
   # Load model with tensor parallelism.
   # Falls back to autosplit if GPU split isn't provided.
   # This ignores the gpu_split_auto value.
@@ -82,13 +82,6 @@ class ModelLoadRequest(BaseModel):
         default=None,
         examples=[4096],
     )
-    override_base_seq_len: Optional[int] = Field(
-        description=(
-            "Overrides the model's base sequence length. " "Leave blank if unsure"
-        ),
-        default=None,
-        examples=[4096],
-    )
     cache_size: Optional[int] = Field(
         description=("Number in tokens, must be greater than or equal to max_seq_len"),
         default=None,
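On the API side, clients that previously sent override_base_seq_len in the load request should now rely on max_seq_len alone. A hedged sketch of a load call, assuming a tabbyAPI-style POST /v1/model/load endpoint with an x-admin-key auth header; the endpoint path, header name, and model-name field are assumptions, not shown in this diff:

import requests

payload = {
    "name": "my-model",   # assumed field for the model folder name
    "max_seq_len": 4096,  # the remaining way to set context length
    # "override_base_seq_len": 4096,  # removed; now ignored by the schema
}
resp = requests.post(
    "http://127.0.0.1:5000/v1/model/load",      # assumed endpoint
    json=payload,
    headers={"x-admin-key": "YOUR_ADMIN_KEY"},  # assumed auth header
)
print(resp.status_code)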