API: Fix CFG reporting
The model endpoint wasn't reporting if CFG is on.

Signed-off-by: kingbri <bdashore3@proton.me>
parent bbd4ee54ca
commit 6b04463051

3 changed files with 3 additions and 1 deletion
@@ -18,6 +18,7 @@ class ModelCardParameters(BaseModel):
     cache_mode: Optional[str] = "FP16"
     prompt_template: Optional[str] = None
     num_experts_per_token: Optional[int] = None
+    use_cfg: Optional[bool] = None
     draft: Optional["ModelCard"] = None
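For context, a minimal sketch of how the new field surfaces when the model card is serialized. It trims ModelCardParameters to the fields visible in the hunk above (the draft field is omitted) and assumes Pydantic v2 (on v1, use .dict() instead of model_dump()):

from typing import Optional

from pydantic import BaseModel


class ModelCardParameters(BaseModel):
    # Trimmed to the fields shown in the hunk above; draft is omitted here.
    cache_mode: Optional[str] = "FP16"
    prompt_template: Optional[str] = None
    num_experts_per_token: Optional[int] = None
    use_cfg: Optional[bool] = None  # the field this commit adds


params = ModelCardParameters(use_cfg=True)
print(params.model_dump())
# -> {'cache_mode': 'FP16', 'prompt_template': None,
#     'num_experts_per_token': None, 'use_cfg': True}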
@@ -87,7 +87,7 @@ model:
   # Enables CFG support (default: False)
   # WARNING: This flag disables Flash Attention! (a stopgap fix until it's fixed in upstream)
-  use_cfg: False
+  #use_cfg: False

   # Options for draft models (speculative decoding). This will use more VRAM!
   #draft:
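The sample now ships with use_cfg commented out, so a fresh config leaves CFG off unless the user opts in. A hedged sketch of how the flag would be read (assumes PyYAML and a config.yml laid out like the sample above; both names are assumptions, not the repo's actual loading code):

import yaml

with open("config.yml") as f:
    config = yaml.safe_load(f)

# With the key commented out, .get() falls back to False, i.e. CFG disabled.
use_cfg = (config.get("model") or {}).get("use_cfg", False)
print(f"CFG enabled: {use_cfg}")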
main.py
@@ -122,6 +122,7 @@ async def get_current_model():
         cache_mode="FP8" if MODEL_CONTAINER.cache_fp8 else "FP16",
         prompt_template=prompt_template.name if prompt_template else None,
         num_experts_per_token=MODEL_CONTAINER.config.num_experts_per_token,
+        use_cfg=MODEL_CONTAINER.use_cfg,
     ),
     logging=gen_logging.PREFERENCES,
 )
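With the field now populated, a client can see whether CFG is on for the loaded model. A usage sketch, assuming the requests library and tabbyAPI's default host/port and model route (both assumptions; adjust for your deployment):

import requests

resp = requests.get("http://localhost:5000/v1/model")
resp.raise_for_status()
card = resp.json()

# use_cfg now appears under the card's parameters instead of being omitted.
print("CFG enabled:", card.get("parameters", {}).get("use_cfg"))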