API: Fix CFG reporting

The model endpoint wasn't reporting whether CFG is on.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri 2024-01-02 13:54:16 -05:00
parent bbd4ee54ca
commit 6b04463051
3 changed files with 3 additions and 1 deletions

View file

@@ -18,6 +18,7 @@ class ModelCardParameters(BaseModel):
cache_mode: Optional[str] = "FP16"
prompt_template: Optional[str] = None
num_experts_per_token: Optional[int] = None
use_cfg: Optional[bool] = None
draft: Optional["ModelCard"] = None

View file

@@ -87,7 +87,7 @@ model:
# Enables CFG support (default: False)
# WARNING: This flag disables Flash Attention! (a stopgap fix until it's fixed in upstream)
use_cfg: False
#use_cfg: False
# Options for draft models (speculative decoding). This will use more VRAM!
#draft:

View file

@@ -122,6 +122,7 @@ async def get_current_model():
cache_mode="FP8" if MODEL_CONTAINER.cache_fp8 else "FP16",
prompt_template=prompt_template.name if prompt_template else None,
num_experts_per_token=MODEL_CONTAINER.config.num_experts_per_token,
use_cfg=MODEL_CONTAINER.use_cfg,
),
logging=gen_logging.PREFERENCES,
)