API: Fix CFG reporting
The model endpoint wasn't reporting if CFG is on.

Signed-off-by: kingbri <bdashore3@proton.me>
parent bbd4ee54ca
commit 6b04463051

3 changed files with 3 additions and 1 deletion
@@ -18,6 +18,7 @@ class ModelCardParameters(BaseModel):
     cache_mode: Optional[str] = "FP16"
     prompt_template: Optional[str] = None
     num_experts_per_token: Optional[int] = None
+    use_cfg: Optional[bool] = None
     draft: Optional["ModelCard"] = None
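For context, a minimal sketch of how the new field surfaces when the model card is serialized. It trims ModelCardParameters to the fields visible in the hunk above (the draft field is omitted) and assumes Pydantic v2 (on v1, use .dict() instead of model_dump()):

from typing import Optional

from pydantic import BaseModel


class ModelCardParameters(BaseModel):
    # Trimmed to the fields shown in the hunk above; draft is omitted here.
    cache_mode: Optional[str] = "FP16"
    prompt_template: Optional[str] = None
    num_experts_per_token: Optional[int] = None
    use_cfg: Optional[bool] = None  # the field this commit adds


params = ModelCardParameters(use_cfg=True)
print(params.model_dump())
# -> {'cache_mode': 'FP16', 'prompt_template': None,
#     'num_experts_per_token': None, 'use_cfg': True}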
@@ -87,7 +87,7 @@ model:
   # Enables CFG support (default: False)
   # WARNING: This flag disables Flash Attention! (a stopgap fix until it's fixed in upstream)
-  use_cfg: False
+  #use_cfg: False

   # Options for draft models (speculative decoding). This will use more VRAM!
   #draft:
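The sample now ships with use_cfg commented out, so a fresh config leaves CFG off unless the user opts in. A hedged sketch of how the flag would be read (assumes PyYAML and a config.yml laid out like the sample above; both names are assumptions, not the repo's actual loading code):

import yaml

with open("config.yml") as f:
    config = yaml.safe_load(f)

# With the key commented out, .get() falls back to False, i.e. CFG disabled.
use_cfg = (config.get("model") or {}).get("use_cfg", False)
print(f"CFG enabled: {use_cfg}")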
main.py
@@ -122,6 +122,7 @@ async def get_current_model():
         cache_mode="FP8" if MODEL_CONTAINER.cache_fp8 else "FP16",
         prompt_template=prompt_template.name if prompt_template else None,
         num_experts_per_token=MODEL_CONTAINER.config.num_experts_per_token,
+        use_cfg=MODEL_CONTAINER.use_cfg,
     ),
     logging=gen_logging.PREFERENCES,
 )
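With the field now populated, a client can see whether CFG is on for the loaded model. A usage sketch, assuming the requests library and tabbyAPI's default host/port and model route (both assumptions; adjust for your deployment):

import requests

resp = requests.get("http://localhost:5000/v1/model")
resp.raise_for_status()
card = resp.json()

# use_cfg now appears under the card's parameters instead of being omitted.
print("CFG enabled:", card.get("parameters", {}).get("use_cfg"))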