Model: Remove num_experts_per_token

This shouldn't be an exposed option, since changing it always breaks
inference with the model. Let the model's config.json handle it
instead.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
kingbri 2025-03-19 11:52:10 -04:00
parent 698d8339cb
commit 79f9c6e854
6 changed files with 0 additions and 30 deletions


@@ -22,7 +22,6 @@ class ModelCardParameters(BaseModel):
     chunk_size: Optional[int] = 2048
     prompt_template: Optional[str] = None
     prompt_template_content: Optional[str] = None
-    num_experts_per_token: Optional[int] = None
     use_vision: Optional[bool] = False
     # Draft is another model, so include it in the card params
@@ -114,7 +113,6 @@ class ModelLoadRequest(BaseModel):
     chunk_size: Optional[int] = None
     prompt_template: Optional[str] = None
     vision: Optional[bool] = None
-    num_experts_per_token: Optional[int] = None
     # Non-config arguments
     draft_model: Optional[DraftModelLoadRequest] = Field(
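
For reference, a minimal sketch of how a loader could take the value from the
model's own config.json instead of an API parameter, which is the behavior the
commit message points to. The helper name and the "num_experts_per_tok" key
(the HF-style spelling used by Mixtral-like MoE configs) are assumptions for
illustration, not code from this repository.

import json
from pathlib import Path
from typing import Optional

def experts_per_token_from_config(model_dir: Path) -> Optional[int]:
    # Hypothetical helper: read the MoE expert count straight from the
    # model directory's config.json rather than accepting it as a
    # load-request parameter. The key name is an assumption.
    config_path = model_dir / "config.json"
    if not config_path.is_file():
        return None
    with config_path.open(encoding="utf-8") as file:
        config = json.load(file)
    value = config.get("num_experts_per_tok")
    return int(value) if value is not None else None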