Merge pull request #329 from DocShotgun/exl3

Exllamav3 cache quantization
This commit is contained in:
Brian 2025-05-08 23:11:45 -04:00 committed by GitHub
commit 527afc206b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 70 additions and 12 deletions

View file

@@ -84,7 +84,7 @@ class ChatCompletionRequest(CommonCompletionRequest):
# Chat completions requests do not have a BOS token preference. Backend
# respects the tokenization config for the individual model.
-add_bos_token: Optional[bool] = Field(default = None)
+add_bos_token: Optional[bool] = Field(default=None)
@field_validator("add_bos_token", mode="after")
def force_bos_token(cls, v):