Merge pull request #329 from DocShotgun/exl3
Exllamav3 cache quantization
This commit is contained in:
commit
527afc206b
5 changed files with 70 additions and 12 deletions
|
|
@@ -84,7 +84,7 @@ class ChatCompletionRequest(CommonCompletionRequest):
|
|||
|
||||
# Chat completions requests do not have a BOS token preference. Backend
|
||||
# respects the tokenization config for the individual model.
|
||||
add_bos_token: Optional[bool] = Field(default = None)
|
||||
add_bos_token: Optional[bool] = Field(default=None)
|
||||
|
||||
@field_validator("add_bos_token", mode="after")
|
||||
def force_bos_token(cls, v):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue