Merge pull request #254 from lucyknada/main
add draft_gpu_split option for spec decoding
This commit is contained in:
commit
2e491472d1
3 changed files with 59 additions and 16 deletions
|
|
@@ -20,7 +20,7 @@ network:
|
|||
# Turn on this option if you are ONLY connecting from localhost.
|
||||
disable_auth: false
|
||||
|
||||
# Disable fetching external content in response to requests, such as images from URLs.
|
||||
# Disable fetching external content in response to requests, such as images from URLs.
|
||||
disable_fetch_requests: false
|
||||
|
||||
# Send tracebacks over the API (default: False).
|
||||
|
|
@@ -166,6 +166,10 @@ draft_model:
|
|||
# Possible values: 'FP16', 'Q8', 'Q6', 'Q4'.
|
||||
draft_cache_mode: FP16
|
||||
|
||||
# An integer array of GBs of VRAM to split between GPUs (default: []).
|
||||
# If this isn't filled in, the draft model is autosplit.
|
||||
draft_gpu_split: []
|
||||
|
||||
# Options for Loras
|
||||
lora:
|
||||
# Directory to look for LoRAs (default: loras).
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue