Merge pull request #254 from lucyknada/main

add draft_gpu_split option for spec decoding
This commit is contained in:
Brian 2025-02-11 16:48:03 -05:00 committed by GitHub
commit 2e491472d1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 59 additions and 16 deletions

View file

@ -20,7 +20,7 @@ network:
# Turn on this option if you are ONLY connecting from localhost.
disable_auth: false
# Disable fetching external content in response to requests, such as images from URLs.
# Disable fetching external content in response to requests, such as images from URLs.
disable_fetch_requests: false
# Send tracebacks over the API (default: False).
@ -166,6 +166,10 @@ draft_model:
# Possible values: 'FP16', 'Q8', 'Q6', 'Q4'.
draft_cache_mode: FP16
# An integer array of GBs of VRAM to split between GPUs (default: []).
# If this isn't filled in, the draft model is autosplit.
draft_gpu_split: []
# Options for Loras
lora:
# Directory to look for LoRAs (default: loras).