From 7abbac098abc001bc347ffcf8226fca4d3924907 Mon Sep 17 00:00:00 2001
From: kingbri
Date: Sun, 17 Mar 2024 01:04:12 -0400
Subject: [PATCH] Config: Update Q4 in comments

Wasn't present when the option was added.

Signed-off-by: kingbri
---
 config_sample.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/config_sample.yml b/config_sample.yml
index 077be81..070c1d1 100644
--- a/config_sample.yml
+++ b/config_sample.yml
@@ -103,7 +103,8 @@ model:
   # Disable Flash-attention 2. Set to True for GPUs lower than Nvidia's 3000 series. (default: False)
   #no_flash_attention: False
 
-  # Enable 8 bit cache mode for VRAM savings (slight performance hit). Possible values FP16, FP8. (default: FP16)
+  # Enable 8 bit cache mode for VRAM savings (slight performance hit).
+  # Possible values FP16, FP8, Q4. (default: FP16)
   #cache_mode: FP16
 
   # Set the prompt template for this model. If empty, chat completions will be disabled. (default: Empty)