From 0c4cc1eba37768d7b7d4229332a071482a95e500 Mon Sep 17 00:00:00 2001
From: kingbri <8082010+kingbri1@users.noreply.github.com>
Date: Sat, 17 May 2025 21:39:41 -0400
Subject: [PATCH] Model: Add prompt logging to ExllamaV3

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
---
 backends/exllamav3/model.py | 7 +++++++
 common/gen_logging.py       | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/backends/exllamav3/model.py b/backends/exllamav3/model.py
index 7b39a33..1fcc8ef 100644
--- a/backends/exllamav3/model.py
+++ b/backends/exllamav3/model.py
@@ -28,6 +28,7 @@ from common.concurrency import iterate_in_threadpool
 from common.gen_logging import (
     log_generation_params,
     log_metrics,
+    log_prompt,
 )
 from common.hardware import hardware_supports_flash_attn
 from common.health import HealthManager
@@ -840,6 +841,12 @@ class ExllamaV3Container(BaseModelContainer):
             f"max_seq_len {self.max_seq_len}"
         )
 
+        # Log prompt to console. Add the BOS token if specified
+        log_prompt(
+            f"{self.tokenizer.bos_token if add_bos_token else ''}{prompt}",
+            request_id,
+        )
+
         generation = {}
         job = AsyncJob(
             self.generator,
diff --git a/common/gen_logging.py b/common/gen_logging.py
index 150d63c..490d257 100644
--- a/common/gen_logging.py
+++ b/common/gen_logging.py
@@ -29,7 +29,7 @@ def log_generation_params(**kwargs):
     logger.info(f"Generation options: {kwargs}\n")
 
 
-def log_prompt(prompt: str, request_id: str, negative_prompt: Optional[str]):
+def log_prompt(prompt: str, request_id: str, negative_prompt: Optional[str] = None):
     """Logs the prompt to console."""
     if config.logging.log_prompt:
         formatted_prompt = "\n" + prompt
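
Note on the gen_logging change: giving negative_prompt a default of None lets a backend
with no negative-prompt path, like the ExllamaV3 call site added above, invoke
log_prompt with just the prompt and request ID. Below is a minimal runnable sketch of
the behavior this enables; the _Config/_LoggingConfig stand-ins, the print-based output,
and the bos_token value are illustrative assumptions, not code from the repository.

    from typing import Optional

    # Hypothetical stand-ins for tabbyAPI's real config and logger objects;
    # only the log_prompt signature below mirrors the patch.
    class _LoggingConfig:
        log_prompt = True

    class _Config:
        logging = _LoggingConfig()

    config = _Config()

    def log_prompt(
        prompt: str, request_id: str, negative_prompt: Optional[str] = None
    ):
        """Logs the prompt to console (sketch of the patched signature)."""
        if config.logging.log_prompt:
            formatted_prompt = "\n" + prompt
            print(f"Prompt (request ID {request_id}):{formatted_prompt}")

            # Only emitted when a caller actually passes a negative prompt;
            # the ExllamaV3 call site added by this patch never does.
            if negative_prompt:
                print(f"Negative prompt (request ID {request_id}):\n{negative_prompt}")

    # The new ExllamaV3 call site can now omit negative_prompt entirely,
    # optionally prepending the BOS token:
    bos_token = "<s>"  # hypothetical tokenizer.bos_token value
    log_prompt(f"{bos_token}Once upon a time", request_id="example-request-id")

Defaulting the parameter rather than removing it keeps the signature source-compatible,
so any existing callers that do pass a negative prompt continue to work unchanged.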