diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
index 373a753..5233229 100644
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -1210,7 +1210,7 @@ class ExllamaV2Container:
 
                 # Second yield if eos is true
                 if result.get("eos"):
-                    log_response(full_response)
+                    log_response(request_id, full_response)
 
                     eos_reason = result.get("eos_reason")
                     finish_reason = (
@@ -1271,6 +1271,7 @@ class ExllamaV2Container:
             # Log the metrics if present
             if metrics_result:
                 log_metrics(
+                    request_id,
                     metrics_result.get("time_enqueued"),
                     metrics_result.get("prompt_tokens"),
                     metrics_result.get("cached_tokens"),
diff --git a/common/gen_logging.py b/common/gen_logging.py
index 94c4405..9995818 100644
--- a/common/gen_logging.py
+++ b/common/gen_logging.py
@@ -64,14 +64,18 @@ def log_prompt(prompt: str, request_id: str, negative_prompt: Optional[str]):
         logger.info(f"Negative Prompt: {formatted_negative_prompt}\n")
 
 
-def log_response(response: str):
+def log_response(request_id: str, response: str):
     """Logs the response to console."""
     if PREFERENCES.prompt:
         formatted_response = "\n" + response
-        logger.info(f"Response: {formatted_response if response else 'Empty'}\n")
+        logger.info(
+            f"Response (ID: {request_id}): "
+            f"{formatted_response if response else 'Empty'}\n"
+        )
 
 
 def log_metrics(
+    request_id: str,
     queue_time: float,
     prompt_tokens: int,
     cached_tokens: int,
@@ -82,7 +86,7 @@ def log_metrics(
     max_seq_len: int,
 ):
     initial_response = (
-        f"Metrics: {generated_tokens} tokens generated in "
+        f"Metrics (ID: {request_id}): {generated_tokens} tokens generated in "
         f"{round(queue_time + prompt_time + generate_time, 2)} seconds"
    )
    itemization = []
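
For reference, a minimal usage sketch of the updated log_response helper, assuming the module layout shown above (common/gen_logging.py importable as common.gen_logging) and that prompt logging is enabled via PREFERENCES.prompt. The uuid-based request_id and the sample response text are illustrative only, not how the server actually produces its IDs:

import uuid

from common.gen_logging import log_response

# Illustrative request ID; in the server an ID is created per request and
# threaded down to the generation backend (see request_id in model.py above).
request_id = uuid.uuid4().hex

full_response = "Hello, world."

# New signature: the request ID comes first, then the response text.
# The call only logs when PREFERENCES.prompt is enabled in gen_logging.
log_response(request_id, full_response)
# Console output resembles: Response (ID: <request_id>): ...

log_metrics follows the same pattern, taking request_id as its new first positional argument so metrics lines can be matched to their request in interleaved console output.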