From 2ebefe8258e99fcce229cb7b90cf71dfd5602a47 Mon Sep 17 00:00:00 2001 From: kingbri Date: Wed, 13 Mar 2024 23:13:55 -0400 Subject: [PATCH] Logging: Move metrics to gen logging This didn't have a place in the generation function. Signed-off-by: kingbri --- backends/exllamav2/model.py | 35 +++++----------------------------- common/gen_logging.py | 38 +++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 30 deletions(-) diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index e1c90a5..c803c15 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -20,7 +20,7 @@ from loguru import logger from typing import List, Optional, Union from backends.exllamav2.grammar import ExLlamaV2Grammar -from common.gen_logging import log_generation_params, log_prompt, log_response +from common.gen_logging import log_generation_params, log_metrics, log_prompt, log_response from common.templating import ( PromptTemplate, find_template_from_model, @@ -969,35 +969,10 @@ class ExllamaV2Container: # Print response log_response(full_response) + # Print metrics elapsed_time = last_chunk_time - start_time + context_len = None if ids is None else context_len - initial_response = ( - f"Metrics: {generated_tokens} tokens generated in " - f"{round(elapsed_time, 2)} seconds" - ) - itemization = [] - extra_parts = [] - - # Add tokens per second - tokens_per_second = ( - "Indeterminate" - if elapsed_time == 0 - else round(generated_tokens / elapsed_time, 2) - ) - itemization.append(f"{tokens_per_second} T/s") - - # Add context (original token count) - if ids is not None: - itemization.append(f"context {context_len} tokens") - - if context_len > self.config.max_seq_len: - extra_parts.append("<-- Not accurate (truncated)") - - # Print output - logger.info( - initial_response - + " (" - + ", ".join(itemization) - + ") " - + " ".join(extra_parts) + log_metrics( + generated_tokens, elapsed_time, context_len, self.config.max_seq_len ) diff --git 
# Reconstructed from the collapsed diff hunk for common/gen_logging.py
# (index 7ae5bf6..2a7ff8e, "@@ -67,3 +67,41 @@"). The hunk appends this
# function directly after the final lines of log_response().


def log_metrics(
    generated_tokens: int,
    elapsed_time: float,
    context_len: Optional[int],
    max_seq_len: int,
) -> None:
    """Log a one-line summary of generation metrics.

    The message has the form
    ``Metrics: N tokens generated in S seconds (R T/s[, context C tokens]) ``
    followed by a truncation note when the context exceeds ``max_seq_len``.

    Args:
        generated_tokens: Number of tokens that were generated.
        elapsed_time: Generation duration in seconds.
        context_len: Original token count of the context, or ``None`` when
            it is not available; the context item is then left out.
        max_seq_len: Maximum sequence length; a context longer than this is
            flagged as not accurate (truncated).
    """
    initial_response = (
        f"Metrics: {generated_tokens} tokens generated in "
        f"{round(elapsed_time, 2)} seconds"
    )
    itemization = []
    extra_parts = []

    # Tokens per second. A zero duration would divide by zero, so the rate
    # is reported as indeterminate instead.
    tokens_per_second = (
        "Indeterminate"
        if elapsed_time == 0
        else round(generated_tokens / elapsed_time, 2)
    )
    itemization.append(f"{tokens_per_second} T/s")

    # Context (original token count). Compare against None explicitly:
    # None is the "no context available" sentinel, while 0 is a real count
    # that a plain truthiness test would silently drop.
    if context_len is not None:
        itemization.append(f"context {context_len} tokens")

        if context_len > max_seq_len:
            extra_parts.append("<-- Not accurate (truncated)")

    # The space after ")" is emitted even when there are no extra parts,
    # which keeps the message format unchanged.
    logger.info(
        f"{initial_response} ({', '.join(itemization)}) {' '.join(extra_parts)}"
    )