From 9dfa580b1e123a3f944846affc0e64f65de9210d Mon Sep 17 00:00:00 2001
From: kingbri
Date: Fri, 17 Nov 2023 01:16:20 -0500
Subject: [PATCH] Model: Add tokens/second output

Signed-off-by: kingbri
---
 model.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/model.py b/model.py
index 69d9c2d..8aa1872 100644
--- a/model.py
+++ b/model.py
@@ -305,7 +305,8 @@ class ModelContainer:
 
         generated_tokens = 0
         full_response = ""
-        last_chunk_time = time.time()
+        start_time = time.time()
+        last_chunk_time = start_time
 
         save_tokens = torch.empty((1, 0), dtype = torch.bool)
         chunk_buffer = ""
@@ -350,4 +351,8 @@ class ModelContainer:
                 chunk_buffer = ""
                 last_chunk_time = now
 
-            if eos or generated_tokens == max_tokens: break
\ No newline at end of file
+            if eos or generated_tokens == max_tokens: break
+
+        elapsed_time = last_chunk_time - start_time
+        tokens_per_second = generated_tokens / elapsed_time if elapsed_time > 0 else 0.0
+        print(f"Response generated in {round(elapsed_time, 2)} seconds ({round(tokens_per_second, 2)} T/s)")