Model: Add tokens/second output
Signed-off-by: kingbri <bdashore3@proton.me>
parent 021981fce0
commit 9dfa580b1e
1 changed file with 7 additions and 2 deletions
model.py (+7, -2)
@@ -1,5 +1,6 @@
 import gc, time, pathlib
 import torch
+from datetime import datetime
 from exllamav2 import(
     ExLlamaV2,
     ExLlamaV2Config,
@@ -305,7 +306,8 @@ class ModelContainer:
 
         generated_tokens = 0
         full_response = ""
-        last_chunk_time = time.time()
+        start_time = time.time()
+        last_chunk_time = start_time
 
         save_tokens = torch.empty((1, 0), dtype = torch.bool)
         chunk_buffer = ""
@@ -350,4 +352,7 @@ class ModelContainer:
                 chunk_buffer = ""
                 last_chunk_time = now
 
-            if eos or generated_tokens == max_tokens: break
+            if eos or generated_tokens == max_tokens: break
+
+        elapsed_time = last_chunk_time - start_time
+        print(f"Response generated in {round(elapsed_time, 2)} seconds ({round(generated_tokens / elapsed_time, 2)} T/s)")
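
Taken together, the diff implements a common pattern for reporting streaming throughput: capture a start timestamp once before the generation loop, refresh a last-chunk timestamp each time a chunk is emitted, and divide the token count by the elapsed span afterward. Below is a minimal, self-contained sketch of that pattern; the fake_stream generator and its timings are hypothetical stand-ins for the model's real token stream, not code from model.py.

import time

def fake_stream(n):
    # Hypothetical stand-in for the model's streaming token generator.
    for i in range(n):
        time.sleep(0.01)  # simulate per-token decode latency
        yield f"tok{i} "

generated_tokens = 0
start_time = time.time()
last_chunk_time = start_time

for chunk in fake_stream(50):
    generated_tokens += 1
    # Stamp the arrival of the latest chunk, as the commit does.
    last_chunk_time = time.time()

# Measuring against the last chunk's timestamp (rather than calling
# time.time() again after the loop) keeps any post-generation teardown
# out of the reported window.
elapsed_time = last_chunk_time - start_time
print(f"Response generated in {round(elapsed_time, 2)} seconds ({round(generated_tokens / elapsed_time, 2)} T/s)")

One caveat: if the loop produces no chunks, elapsed_time stays 0 and the division raises ZeroDivisionError, so the print as written assumes at least one token was generated.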