Model: Add context in response output

When printing the generation summary to the console, also report the
context size (ingestion token count).

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri 2023-11-19 00:49:32 -05:00
parent f47919b1d3
commit 31bc418795

View file

@ -373,7 +373,10 @@ class ModelContainer:
# Add generation speed in tokens per second, rounded to 2 decimal places.
# Reports "Indeterminate" when elapsed_time is 0 so we never divide by zero.
extra_responses.append(f"{'Indeterminate' if elapsed_time == 0 else round(generated_tokens / elapsed_time, 2)} T/s")
# Add the generated token count
extra_responses.append(f"{generated_tokens} tokens")
# Add context (original token count): the length of the first entry of ids.
# Skipped entirely when ids is None.
# NOTE(review): presumably ids holds the tokenized prompt with the sequence
# in its first row -- confirm against where ids is assigned.
if ids is not None:
extra_responses.append(f"context {len(ids[0])} tokens")
# Print output: the initial response followed by the extra details,
# comma-separated and wrapped in parentheses.
print(initial_response + " (" + ", ".join(extra_responses) + ")")