Merge pull request #112 from DocShotgun/main
Separate new prompt tokens from those reused from cache in metric logging
This commit is contained in:
commit
516b52b341
2 changed files with 9 additions and 2 deletions
|
|
@ -1144,6 +1144,7 @@ class ExllamaV2Container:
|
|||
log_metrics(
|
||||
result.get("time_enqueued"),
|
||||
result.get("prompt_tokens"),
|
||||
result.get("cached_tokens"),
|
||||
result.get("time_prefill"),
|
||||
result.get("new_tokens"),
|
||||
result.get("time_generate"),
|
||||
|
|
|
|||
|
|
@ -72,6 +72,7 @@ def log_response(response: str):
|
|||
def log_metrics(
|
||||
queue_time: float,
|
||||
prompt_tokens: int,
|
||||
cached_tokens: int,
|
||||
prompt_time: float,
|
||||
generated_tokens: int,
|
||||
generate_time: float,
|
||||
|
|
@ -88,9 +89,14 @@ def log_metrics(
|
|||
itemization.append(f"Queue: {round(queue_time, 2)} s")
|
||||
|
||||
prompt_ts = (
|
||||
"Indeterminate" if prompt_time == 0 else round(prompt_tokens / prompt_time, 2)
|
||||
"Indeterminate"
|
||||
if prompt_time == 0
|
||||
else round((prompt_tokens - cached_tokens) / prompt_time, 2)
|
||||
)
|
||||
itemization.append(
|
||||
f"Process: {cached_tokens} cached tokens and "
|
||||
f"{prompt_tokens - cached_tokens} new tokens at {prompt_ts} T/s"
|
||||
)
|
||||
itemization.append(f"Process: {prompt_ts} T/s")
|
||||
|
||||
generate_ts = (
|
||||
"Indeterminate"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue