Model: Attach request ID to logs

When multiple requests produce logs at the same time, track which log
corresponds to which request.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri 2024-08-01 00:25:54 -04:00
parent 9390d362dd
commit 0bcb4e4a7d
2 changed files with 9 additions and 4 deletions

View file

@ -1210,7 +1210,7 @@ class ExllamaV2Container:
# Second yield if eos is true
if result.get("eos"):
log_response(full_response)
log_response(request_id, full_response)
eos_reason = result.get("eos_reason")
finish_reason = (
@ -1271,6 +1271,7 @@ class ExllamaV2Container:
# Log the metrics if present
if metrics_result:
log_metrics(
request_id,
metrics_result.get("time_enqueued"),
metrics_result.get("prompt_tokens"),
metrics_result.get("cached_tokens"),

View file

@ -64,14 +64,18 @@ def log_prompt(prompt: str, request_id: str, negative_prompt: Optional[str]):
logger.info(f"Negative Prompt: {formatted_negative_prompt}\n")
def log_response(response: str):
def log_response(request_id: str, response: str):
"""Logs the response to console."""
if PREFERENCES.prompt:
formatted_response = "\n" + response
logger.info(f"Response: {formatted_response if response else 'Empty'}\n")
logger.info(
f"Response (ID: {request_id}): "
f"{formatted_response if response else 'Empty'}\n"
)
def log_metrics(
request_id: str,
queue_time: float,
prompt_tokens: int,
cached_tokens: int,
@ -82,7 +86,7 @@ def log_metrics(
max_seq_len: int,
):
initial_response = (
f"Metrics: {generated_tokens} tokens generated in "
f"Metrics (ID: {request_id}): {generated_tokens} tokens generated in "
f"{round(queue_time + prompt_time + generate_time, 2)} seconds"
)
itemization = []