Add health check monitoring for EXL2 errors (#206)

* Add health check monitoring for EXL2 errors

* Health: Format and change status code

A status code of 503 makes more sense to use.
---------
This commit is contained in:
TerminalMan 2024-09-23 02:40:36 +01:00 committed by GitHub
parent e0ffa90865
commit 2cda890deb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 73 additions and 3 deletions

42
common/health.py Normal file
View file

@@ -0,0 +1,42 @@
import asyncio
from collections import deque
from datetime import datetime, timezone
from functools import partial
from pydantic import BaseModel, Field
from typing import Union
class UnhealthyEvent(BaseModel):
    """Represents an error that makes the system unhealthy"""

    # When no explicit value is supplied, the timestamp is produced per
    # instance by the factory below as a timezone-aware UTC datetime.
    time: datetime = Field(
        description="Time the error occurred in UTC time",
        default_factory=lambda: datetime.now(timezone.utc),
    )

    # Text describing the error; a generic placeholder is used when the
    # caller does not provide one.
    description: str = Field(
        default="Unknown error",
        description="The error message",
    )
class HealthManagerClass:
    """Holds the recorded unhealthy events that define the health state."""

    def __init__(self):
        # Guards every read and write of ``issues``.
        self._lock = asyncio.Lock()
        # Bounded at 100 entries so stored errors cannot grow without limit;
        # a full deque discards its oldest entry when a new one is appended.
        self.issues: deque[UnhealthyEvent] = deque(maxlen=100)

    async def add_unhealthy_event(self, error: Union[str, Exception]):
        """Record a new unhealthy event.

        Args:
            error: Either a ready-made message, or an exception which is
                rendered as ``"<ExceptionClassName>: <message>"``.
        """
        # Exceptions are flattened to text; any other value is passed through
        # unchanged as the event description.
        message = (
            f"{error.__class__.__name__}: {error!s}"
            if isinstance(error, Exception)
            else error
        )

        # The event is built while the lock is held, so its default timestamp
        # is taken at the moment it is stored.
        async with self._lock:
            self.issues.append(UnhealthyEvent(description=message))

    async def is_service_healthy(self) -> tuple[bool, list[UnhealthyEvent]]:
        """Report the health status together with the recorded events.

        Returns:
            A ``(healthy, events)`` pair. ``healthy`` is ``True`` only when no
            events are stored; ``events`` is a list copy of the stored events.
        """
        async with self._lock:
            # Copy under the lock so the caller gets a stable snapshot.
            snapshot = list(self.issues)

        return not snapshot, snapshot
# Module-level singleton: the health manager is instantiated once, when this
# module is first imported, so all importers share the same instance.
HealthManager = HealthManagerClass()