API: Fix error reporting
Disconnect consistently on a load error. It is safer to warn the user to run unload (or re-run load) if a model does not load correctly. Also, don't log a traceback for request errors that don't have one. Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
parent
165cc6fc2d
commit
0b25c208d6
2 changed files with 12 additions and 5 deletions
|
|
@ -27,7 +27,7 @@ class TabbyRequestError(BaseModel):
|
|||
error: TabbyRequestErrorMessage
|
||||
|
||||
|
||||
def get_generator_error(message: str):
|
||||
def get_generator_error(message: str, exc_info: bool = True):
|
||||
"""Get a generator error."""
|
||||
|
||||
generator_error = handle_request_error(message)
|
||||
|
|
@ -35,7 +35,7 @@ def get_generator_error(message: str):
|
|||
return get_sse_packet(generator_error.model_dump_json())
|
||||
|
||||
|
||||
def handle_request_error(message: str):
|
||||
def handle_request_error(message: str, exc_info: bool = True):
|
||||
"""Log a request error to the console."""
|
||||
|
||||
error_message = TabbyRequestErrorMessage(
|
||||
|
|
@ -45,7 +45,7 @@ def handle_request_error(message: str):
|
|||
request_error = TabbyRequestError(error=error_message)
|
||||
|
||||
# Log the error and provided message to the console
|
||||
if error_message.trace:
|
||||
if error_message.trace and exc_info:
|
||||
logger.error(error_message.trace)
|
||||
|
||||
logger.error(f"Sent to request: {message}")
|
||||
|
|
|
|||
11
main.py
11
main.py
|
|
@ -93,11 +93,14 @@ MODEL_CONTAINER: Optional[ExllamaV2Container] = None
|
|||
|
||||
|
||||
async def _check_model_container():
    """Ensure a model is loading or loaded; raise HTTP 400 otherwise.

    A request may proceed only if the global MODEL_CONTAINER exists and is
    either mid-load or fully loaded. Otherwise a request error is logged
    (without a traceback — this is an expected condition, not an exception)
    and surfaced to the caller as a 400 response.
    """
    container = MODEL_CONTAINER
    usable = container is not None and (
        container.model_is_loading or container.model_loaded
    )

    if not usable:
        # exc_info=False: no traceback exists for this expected error path
        error_message = handle_request_error(
            "No models are currently loaded.",
            exc_info=False,
        ).error.message

        raise HTTPException(400, error_message)
|
||||
|
|
@ -221,6 +224,7 @@ async def load_model(request: Request, data: ModelLoadRequest):
|
|||
|
||||
# Unload the existing model
|
||||
if MODEL_CONTAINER and MODEL_CONTAINER.model:
|
||||
logger.info("Unloading existing model.")
|
||||
await unload_model()
|
||||
|
||||
MODEL_CONTAINER = ExllamaV2Container(model_path.resolve(), False, **load_data)
|
||||
|
|
@ -231,7 +235,10 @@ async def load_model(request: Request, data: ModelLoadRequest):
|
|||
try:
|
||||
for module, modules in load_status:
|
||||
if await request.is_disconnected():
|
||||
await unload_model()
|
||||
logger.error(
|
||||
"Model load cancelled by user. "
|
||||
"Please make sure to run unload to free up resources."
|
||||
)
|
||||
break
|
||||
|
||||
if module == 0:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue