Model: Bypass lock checks when shutting down
Previously, when a SIGINT was emitted while a model load was running, the API didn't shut down until the load finished because the unload was waiting for the load lock. However, when shutting down, the lock doesn't matter since the process is being killed anyway. Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
parent
65c16f2a7c
commit
2a33ebbf29
3 changed files with 15 additions and 10 deletions
|
|
@ -734,11 +734,15 @@ class ExllamaV2Container:
|
|||
Free all VRAM resources used by this model
|
||||
"""
|
||||
|
||||
try:
|
||||
await self.load_lock.acquire()
|
||||
# Shutdown immediately unloads and bypasses all locks
|
||||
do_shutdown = kwargs.get("shutdown")
|
||||
|
||||
# Wait for other jobs to finish
|
||||
await self.wait_for_jobs(kwargs.get("skip_wait"))
|
||||
try:
|
||||
if not do_shutdown:
|
||||
await self.load_lock.acquire()
|
||||
|
||||
# Wait for other jobs to finish
|
||||
await self.wait_for_jobs(kwargs.get("skip_wait"))
|
||||
|
||||
# Delete references held in the grammar module
|
||||
clear_grammar_func_cache()
|
||||
|
|
@ -778,10 +782,11 @@ class ExllamaV2Container:
|
|||
|
||||
logger.info("Loras unloaded." if loras_only else "Model unloaded.")
|
||||
finally:
|
||||
self.load_lock.release()
|
||||
if not do_shutdown:
|
||||
self.load_lock.release()
|
||||
|
||||
async with self.load_condition:
|
||||
self.load_condition.notify_all()
|
||||
async with self.load_condition:
|
||||
self.load_condition.notify_all()
|
||||
|
||||
def encode_tokens(self, text: str, **kwargs):
|
||||
"""Wrapper to encode tokens from a text string"""
|
||||
|
|
|
|||
|
|
@ -43,11 +43,11 @@ def load_progress(module, modules):
|
|||
yield module, modules
|
||||
|
||||
|
||||
async def unload_model(skip_wait: bool = False):
|
||||
async def unload_model(skip_wait: bool = False, shutdown: bool = False):
|
||||
"""Unloads a model"""
|
||||
global container
|
||||
|
||||
await container.unload(skip_wait=skip_wait)
|
||||
await container.unload(skip_wait=skip_wait, shutdown=shutdown)
|
||||
container = None
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ async def signal_handler_async(*_):
|
|||
"""Internal signal handler. Runs all async code to shut down the program."""
|
||||
|
||||
if model.container:
|
||||
await model.unload_model(skip_wait=True)
|
||||
await model.unload_model(skip_wait=True, shutdown=True)
|
||||
|
||||
if model.embeddings_container:
|
||||
await model.unload_embedding_model()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue