Tree: Switch to async generators

Async generation helps remove many roadblocks to managing tasks using threads. It should allow for abortables and modern-day paradigms. NOTE: Exllamav2 itself is not an asynchronous library. It's just been added into tabby's async nature to allow for a fast and concurrent API server. Whether to run stream_ex in a separate thread or to manage it manually using asyncio.sleep(0) is still being debated. Signed-off-by: kingbri <bdashore3@proton.me>
2024-03-14 10:27:39 -04:00 · 2024-03-14 10:27:39 -04:00 · 7fded4f183
commit 7fded4f183
parent 33e2df50b7
10 changed files with 84 additions and 88 deletions
--- a/common/model.py
+++ b/common/model.py
@ -52,7 +52,7 @@ async def load_model_gen(model_path: pathlib.Path, **kwargs):
    progress.start()

    try:
-        for module, modules in load_status:
+        async for module, modules in load_status:
            if module == 0:
                loading_task = progress.add_task(
                    f"[cyan]Loading {model_type} modules", total=modules
@ -76,12 +76,12 @@ async def load_model(model_path: pathlib.Path, **kwargs):
        pass


-def load_loras(lora_dir, **kwargs):
+async def load_loras(lora_dir, **kwargs):
    """Wrapper to load loras."""
    if len(container.active_loras) > 0:
        unload_loras()

-    return container.load_loras(lora_dir, **kwargs)
+    return await container.load_loras(lora_dir, **kwargs)


 def unload_loras():