Embeddings: Migrate and organize Infinity

Use Infinity as a separate backend and handle the model within the
common module. This separates the embeddings model from the endpoint,
which allows for model loading/unloading in core.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri 2024-07-30 11:00:23 -04:00
parent ac1afcc588
commit fbf1455db1
6 changed files with 165 additions and 83 deletions

12
main.py
View file

@ -87,6 +87,18 @@ async def entrypoint_async():
lora_dir = pathlib.Path(unwrap(lora_config.get("lora_dir"), "loras"))
await model.container.load_loras(lora_dir.resolve(), **lora_config)
# If an initial embedding model name is specified, create a separate container
# and load the model
embedding_config = config.embeddings_config()
embedding_model_name = embedding_config.get("embeddings_model_name")
if embedding_model_name:
embedding_model_path = pathlib.Path(
unwrap(embedding_config.get("embeddings_model_dir"), "models")
)
embedding_model_path = embedding_model_path / embedding_model_name
await model.load_embeddings_model(embedding_model_path, **embedding_config)
await start_api(host, port)