tabbyAPI-ollama/common/multimodal.py
kingbri c652a6e030 API: Transform multimodal into an actual class
Migrate the add method into the class itself. Also, a BaseModel isn't
needed here since this isn't a serialized class.

Signed-off-by: kingbri <bdashore3@proton.me>
2024-11-20 00:06:20 -05:00

30 lines
957 B
Python

from typing import List
from backends.exllamav2.vision import get_image_embedding
from common import model
from loguru import logger
from common.optional_dependencies import dependencies
if dependencies.exllamav2:
from exllamav2 import ExLlamaV2VisionTower
class MultimodalEmbeddingWrapper:
    """Common multimodal embedding wrapper.

    Collects the embeddings added to it, together with the text alias
    reported by each embedding.
    """

    # Name of the embedding backend in use; None until add() resolves it.
    type: str
    # Embeddings returned by get_image_embedding, in insertion order.
    content: List
    # text_alias of each embedding in content, index-aligned with it.
    text_alias: List[str]

    def __init__(self):
        # Create the lists per instance. As class-level list literals they
        # would be shared, so every wrapper would see (and append to) the
        # embeddings of every other wrapper.
        self.type = None
        self.content = []
        self.text_alias = []

    async def add(self, url: str):
        """Create an embedding from url and store it on this wrapper.

        Logs an error and stores nothing when no supported vision model
        is detected.
        """
        # Determine the type of vision embedding to use
        if not self.type:
            # ExLlamaV2VisionTower is only imported when the exllamav2
            # dependency is present, so check the flag before naming it.
            if dependencies.exllamav2 and isinstance(
                model.container.vision_model, ExLlamaV2VisionTower
            ):
                self.type = "ExLlamaV2MMEmbedding"

        if self.type == "ExLlamaV2MMEmbedding":
            embedding = await get_image_embedding(url)
            self.content.append(embedding)
            self.text_alias.append(embedding.text_alias)
        else:
            logger.error("No valid vision model to create embedding")