Models can now be loaded and unloaded via the API. Also add authentication, both for using the API and for administrator tasks; the two types of authorization use different keys. Also fix the unload function so that it properly frees all used VRAM. Signed-off-by: kingbri <bdashore3@proton.me>
13 lines
440 B
Python
13 lines
440 B
Python
from pydantic import BaseModel, Field
|
|
from typing import List, Dict
|
|
|
|
class LogProbs(BaseModel):
    """Container for log-probability data attached to generated text.

    Every field is optional and falls back to a fresh empty list, so an
    instance can be created with no arguments.
    """

    # Integer offsets; presumably one per token, locating it in the text
    # (TODO confirm against the code that fills this in).
    text_offset: List[int] = Field(default_factory=list)

    # Float log-probabilities; presumably parallel to `tokens`.
    token_logprobs: List[float] = Field(default_factory=list)

    # Token strings.
    tokens: List[str] = Field(default_factory=list)

    # List of string -> float mappings; presumably the top candidate
    # tokens and their log-probabilities per position (TODO confirm).
    top_logprobs: List[Dict[str, float]] = Field(default_factory=list)
|
|
|
|
class UsageStats(BaseModel):
    """Token counts reported for a request.

    All three fields are required integers; none has a default.
    """

    # Count of tokens in the generated completion.
    completion_tokens: int

    # Count of tokens in the prompt.
    prompt_tokens: int

    # NOTE(review): presumably prompt_tokens + completion_tokens, but this
    # model does not enforce that relationship; callers must supply it.
    total_tokens: int
|