Endpoints: Add props endpoint and add more values to model params
The props endpoint is a standard used by llama.cpp servers that returns various properties of the loaded model to a client. It's still recommended to use /v1/model to get all the parameters a TabbyAPI model has. Also include the contents of the prompt template when fetching the current model.

Signed-off-by: kingbri <8082010+bdashore3@users.noreply.github.com>
parent fa8035ef72
commit 7878d351a7

3 changed files with 51 additions and 3 deletions
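Before the diff, a rough sketch of how a client might call the new route. This is not part of the commit: the host, port, and x-api-key header below are assumptions about a typical TabbyAPI deployment.

import requests

# Hypothetical client call; adjust the base URL and API key to your setup.
response = requests.get(
    "http://127.0.0.1:5000/props",
    headers={"x-api-key": "YOUR_API_KEY"},  # placeholder credential
    timeout=10,
)
response.raise_for_status()
print(response.json())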
@@ -23,9 +23,11 @@ from endpoints.core.types.lora import LoraList, LoraLoadRequest, LoraLoadResponse
 from endpoints.core.types.model import (
     EmbeddingModelLoadRequest,
     ModelCard,
+    ModelDefaultGenerationSettings,
     ModelList,
     ModelLoadRequest,
     ModelLoadResponse,
+    ModelPropsResponse,
 )
 from endpoints.core.types.health import HealthCheckResponse
 from endpoints.core.types.sampler_overrides import (
@@ -131,6 +133,30 @@ async def current_model() -> ModelCard:
     return get_current_model()
 
 
+@router.get(
+    "/props", dependencies=[Depends(check_api_key), Depends(check_model_container)]
+)
+async def model_props() -> ModelPropsResponse:
+    """
+    Returns specific properties of a model for clients.
+
+    To get all properties, use /v1/model instead.
+    """
+
+    current_model_card = get_current_model()
+    resp = ModelPropsResponse(
+        total_slots=current_model_card.parameters.max_batch_size,
+        default_generation_settings=ModelDefaultGenerationSettings(
+            n_ctx=current_model_card.parameters.max_seq_len,
+        ),
+    )
+
+    if current_model_card.parameters.prompt_template_content:
+        resp.chat_template = current_model_card.parameters.prompt_template_content
+
+    return resp
+
+
 @router.get("/v1/model/draft/list", dependencies=[Depends(check_api_key)])
 async def list_draft_models(request: Request) -> ModelList:
     """
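Reading the handler above: max_batch_size maps to total_slots, max_seq_len to default_generation_settings.n_ctx, and the prompt template contents (when present) to chat_template, matching the field names llama.cpp-style clients expect. A sketch of the resulting payload, with illustrative values only:

# Illustrative response shape only; values are made up for a model with a
# 4096-token context and a chat template configured.
example_props = {
    "total_slots": 1,
    "chat_template": "<contents of the model's prompt template>",
    "default_generation_settings": {"n_ctx": 4096},
}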
@@ -16,10 +16,12 @@ class ModelCardParameters(BaseModel):
     max_seq_len: Optional[int] = None
     rope_scale: Optional[float] = 1.0
     rope_alpha: Optional[float] = 1.0
+    max_batch_size: Optional[int] = 1
     cache_size: Optional[int] = None
     cache_mode: Optional[str] = "FP16"
     chunk_size: Optional[int] = 2048
     prompt_template: Optional[str] = None
+    prompt_template_content: Optional[str] = None
     num_experts_per_token: Optional[int] = None
     use_vision: Optional[bool] = False
 
@@ -139,3 +141,17 @@ class ModelLoadResponse(BaseModel):
     module: int
     modules: int
     status: str
+
+
+class ModelDefaultGenerationSettings(BaseModel):
+    """Contains default generation settings for model props."""
+
+    n_ctx: int
+
+
+class ModelPropsResponse(BaseModel):
+    """Represents a model props response."""
+
+    total_slots: int = 1
+    chat_template: str = ""
+    default_generation_settings: ModelDefaultGenerationSettings
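For completeness, a minimal sketch of how the new response models serialize, assuming Pydantic v2 (model_dump_json). The class definitions are copied from the diff above; the field values are illustrative.

from pydantic import BaseModel


class ModelDefaultGenerationSettings(BaseModel):
    """Contains default generation settings for model props."""

    n_ctx: int


class ModelPropsResponse(BaseModel):
    """Represents a model props response."""

    total_slots: int = 1
    chat_template: str = ""
    default_generation_settings: ModelDefaultGenerationSettings


# chat_template stays "" unless overwritten, mirroring the branch on
# prompt_template_content in the endpoint above.
resp = ModelPropsResponse(
    total_slots=1,
    default_generation_settings=ModelDefaultGenerationSettings(n_ctx=4096),
)
print(resp.model_dump_json(indent=2))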