diff --git a/common/sampling.py b/common/sampling.py
index 2305316..6c22bfd 100644
--- a/common/sampling.py
+++ b/common/sampling.py
@@ -23,90 +23,90 @@ class BaseSamplerRequest(BaseModel):
         examples=[512],
     )

-    stop: Union[str, List[str]] = Field(
+    stop: Optional[Union[str, List[str]]] = Field(
         default_factory=lambda: get_default_sampler_value("stop", [])
     )

-    token_healing: bool = Field(
+    token_healing: Optional[bool] = Field(
         default_factory=lambda: get_default_sampler_value("token_healing", False)
     )

-    temperature: float = Field(
+    temperature: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("temperature", 1.0),
         examples=[1.0],
     )

-    temperature_last: bool = Field(
+    temperature_last: Optional[bool] = Field(
         default_factory=lambda: get_default_sampler_value("temperature_last", False)
     )

-    smoothing_factor: float = Field(
+    smoothing_factor: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("smoothing_factor", 0.0),
     )

-    top_k: int = Field(
+    top_k: Optional[int] = Field(
         default_factory=lambda: get_default_sampler_value("top_k", 0),
     )

-    top_p: float = Field(
+    top_p: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("top_p", 1.0),
         examples=[1.0],
     )

-    top_a: float = Field(
+    top_a: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("top_a", 0.0)
     )

-    min_p: float = Field(
+    min_p: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("min_p", 0.0)
     )

-    tfs: float = Field(
+    tfs: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("tfs", 1.0),
         examples=[1.0],
     )

-    frequency_penalty: float = Field(
+    frequency_penalty: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("frequency_penalty", 0.0)
     )

-    presence_penalty: float = Field(
+    presence_penalty: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("presence_penalty", 0.0)
     )

-    repetition_penalty: float = Field(
+    repetition_penalty: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("repetition_penalty", 1.0),
         examples=[1.0],
     )

-    repetition_decay: int = Field(
+    repetition_decay: Optional[int] = Field(
         default_factory=lambda: get_default_sampler_value("repetition_decay", 0)
     )

-    mirostat_mode: int = Field(
+    mirostat_mode: Optional[int] = Field(
         default_factory=lambda: get_default_sampler_value("mirostat_mode", 0)
     )

-    mirostat_tau: float = Field(
+    mirostat_tau: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("mirostat_tau", 1.5),
         examples=[1.5],
     )

-    mirostat_eta: float = Field(
+    mirostat_eta: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("mirostat_eta", 0.3),
         examples=[0.3],
     )

-    add_bos_token: bool = Field(
+    add_bos_token: Optional[bool] = Field(
         default_factory=lambda: get_default_sampler_value("add_bos_token", True)
     )

-    ban_eos_token: bool = Field(
+    ban_eos_token: Optional[bool] = Field(
         default_factory=lambda: get_default_sampler_value("ban_eos_token", False),
         examples=[False],
     )

-    skip_special_tokens: bool = Field(
+    skip_special_tokens: Optional[bool] = Field(
         default_factory=lambda: get_default_sampler_value("ban_eos_token", True),
         examples=[True],
     )
@@ -133,14 +133,14 @@ class BaseSamplerRequest(BaseModel):
     )

     # Aliased variables
-    typical: float = Field(
+    typical: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("typical", 1.0),
         validation_alias=AliasChoices("typical", "typical_p"),
         description="Aliases: typical_p",
         examples=[1.0],
     )

-    penalty_range: int = Field(
+    penalty_range: Optional[int] = Field(
         default_factory=lambda: get_default_sampler_value("penalty_range", -1),
         validation_alias=AliasChoices(
             "penalty_range",
@@ -150,34 +150,34 @@ class BaseSamplerRequest(BaseModel):
         description="Aliases: repetition_range, repetition_penalty_range",
     )

-    cfg_scale: float = Field(
+    cfg_scale: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("cfg_scale", 1.0),
         validation_alias=AliasChoices("cfg_scale", "guidance_scale"),
         description="Aliases: guidance_scale",
         examples=[1.0],
     )

-    max_temp: float = Field(
+    max_temp: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("max_temp", 1.0),
         validation_alias=AliasChoices("max_temp", "dynatemp_high"),
         description="Aliases: dynatemp_high",
         examples=[1.0],
     )

-    min_temp: float = Field(
+    min_temp: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("min_temp", 1.0),
         validation_alias=AliasChoices("min_temp", "dynatemp_low"),
         description="Aliases: dynatemp_low",
         examples=[1.0],
     )

-    temp_exponent: float = Field(
+    temp_exponent: Optional[float] = Field(
         default_factory=lambda: get_default_sampler_value("temp_exponent", 1.0),
         validation_alias=AliasChoices("temp_exponent", "dynatemp_exponent"),
         examples=[1.0],
     )

-    banned_tokens: Union[List[int], str] = Field(
+    banned_tokens: Optional[Union[List[int], str]] = Field(
         default_factory=lambda: get_default_sampler_value("banned_tokens", []),
         validation_alias=AliasChoices("banned_tokens", "custom_token_bans"),
         description="Aliases: custom_token_bans",
diff --git a/endpoints/OAI/types/chat_completion.py b/endpoints/OAI/types/chat_completion.py
index c264876..92265a7 100644
--- a/endpoints/OAI/types/chat_completion.py
+++ b/endpoints/OAI/types/chat_completion.py
@@ -43,8 +43,8 @@ class ChatCompletionRequest(CommonCompletionRequest):
     # Take in a string as well even though it's not part of the OAI spec
     messages: Union[str, List[Dict[str, str]]]
     prompt_template: Optional[str] = None
-    add_generation_prompt: bool = True
-    template_vars: dict = {}
+    add_generation_prompt: Optional[bool] = True
+    template_vars: Optional[dict] = {}
     response_prefix: Optional[str] = None
diff --git a/endpoints/OAI/types/common.py b/endpoints/OAI/types/common.py
index 0c3fe8f..b9aac68 100644
--- a/endpoints/OAI/types/common.py
+++ b/endpoints/OAI/types/common.py
@@ -26,8 +26,8 @@ class CommonCompletionRequest(BaseSamplerRequest):
     model: Optional[str] = None

     # Generation info (remainder is in BaseSamplerRequest superclass)
-    stream: bool = False
-    logprobs: int = 0
+    stream: Optional[bool] = False
+    logprobs: Optional[int] = 0
     response_format: Optional[CompletionResponseFormat] = Field(
         default_factory=CompletionResponseFormat
     )
@@ -36,10 +36,12 @@ class CommonCompletionRequest(BaseSamplerRequest):
     best_of: Optional[int] = Field(
         description="Not parsed. Only used for OAI compliance.", default=None
     )
-    echo: bool = Field(
+    echo: Optional[bool] = Field(
         description="Not parsed. Only used for OAI compliance.", default=False
     )
-    n: int = Field(description="Not parsed. Only used for OAI compliance.", default=1)
+    n: Optional[int] = Field(
+        description="Not parsed. Only used for OAI compliance.", default=1
+    )
     suffix: Optional[str] = Field(
         description="Not parsed. Only used for OAI compliance.", default=None
     )
diff --git a/endpoints/OAI/types/lora.py b/endpoints/OAI/types/lora.py
index b9e29bc..8435a8a 100644
--- a/endpoints/OAI/types/lora.py
+++ b/endpoints/OAI/types/lora.py
@@ -26,7 +26,7 @@ class LoraLoadInfo(BaseModel):
     """Represents a single Lora load info."""

     name: str
-    scaling: float = 1.0
+    scaling: Optional[float] = 1.0


 class LoraLoadRequest(BaseModel):
diff --git a/endpoints/OAI/types/model.py b/endpoints/OAI/types/model.py
index c769960..22895be 100644
--- a/endpoints/OAI/types/model.py
+++ b/endpoints/OAI/types/model.py
@@ -13,10 +13,10 @@ class ModelCardParameters(BaseModel):
     # Safe to do this since it's guaranteed to fetch a max seq len
     # from model_container
     max_seq_len: Optional[int] = None
-    rope_scale: float = 1.0
-    rope_alpha: float = 1.0
-    cache_mode: str = "FP16"
-    chunk_size: int = 2048
+    rope_scale: Optional[float] = 1.0
+    rope_alpha: Optional[float] = 1.0
+    cache_mode: Optional[str] = "FP16"
+    chunk_size: Optional[int] = 2048
     prompt_template: Optional[str] = None
     num_experts_per_token: Optional[int] = None
     use_cfg: Optional[bool] = None
@@ -47,7 +47,7 @@ class DraftModelLoadRequest(BaseModel):
     """Represents a draft model load request."""

     draft_model_name: str
-    draft_rope_scale: float = 1.0
+    draft_rope_scale: Optional[float] = 1.0
     draft_rope_alpha: Optional[float] = Field(
         description="Automatically calculated if not present",
         default=None,
@@ -73,9 +73,11 @@ class ModelLoadRequest(BaseModel):
         default=None,
         examples=[4096],
     )
-    gpu_split_auto: bool = True
-    autosplit_reserve: List[float] = [96]
-    gpu_split: List[float] = Field(default_factory=list, examples=[[24.0, 20.0]])
+    gpu_split_auto: Optional[bool] = True
+    autosplit_reserve: Optional[List[float]] = [96]
+    gpu_split: Optional[List[float]] = Field(
+        default_factory=list, examples=[[24.0, 20.0]]
+    )
     rope_scale: Optional[float] = Field(
         description="Automatically pulled from the model's config if not present",
         default=None,
@@ -86,16 +88,16 @@ class ModelLoadRequest(BaseModel):
         default=None,
         examples=[1.0],
     )
-    no_flash_attention: bool = False
+    no_flash_attention: Optional[bool] = False
     # low_mem: Optional[bool] = False
-    cache_mode: str = "FP16"
-    chunk_size: int = 2048
+    cache_mode: Optional[str] = "FP16"
+    chunk_size: Optional[int] = 2048
     prompt_template: Optional[str] = None
     num_experts_per_token: Optional[int] = None
     use_cfg: Optional[bool] = None
-    fasttensors: bool = False
+    fasttensors: Optional[bool] = False
     draft: Optional[DraftModelLoadRequest] = None
-    skip_queue: bool = False
+    skip_queue: Optional[bool] = False


 class ModelLoadResponse(BaseModel):