From 45b966363ec78d5a667a9b38cb571e4c2da6da37 Mon Sep 17 00:00:00 2001 From: DocShotgun <126566557+DocShotgun@users.noreply.github.com> Date: Sat, 3 May 2025 21:01:03 -0700 Subject: [PATCH] Tree: Format --- backends/exllamav3/model.py | 12 +++++++----- endpoints/OAI/types/chat_completion.py | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/backends/exllamav3/model.py b/backends/exllamav3/model.py index 1026faf..61986c1 100644 --- a/backends/exllamav3/model.py +++ b/backends/exllamav3/model.py @@ -165,9 +165,7 @@ class ExllamaV3Container(BaseModelContainer): self.draft_model_dir = draft_model_path self.draft_config = Config.from_directory(str(draft_model_path.resolve())) self.draft_model = Model.from_config(self.draft_config) - logger.info( - f'Using draft model: {str(draft_model_path.resolve())}' - ) + logger.info(f"Using draft model: {str(draft_model_path.resolve())}") else: self.draft_model = None self.craft_cache = None @@ -262,7 +260,9 @@ class ExllamaV3Container(BaseModelContainer): case "Q8": self.draft_cache_mode = "8,8" - split_draft_cache_mode = re.search(r"^([2-8])\s*,\s*([2-8])$", self.draft_cache_mode) + split_draft_cache_mode = re.search( + r"^([2-8])\s*,\s*([2-8])$", self.draft_cache_mode + ) if split_draft_cache_mode: draft_k_bits = int(split_draft_cache_mode.group(1)) draft_v_bits = int(split_draft_cache_mode.group(2)) @@ -274,7 +274,9 @@ class ExllamaV3Container(BaseModelContainer): v_bits=draft_v_bits, ) else: - self.draft_cache = Cache(self.draft_model, max_num_tokens = self.cache_size) + self.draft_cache = Cache( + self.draft_model, max_num_tokens=self.cache_size + ) # Max batch size self.max_batch_size = unwrap(kwargs.get("max_batch_size"), 256) diff --git a/endpoints/OAI/types/chat_completion.py b/endpoints/OAI/types/chat_completion.py index 51695c2..fb73eb9 100644 --- a/endpoints/OAI/types/chat_completion.py +++ b/endpoints/OAI/types/chat_completion.py @@ -84,7 +84,7 @@ class ChatCompletionRequest(CommonCompletionRequest): # Chat completions requests do not have a BOS token preference. Backend # respects the tokenization config for the individual model. - add_bos_token: Optional[bool] = Field(default = None) + add_bos_token: Optional[bool] = Field(default=None) @field_validator("add_bos_token", mode="after") def force_bos_token(cls, v):