Model: Add TokenizerConfig stub and add_bos_token fallback

This stub fetches the add_bos_token field from the HF tokenizer config.
Ideally, this should be in the backend rather than tabby.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
kingbri 2025-05-02 00:08:01 -04:00
parent aa657fa6e9
commit 47cb2a0de9
3 changed files with 46 additions and 3 deletions

View file

@ -53,3 +53,23 @@ class HuggingFaceConfig(BaseModel):
contents = await hf_config_json.read()
hf_config_dict = json.loads(contents)
return cls.model_validate(hf_config_dict)
class TokenizerConfig(BaseModel):
    """
    An abridged version of HuggingFace's tokenizer config.

    Only the add_bos_token field is declared on this model.
    """

    # Defaults to None when no value is supplied.
    add_bos_token: Optional[bool] = None

    @classmethod
    async def from_file(cls, model_directory: pathlib.Path):
        """
        Create an instance from a tokenizer config file.

        Reads tokenizer_config.json from model_directory, parses it as JSON
        and validates the parsed object against this model. No errors are
        handled here, so failures while opening, decoding or validating
        propagate to the caller.
        """

        config_path = model_directory / "tokenizer_config.json"

        async with aiofiles.open(config_path, "r", encoding="utf8") as config_file:
            raw_json = await config_file.read()

        return cls.model_validate(json.loads(raw_json))