Model: Add TokenizerConfig stub and add_eos_token fallback
This stub fetches the add_eos_token field from the HF tokenizer config. Ideally, this should be in the backend rather than tabby. Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
parent
aa657fa6e9
commit
47cb2a0de9
3 changed files with 46 additions and 3 deletions
|
|
@ -53,3 +53,23 @@ class HuggingFaceConfig(BaseModel):
|
|||
contents = await hf_config_json.read()
|
||||
hf_config_dict = json.loads(contents)
|
||||
return cls.model_validate(hf_config_dict)
|
||||
|
||||
|
||||
class TokenizerConfig(BaseModel):
    """
    An abridged version of HuggingFace's tokenizer config.
    """

    # Whether the tokenizer should prepend a BOS token.
    # None means the field was absent from tokenizer_config.json.
    add_bos_token: Optional[bool] = None

    @classmethod
    async def from_file(cls, model_directory: pathlib.Path):
        """Create an instance from a tokenizer config file."""

        config_path = model_directory / "tokenizer_config.json"

        # Read the raw JSON asynchronously, then validate it into the model.
        async with aiofiles.open(config_path, "r", encoding="utf8") as raw_file:
            raw_text = await raw_file.read()

        parsed = json.loads(raw_text)
        return cls.model_validate(parsed)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue