""" Tokenization types """
|
|
from pydantic import BaseModel
|
|
from typing import List
|
|
|
|
|
|
class CommonTokenRequest(BaseModel):
|
|
"""Represents a common tokenization request."""
|
|
|
|
add_bos_token: bool = True
|
|
encode_special_tokens: bool = True
|
|
decode_special_tokens: bool = True
|
|
|
|
def get_params(self):
|
|
"""Get the parameters for tokenization."""
|
|
return {
|
|
"add_bos_token": self.add_bos_token,
|
|
"encode_special_tokens": self.encode_special_tokens,
|
|
"decode_special_tokens": self.decode_special_tokens,
|
|
}
|
|
|
|
|
|
class TokenEncodeRequest(CommonTokenRequest):
|
|
"""Represents a tokenization request."""
|
|
|
|
text: str
|
|
|
|
|
|
class TokenEncodeResponse(BaseModel):
|
|
"""Represents a tokenization response."""
|
|
|
|
tokens: List[int]
|
|
length: int
|
|
|
|
|
|


class TokenDecodeRequest(CommonTokenRequest):
    """Represents a detokenization request."""

    tokens: List[int]


class TokenDecodeResponse(BaseModel):
    """Represents a detokenization response."""

    text: str


class TokenCountResponse(BaseModel):
    """Represents a token count response."""

    length: int
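

# A minimal usage sketch (not part of the original file), assuming these models
# back a tokenization endpoint: pydantic validates the incoming request, and
# get_params() pulls out only the tokenizer flags. Values below are illustrative.
if __name__ == "__main__":
    request = TokenEncodeRequest(text="Hello, world!", add_bos_token=False)

    # get_params() omits the text field and returns just the three flags.
    print(request.get_params())
    # {'add_bos_token': False, 'encode_special_tokens': True, 'decode_special_tokens': True}

    # Illustrative token IDs; real values depend on the tokenizer in use.
    response = TokenEncodeResponse(tokens=[1, 2, 3], length=3)
    print(response.length)  # 3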