* add github workflows for pylint and yapf
* yapf
* docstrings for auth
* fix auth.py
* fix generators.py
* fix gen_logging.py
* fix main.py
* fix model.py
* fix templating.py
* fix utils.py
* update formatting.sh to include subdirs for pylint
* fix model_test.py
* fix wheel_test.py
* rename utils to utils_oai
* fix OAI/utils_oai.py
* fix completion.py
* fix token.py
* fix lora.py
* fix common.py
* add pylintrc and fix model.py
* finish up pylint
* fix attribute error
* main.py formatting
* add formatting batch script
* Main: Remove unnecessary global

  Linter suggestion.

  Signed-off-by: kingbri <bdashore3@proton.me>
* switch to ruff
* Formatting + Linting: Add ruff.toml

  Signed-off-by: kingbri <bdashore3@proton.me>
* Formatting + Linting: Switch scripts to use ruff

  Also remove the file and recent file change functions from both scripts.

  Signed-off-by: kingbri <bdashore3@proton.me>
* Tree: Format and lint

  Signed-off-by: kingbri <bdashore3@proton.me>
* Scripts + Workflows: Format

  Signed-off-by: kingbri <bdashore3@proton.me>
* Tree: Remove pylint flags

  We use ruff now.

  Signed-off-by: kingbri <bdashore3@proton.me>
* Tree: Format

  Signed-off-by: kingbri <bdashore3@proton.me>
* Formatting: Line length is 88

  Use the same value as Black.

  Signed-off-by: kingbri <bdashore3@proton.me>
* Tree: Format

  Update to new line length rules.

  Signed-off-by: kingbri <bdashore3@proton.me>

---------

Authored-by: AlpinDale <52078762+AlpinDale@users.noreply.github.com>
Co-authored-by: kingbri <bdashore3@proton.me>
51 lines · 1.1 KiB · Python
""" Tokenization types """
|
|
from typing import List
|
|
|
|
from pydantic import BaseModel
|
|
|
|
|
|
class CommonTokenRequest(BaseModel):
|
|
"""Represents a common tokenization request."""
|
|
|
|
add_bos_token: bool = True
|
|
encode_special_tokens: bool = True
|
|
decode_special_tokens: bool = True
|
|
|
|
def get_params(self):
|
|
"""Get the parameters for tokenization."""
|
|
return {
|
|
"add_bos_token": self.add_bos_token,
|
|
"encode_special_tokens": self.encode_special_tokens,
|
|
"decode_special_tokens": self.decode_special_tokens,
|
|
}
|
|
|
|
|
|
class TokenEncodeRequest(CommonTokenRequest):
|
|
"""Represents a tokenization request."""
|
|
|
|
text: str
|
|
|
|
|
|
class TokenEncodeResponse(BaseModel):
|
|
"""Represents a tokenization response."""
|
|
|
|
tokens: List[int]
|
|
length: int
|
|
|
|
|
|
class TokenDecodeRequest(CommonTokenRequest):
|
|
""" " Represents a detokenization request."""
|
|
|
|
tokens: List[int]
|
|
|
|
|
|
class TokenDecodeResponse(BaseModel):
|
|
"""Represents a detokenization response."""
|
|
|
|
text: str
|
|
|
|
|
|
class TokenCountResponse(BaseModel):
|
|
"""Represents a token count response."""
|
|
|
|
length: int
|
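A minimal sketch of how these models might be exercised. The request carries the text plus the shared tokenizer flags, get_params() collects those flags for the backend call, and the response echoes the resulting IDs. The token IDs below are placeholders rather than real tokenizer output, and model_dump() assumes Pydantic v2 (use .dict() on v1).

request = TokenEncodeRequest(text="Hello, world!", add_bos_token=False)

# get_params() gathers the shared tokenizer flags for the backend call.
params = request.get_params()
assert params["add_bos_token"] is False

# A handler would tokenize request.text with these flags; the IDs here
# are hypothetical placeholders, not output from a real tokenizer.
fake_tokens = [15496, 11, 995, 0]
response = TokenEncodeResponse(tokens=fake_tokens, length=len(fake_tokens))
print(response.model_dump())  # Pydantic v2; use .dict() on v1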