# Commit note: This is actually required for infinity to load a model.
# Signed-off-by: kingbri <bdashore3@proton.me>
# Page metadata: 235 lines, 13 KiB, TOML
# Build backend declaration: setuptools, with packaging and wheel available
# at build time.
[build-system]
requires = [
    "packaging",
    "setuptools",
    "wheel",
]
build-backend = "setuptools.build_meta"

# We're not building the project itself, so the setuptools module list is
# explicitly left empty.
[tool.setuptools]
py-modules = []

# Core project metadata plus the dependencies that are installed regardless
# of which optional extra is selected.
[project]
name = "tabbyAPI"
version = "0.0.1"
description = "An OAI compatible exllamav2 API that's both lightweight and fast"
requires-python = ">=3.10"
dependencies = [
    "fastapi-slim >= 0.110.0",
    "pydantic >= 2.0.0",
    "PyYAML",
    "rich",
    "uvicorn >= 0.28.1",
    "jinja2 >= 3.0.0",
    "loguru",
    "sse-starlette",
    "packaging",
    "tokenizers",
    "lm-format-enforcer >= 0.9.6",
    "aiofiles",
    "aiohttp",
    "huggingface_hub",
    "psutil",
    "httptools>=0.5.0",

    # Improved asyncio loops (each one is gated to a single platform by its marker)
    "uvloop ; platform_system == 'Linux' and platform_machine == 'x86_64'",
    "winloop ; platform_system == 'Windows'",

    # TEMP: Remove once 2.x is fixed in upstream
    "numpy < 2.0.0",
]

# Project links shown by package indexes and tooling.
[project.urls]
Homepage = "https://github.com/theroyallab/tabbyAPI"

# Optional dependency groups, selected at install time as extras.
[project.optional-dependencies]
extras = [
    # Heavy dependencies that aren't for everyday use
    "outlines",
    "infinity-emb",
    "sentence-transformers",
]
# Development tooling; ruff is pinned to an exact version.
dev = [
    "ruff == 0.3.2",
]
# CUDA 12.1 wheels. Every entry is a direct wheel URL gated by an environment
# marker, so exactly one wheel per package matches a given OS / Python version.
cu121 = [
    # Torch (Extra index URLs not supported in pyproject.toml)
    "torch @ https://download.pytorch.org/whl/cu121/torch-2.3.1%2Bcu121-cp312-cp312-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.12'",
    "torch @ https://download.pytorch.org/whl/cu121/torch-2.3.1%2Bcu121-cp311-cp311-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.11'",
    "torch @ https://download.pytorch.org/whl/cu121/torch-2.3.1%2Bcu121-cp310-cp310-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.10'",
    "torch @ https://download.pytorch.org/whl/cu121/torch-2.3.1%2Bcu121-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.12'",
    "torch @ https://download.pytorch.org/whl/cu121/torch-2.3.1%2Bcu121-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.11'",
    "torch @ https://download.pytorch.org/whl/cu121/torch-2.3.1%2Bcu121-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.10'",

    # Exl2
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/cu121/exllamav2/exllamav2-0.1.8+cu121.torch2.3.1-cp312-cp312-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.12'",
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/cu121/exllamav2/exllamav2-0.1.8+cu121.torch2.3.1-cp311-cp311-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.11'",
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/cu121/exllamav2/exllamav2-0.1.8+cu121.torch2.3.1-cp310-cp310-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.10'",
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/cu121/exllamav2/exllamav2-0.1.8+cu121.torch2.3.1-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.12'",
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/cu121/exllamav2/exllamav2-0.1.8+cu121.torch2.3.1-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.11'",
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/cu121/exllamav2/exllamav2-0.1.8+cu121.torch2.3.1-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.10'",

    # Windows FA2 from https://github.com/bdashore3/flash-attention/releases
    "flash_attn @ https://royallab-pip-index.netlify.app/whl/cu121/flash-attn/flash_attn-2.6.3+cu123torch2.3.1cxx11abiFALSE-cp312-cp312-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.12'",
    "flash_attn @ https://royallab-pip-index.netlify.app/whl/cu121/flash-attn/flash_attn-2.6.3+cu123torch2.3.1cxx11abiFALSE-cp311-cp311-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.11'",
    "flash_attn @ https://royallab-pip-index.netlify.app/whl/cu121/flash-attn/flash_attn-2.6.3+cu123torch2.3.1cxx11abiFALSE-cp310-cp310-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.10'",

    # Linux FA2 from https://github.com/Dao-AILab/flash-attention/releases
    "flash_attn @ https://royallab-pip-index.netlify.app/whl/cu121/flash-attn/flash_attn-2.6.3+cu123torch2.3cxx11abiFALSE-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.12'",
    "flash_attn @ https://royallab-pip-index.netlify.app/whl/cu121/flash-attn/flash_attn-2.6.3+cu123torch2.3cxx11abiFALSE-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.11'",
    "flash_attn @ https://royallab-pip-index.netlify.app/whl/cu121/flash-attn/flash_attn-2.6.3+cu123torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.10'",
]
# CUDA 11.8 wheels. Every entry is a direct wheel URL gated by an environment
# marker. This group lists flash_attn wheels for Linux only.
cu118 = [
    # Torch
    "torch @ https://download.pytorch.org/whl/cu118/torch-2.3.1%2Bcu118-cp312-cp312-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.12'",
    "torch @ https://download.pytorch.org/whl/cu118/torch-2.3.1%2Bcu118-cp311-cp311-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.11'",
    "torch @ https://download.pytorch.org/whl/cu118/torch-2.3.1%2Bcu118-cp310-cp310-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.10'",
    "torch @ https://download.pytorch.org/whl/cu118/torch-2.3.1%2Bcu118-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.12'",
    "torch @ https://download.pytorch.org/whl/cu118/torch-2.3.1%2Bcu118-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.11'",
    "torch @ https://download.pytorch.org/whl/cu118/torch-2.3.1%2Bcu118-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.10'",

    # Exl2
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/cu118/exllamav2/exllamav2-0.1.8+cu118.torch2.3.1-cp312-cp312-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.12'",
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/cu118/exllamav2/exllamav2-0.1.8+cu118.torch2.3.1-cp311-cp311-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.11'",
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/cu118/exllamav2/exllamav2-0.1.8+cu118.torch2.3.1-cp310-cp310-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.10'",
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/cu118/exllamav2/exllamav2-0.1.8+cu118.torch2.3.1-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.12'",
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/cu118/exllamav2/exllamav2-0.1.8+cu118.torch2.3.1-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.11'",
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/cu118/exllamav2/exllamav2-0.1.8+cu118.torch2.3.1-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.10'",

    # Linux FA2 from https://github.com/Dao-AILab/flash-attention/releases
    "flash_attn @ https://royallab-pip-index.netlify.app/whl/cu118/flash-attn/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.12'",
    "flash_attn @ https://royallab-pip-index.netlify.app/whl/cu118/flash-attn/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.11'",
    "flash_attn @ https://royallab-pip-index.netlify.app/whl/cu118/flash-attn/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and platform_machine == 'x86_64' and python_version == '3.10'",
]
# ROCm 6.0 wheels for AMD GPUs, pinned by direct URL per Python version.
# NOTE(review): unlike the CUDA groups, these markers gate only on
# python_version even though every wheel is linux_x86_64 -- confirm that
# omitting the platform markers is intentional.
amd = [
    # Torch triton for ROCm
    "pytorch_triton_rocm @ https://download.pytorch.org/whl/pytorch_triton_rocm-2.3.1-cp312-cp312-linux_x86_64.whl ; python_version == '3.12'",
    "pytorch_triton_rocm @ https://download.pytorch.org/whl/pytorch_triton_rocm-2.3.1-cp311-cp311-linux_x86_64.whl ; python_version == '3.11'",
    "pytorch_triton_rocm @ https://download.pytorch.org/whl/pytorch_triton_rocm-2.3.1-cp310-cp310-linux_x86_64.whl ; python_version == '3.10'",

    # Torch
    "torch @ https://download.pytorch.org/whl/rocm6.0/torch-2.3.1%2Brocm6.0-cp312-cp312-linux_x86_64.whl ; python_version == '3.12'",
    "torch @ https://download.pytorch.org/whl/rocm6.0/torch-2.3.1%2Brocm6.0-cp311-cp311-linux_x86_64.whl ; python_version == '3.11'",
    "torch @ https://download.pytorch.org/whl/rocm6.0/torch-2.3.1%2Brocm6.0-cp310-cp310-linux_x86_64.whl ; python_version == '3.10'",

    # Exl2
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/rocm/exllamav2/exllamav2-0.1.8+rocm6.0.torch2.3.1-cp312-cp312-linux_x86_64.whl ; python_version == '3.12'",
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/rocm/exllamav2/exllamav2-0.1.8+rocm6.0.torch2.3.1-cp311-cp311-linux_x86_64.whl ; python_version == '3.11'",
    "exllamav2 @ https://royallab-pip-index.netlify.app/whl/rocm/exllamav2/exllamav2-0.1.8+rocm6.0.torch2.3.1-cp310-cp310-linux_x86_64.whl ; python_version == '3.10'",
]

# MARK: Ruff options

# Top-level Ruff settings: excluded paths, line geometry and target Python.
[tool.ruff]
# Exclude a variety of commonly ignored directories.
exclude = [
    ".git",
    ".git-rewrite",
    ".mypy_cache",
    ".pyenv",
    ".pytest_cache",
    ".ruff_cache",
    ".venv",
    ".vscode",
    "__pypackages__",
    "_build",
    "build",
    "dist",
    "node_modules",
    "site-packages",
    "venv",
]

# Same as Black.
line-length = 88
indent-width = 4

# Assume Python 3.10
target-version = "py310"

# Lint rule selection. `select` sets the base rule families and
# `extend-select` adds individual rule codes on top of them.
[tool.ruff.lint]
# Enable preview
preview = true

# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
# McCabe complexity (`C901`) by default.
# Enable flake8-bugbear (`B`) rules, in addition to the defaults.
select = ["E4", "E7", "E9", "F", "B"]
extend-select = [
    "D419",     # empty-docstring
    "PLC2401",  # non-ascii-name
    "E501",     # line-too-long
    "W291",     # trailing-whitespace
    "PLC0414",  # useless-import-alias
    "PLE0101",  # return-in-init
    "F706",     # return-outside-function
    "F704",     # yield-outside-function
    "PLE0116",  # continue-in-finally
    "PLE0117",  # nonlocal-without-binding
    "PLE0241",  # duplicate-bases
    "PLE0302",  # unexpected-special-method-signature
    "PLE0604",  # invalid-all-object
    "PLE0704",  # misplaced-bare-raise
    "PLE1205",  # logging-too-many-args
    "PLE1206",  # logging-too-few-args
    "PLE1307",  # bad-string-format-type
    "PLE1310",  # bad-str-strip-call
    "PLE1507",  # invalid-envvar-value
    "PLR0124",  # comparison-with-itself
    "PLR0202",  # no-classmethod-decorator
    "PLR0203",  # no-staticmethod-decorator
    "PLR0206",  # property-with-parameters
    "PLR1704",  # redefined-argument-from-local
    "PLR1711",  # useless-return
    "C416",     # unnecessary-comprehension
    "PLW0108",  # unnecessary-lambda
    "PLW0127",  # self-assigning-variable
    "PLW0129",  # assert-on-string-literal
    "PLW0602",  # global-variable-not-assigned
    "PLW0604",  # global-at-module-level
    "F401",     # unused-import
    "F841",     # unused-variable
    "E722",     # bare-except
    "PLW0711",  # binary-op-exception
    "PLW1501",  # bad-open-mode
    "PLW1508",  # invalid-envvar-default
    "PLW1509",  # subprocess-popen-preexec-fn
]
# Rule codes that are explicitly ignored.
ignore = [
    "PLR6301",  # no-self-use
    "UP004",    # useless-object-inheritance
    "PLR0904",  # too-many-public-methods
    "PLR0911",  # too-many-return-statements
    "PLR0912",  # too-many-branches
    "PLR0913",  # too-many-arguments
    "PLR0914",  # too-many-locals
    "PLR0915",  # too-many-statements
    "PLR0916",  # too-many-boolean-expressions
    "PLW0120",  # useless-else-on-loop
    "PLW0406",  # import-self
    "PLW0603",  # global-statement
    "PLW1641",  # eq-without-hash
]

# Allow fix for all enabled rules (when `--fix` is provided), except the
# flake8-bugbear (`B`) rules, which are listed as unfixable.
fixable = ["ALL"]
unfixable = ["B"]

# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

# Formatter settings, chosen to mirror Black's behaviour.
[tool.ruff.format]
# Like Black, use double quotes for strings.
quote-style = "double"

# Like Black, indent with spaces, rather than tabs.
indent-style = "space"

# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false

# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"