diff --git a/colab/TabbyAPI_Colab_Example.ipynb b/colab/TabbyAPI_Colab_Example.ipynb index 7af5a57..b8e32f0 100644 --- a/colab/TabbyAPI_Colab_Example.ipynb +++ b/colab/TabbyAPI_Colab_Example.ipynb @@ -56,7 +56,7 @@ "%cd tabbyAPI\n", "\n", "# Install cuda requirements\n", - "!pip install .[cu121] -q\n", + "!pip install .[cu12] -q\n", "!pip install huggingface-hub -q\n", "\n", "# Download cloudflared tunnel\n", diff --git a/docker/Dockerfile b/docker/Dockerfile index 705d76f..58aa61f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -26,8 +26,8 @@ WORKDIR /app # Get requirements COPY pyproject.toml . -# Install packages specified in pyproject.toml cu121, extras -RUN pip install --no-cache-dir .[cu121,extras] +# Install packages specified in pyproject.toml cu12, extras +RUN pip install --no-cache-dir .[cu12,extras] RUN rm pyproject.toml diff --git a/docs/01.-Getting-Started.md b/docs/01.-Getting-Started.md index 4bf0b3e..05b87e9 100644 --- a/docs/01.-Getting-Started.md +++ b/docs/01.-Getting-Started.md @@ -47,7 +47,7 @@ To get started, make sure you have the following installed on your system: 1. On Windows: `.\venv\Scripts\activate` 2. On Linux: `source venv/bin/activate` 3. Install the pyproject features based on your system: - 1. Cuda 12.x: `pip install -U .[cu121]` + 1. Cuda 12.x: `pip install -U .[cu12]` 2. ROCm 5.6: `pip install -U .[amd]` 4. Start the API by either 1. Run `start.bat/sh`. The script will check if you're in a conda environment and skip venv checks. @@ -98,7 +98,7 @@ There are a couple ways to update TabbyAPI: These scripts exit after running their respective tasks. To start TabbyAPI, run `start.bat` or `start.sh`. 2. **Manual** - Install the pyproject features and update dependencies depending on your GPU: - 1. `pip install -U .[cu121]` = CUDA 12.x + 1. `pip install -U .[cu12]` = CUDA 12.x 2. 
`pip install -U .[amd]` = ROCm 6.0 If you don't want to update dependencies that come from wheels (torch, exllamav2, and flash attention 2), use `pip install .` or pass the `--nowheel` flag when invoking the start scripts. @@ -121,7 +121,7 @@ NOTE: Here are ways to install exllamav2: 1. From a [wheel/release](https://github.com/turboderp/exllamav2#method-2-install-from-release-with-prebuilt-extension) (Recommended) - 1. Find the version that corresponds with your cuda and python version. For example, a wheel with `cu121` and `cp311` corresponds to CUDA 12.1 and python 3.11 + 1. Find the version that corresponds with your cuda and python version. For example, a wheel with `cu12` and `cp311` corresponds to CUDA 12.x and python 3.11 2. From [pip](https://github.com/turboderp/exllamav2#method-3-install-from-pypi): `pip install exllamav2` 2. This is a JIT compiled extension, which means that the initial launch of tabbyAPI will take some time. The build may also not work due to improper environment configuration. 3. 
From [source](https://github.com/turboderp/exllamav2#method-1-install-from-source) diff --git a/main.py b/main.py index 7bb2450..661613e 100644 --- a/main.py +++ b/main.py @@ -151,7 +151,7 @@ def entrypoint( "Or you can manually run a requirements update " "using the following command:\n\n" "For CUDA 12.1:\n" - "pip install --upgrade .[cu121]\n\n" + "pip install --upgrade .[cu12]\n\n" "For ROCm:\n" "pip install --upgrade .[amd]\n\n" ) diff --git a/pyproject.toml b/pyproject.toml index a45de2a..eebba3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ extras = [ dev = [ "ruff == 0.11.10" ] -cu121 = [ +cu12 = [ # Torch (Extra index URLs not support in pyproject.toml) "torch @ https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp313-cp313-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.13'", "torch @ https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp312-cp312-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.12'", diff --git a/start.py b/start.py index 619b07e..2d30454 100644 --- a/start.py +++ b/start.py @@ -41,14 +41,13 @@ def get_user_choice(question: str, options_dict: dict): def get_install_features(lib_name: str = None): """Fetches the appropriate requirements file depending on the GPU""" install_features = None - possible_features = ["cu121", "cu118", "amd"] + possible_features = ["cu12", "amd"] if not lib_name: # Ask the user for the GPU lib gpu_lib_choices = { - "A": {"pretty": "NVIDIA Cuda 12.x", "internal": "cu121"}, - "B": {"pretty": "NVIDIA Cuda 11.8 (Unsupported)", "internal": "cu118"}, - "C": {"pretty": "AMD", "internal": "amd"}, + "A": {"pretty": "NVIDIA Cuda 12.x", "internal": "cu12"}, + "B": {"pretty": "AMD", "internal": "amd"}, } user_input = get_user_choice( "Select your GPU. 
If you don't know, select Cuda 12.x (A)", @@ -79,7 +78,7 @@ def get_install_features(lib_name: str = None): if platform.system() == "Windows": print( "ERROR: TabbyAPI does not support AMD and Windows. " - "Please use Linux and ROCm 6.0. Exiting." + "Please use Linux and ROCm 6.4. Exiting." ) sys.exit(0) @@ -139,24 +138,17 @@ def add_start_args(parser: argparse.ArgumentParser): ) -def migrate_gpu_lib(): - gpu_lib_path = pathlib.Path("gpu_lib.txt") +def migrate_start_options(start_options: dict): + migrated = False - if not gpu_lib_path.exists(): - return + # Migrate gpu_lib key + gpu_lib = start_options.get("gpu_lib") + if (gpu_lib == "cu121" or gpu_lib == "cu118"): + print("GPU lib key is legacy, migrating to cu12") + start_options["gpu_lib"] = "cu12" + migrated = True - print("Migrating gpu_lib.txt to the new start_options.json") - with open("gpu_lib.txt", "r") as gpu_lib_file: - start_options["gpu_lib"] = gpu_lib_file.readline().strip() - start_options["first_run_done"] = True - - # Remove the old file - gpu_lib_path.unlink() - - print( - "Successfully migrated gpu lib options to start_options. " - "The old file has been deleted." - ) + return migrated if __name__ == "__main__": @@ -183,6 +175,7 @@ if __name__ == "__main__": add_start_args(parser) args, _ = parser.parse_known_args() script_ext = "bat" if platform.system() == "Windows" else "sh" + do_start_options_write = False start_options_path = pathlib.Path("start_options.json") if start_options_path.exists(): @@ -190,6 +183,7 @@ if __name__ == "__main__": start_options = json.load(start_options_file) print("Loaded your saved preferences from `start_options.json`") + do_start_options_write = migrate_start_options(start_options) if start_options.get("first_run_done"): first_run = False else: @@ -198,9 +192,6 @@ if __name__ == "__main__": "Getting things ready..." 
) - # Migrate from old setting storage - migrate_gpu_lib() - # Set variables that rely on start options first_run = not start_options.get("first_run_done") @@ -240,15 +231,7 @@ if __name__ == "__main__": start_options["first_run_done"] = True # Save start options on first run - with open("start_options.json", "w") as start_file: - start_file.write(json.dumps(start_options)) - - print( - "Successfully wrote your start script options to " - "`start_options.json`. \n" - "If something goes wrong, editing or deleting the file " - "will reinstall TabbyAPI as a first-time user." - ) + do_start_options_write = True if args.update_deps: print( @@ -262,6 +245,17 @@ if __name__ == "__main__": "inside the `update_scripts` folder." ) + if do_start_options_write: + with open("start_options.json", "w") as start_file: + start_file.write(json.dumps(start_options)) + + print( + "Successfully wrote your start script options to " + "`start_options.json`. \n" + "If something goes wrong, editing or deleting the file " + "will reinstall TabbyAPI as a first-time user." + ) + # Expand the parser if it's not fully created if not has_full_parser: from common.args import init_argparser