diff --git a/colab/TabbyAPI_Colab_Example.ipynb b/colab/TabbyAPI_Colab_Example.ipynb index 7af5a57..b8e32f0 100644 --- a/colab/TabbyAPI_Colab_Example.ipynb +++ b/colab/TabbyAPI_Colab_Example.ipynb @@ -56,7 +56,7 @@ "%cd tabbyAPI\n", "\n", "# Install cuda requirements\n", - "!pip install .[cu121] -q\n", + "!pip install .[cu12] -q\n", "!pip install huggingface-hub -q\n", "\n", "# Download cloudflared tunnel\n", diff --git a/docker/Dockerfile b/docker/Dockerfile index 705d76f..58aa61f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -26,8 +26,8 @@ WORKDIR /app # Get requirements COPY pyproject.toml . -# Install packages specified in pyproject.toml cu121, extras -RUN pip install --no-cache-dir .[cu121,extras] +# Install packages specified in pyproject.toml cu12, extras +RUN pip install --no-cache-dir .[cu12,extras] RUN rm pyproject.toml diff --git a/docs/01.-Getting-Started.md b/docs/01.-Getting-Started.md index 4bf0b3e..05b87e9 100644 --- a/docs/01.-Getting-Started.md +++ b/docs/01.-Getting-Started.md @@ -47,7 +47,7 @@ To get started, make sure you have the following installed on your system: 1. On Windows: `.\venv\Scripts\activate` 2. On Linux: `source venv/bin/activate` 3. Install the pyproject features based on your system: - 1. Cuda 12.x: `pip install -U .[cu121]` + 1. Cuda 12.x: `pip install -U .[cu12]` 2. ROCm 5.6: `pip install -U .[amd]` 4. Start the API by either 1. Run `start.bat/sh`. The script will check if you're in a conda environment and skip venv checks. @@ -98,7 +98,7 @@ There are a couple ways to update TabbyAPI: These scripts exit after running their respective tasks. To start TabbyAPI, run `start.bat` or `start.sh`. 2. **Manual** - Install the pyproject features and update dependencies depending on your GPU: - 1. `pip install -U .[cu121]` = CUDA 12.x + 1. `pip install -U .[cu12]` = CUDA 12.x 2. 
`pip install -U .[amd]` = ROCm 6.0 If you don't want to update dependencies that come from wheels (torch, exllamav2, and flash attention 2), use `pip install .` or pass the `--nowheel` flag when invoking the start scripts. @@ -121,7 +121,7 @@ NOTE: Here are ways to install exllamav2: 1. From a [wheel/release](https://github.com/turboderp/exllamav2#method-2-install-from-release-with-prebuilt-extension) (Recommended) - 1. Find the version that corresponds with your cuda and python version. For example, a wheel with `cu121` and `cp311` corresponds to CUDA 12.1 and python 3.11 + 1. Find the version that corresponds with your cuda and python version. For example, a wheel with `cu12` and `cp311` corresponds to CUDA 12.x and python 3.11 2. From [pip](https://github.com/turboderp/exllamav2#method-3-install-from-pypi): `pip install exllamav2` 2. This is a JIT compiled extension, which means that the initial launch of tabbyAPI will take some time. The build may also not work due to improper environment configuration. 3. 
From [source](https://github.com/turboderp/exllamav2#method-1-install-from-source) diff --git a/main.py b/main.py index 7bb2450..661613e 100644 --- a/main.py +++ b/main.py @@ -151,7 +151,7 @@ def entrypoint( "Or you can manually run a requirements update " "using the following command:\n\n" "For CUDA 12.1:\n" - "pip install --upgrade .[cu121]\n\n" + "pip install --upgrade .[cu12]\n\n" "For ROCm:\n" "pip install --upgrade .[amd]\n\n" ) diff --git a/pyproject.toml b/pyproject.toml index a45de2a..eebba3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ extras = [ dev = [ "ruff == 0.11.10" ] -cu121 = [ +cu12 = [ # Torch (Extra index URLs not support in pyproject.toml) "torch @ https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp313-cp313-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.13'", "torch @ https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp312-cp312-win_amd64.whl ; platform_system == 'Windows' and python_version == '3.12'", diff --git a/start.py b/start.py index 619b07e..2d30454 100644 --- a/start.py +++ b/start.py @@ -41,14 +41,13 @@ def get_user_choice(question: str, options_dict: dict): def get_install_features(lib_name: str = None): """Fetches the appropriate requirements file depending on the GPU""" install_features = None - possible_features = ["cu121", "cu118", "amd"] + possible_features = ["cu12", "amd"] if not lib_name: # Ask the user for the GPU lib gpu_lib_choices = { - "A": {"pretty": "NVIDIA Cuda 12.x", "internal": "cu121"}, - "B": {"pretty": "NVIDIA Cuda 11.8 (Unsupported)", "internal": "cu118"}, - "C": {"pretty": "AMD", "internal": "amd"}, + "A": {"pretty": "NVIDIA Cuda 12.x", "internal": "cu12"}, + "B": {"pretty": "AMD", "internal": "amd"}, } user_input = get_user_choice( "Select your GPU. 
If you don't know, select Cuda 12.x (A)", @@ -79,7 +78,7 @@ def get_install_features(lib_name: str = None): if platform.system() == "Windows": print( "ERROR: TabbyAPI does not support AMD and Windows. " - "Please use Linux and ROCm 6.0. Exiting." + "Please use Linux and ROCm 6.4. Exiting." ) sys.exit(0) @@ -139,24 +138,17 @@ def add_start_args(parser: argparse.ArgumentParser): ) -def migrate_gpu_lib(): - gpu_lib_path = pathlib.Path("gpu_lib.txt") +def migrate_start_options(start_options: dict): + migrated = False - if not gpu_lib_path.exists(): - return + # Migrate gpu_lib key + gpu_lib = start_options.get("gpu_lib") + if (gpu_lib == "cu121" or gpu_lib == "cu118"): + print("GPU lib key is legacy, migrating to cu12") + start_options["gpu_lib"] = "cu12" + migrated = True - print("Migrating gpu_lib.txt to the new start_options.json") - with open("gpu_lib.txt", "r") as gpu_lib_file: - start_options["gpu_lib"] = gpu_lib_file.readline().strip() - start_options["first_run_done"] = True - - # Remove the old file - gpu_lib_path.unlink() - - print( - "Successfully migrated gpu lib options to start_options. " - "The old file has been deleted." - ) + return migrated if __name__ == "__main__": @@ -183,6 +175,7 @@ if __name__ == "__main__": add_start_args(parser) args, _ = parser.parse_known_args() script_ext = "bat" if platform.system() == "Windows" else "sh" + do_start_options_write = False start_options_path = pathlib.Path("start_options.json") if start_options_path.exists(): @@ -190,6 +183,7 @@ if __name__ == "__main__": start_options = json.load(start_options_file) print("Loaded your saved preferences from `start_options.json`") + do_start_options_write = migrate_start_options(start_options) if start_options.get("first_run_done"): first_run = False else: @@ -198,9 +192,6 @@ if __name__ == "__main__": "Getting things ready..." 
) - # Migrate from old setting storage - migrate_gpu_lib() - # Set variables that rely on start options first_run = not start_options.get("first_run_done") @@ -240,15 +231,7 @@ if __name__ == "__main__": start_options["first_run_done"] = True # Save start options on first run - with open("start_options.json", "w") as start_file: - start_file.write(json.dumps(start_options)) - - print( - "Successfully wrote your start script options to " - "`start_options.json`. \n" - "If something goes wrong, editing or deleting the file " - "will reinstall TabbyAPI as a first-time user." - ) + do_start_options_write = True if args.update_deps: print( @@ -262,6 +245,17 @@ if __name__ == "__main__": "inside the `update_scripts` folder." ) + if do_start_options_write: + with open("start_options.json", "w") as start_file: + start_file.write(json.dumps(start_options)) + + print( + "Successfully wrote your start script options to " + "`start_options.json`. \n" + "If something goes wrong, editing or deleting the file " + "will reinstall TabbyAPI as a first-time user." + ) + # Expand the parser if it's not fully created if not has_full_parser: from common.args import init_argparser