Add Colab example

*Note: this uses wheels built for Python 3.10 and torch 2.1.0+cu118, which are the current defaults in Colab.
2023-12-02 23:19:53 -08:00 · 2023-12-02 23:19:53 -08:00 · 2a9e4ca051
commit 2a9e4ca051
parent e740b53478
1 changed files with 168 additions and 0 deletions
--- a/TabbyAPI_Colab_Example.ipynb
+++ b/TabbyAPI_Colab_Example.ipynb
@ -0,0 +1,168 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "ThqTbSnPalJl"
+      },
+      "outputs": [],
+      "source": [
+        "# Install tabbyAPI and its dependencies\n",
+        "%cd /content/\n",
+        "\n",
+        "!git clone https://github.com/theroyallab/tabbyAPI\n",
+        "%cd tabbyAPI\n",
+        "\n",
+        "# The prebuilt wheels below target Python 3.10 / CUDA 11.8 (and torch 2.1 for flash-attn).\n",
+        "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
+        "%pip install https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu118-cp310-cp310-linux_x86_64.whl -q\n",
+        "%pip install -r requirements.txt -q\n",
+        "%pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.6/flash_attn-2.3.6+cu118torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl -q\n",
+        "%pip install huggingface-hub -q"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Fetch the latest cloudflared Linux binary and make it executable\n",
+        "%cd /content/tabbyAPI/\n",
+        "\n",
+        "!wget -O cloudflared https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64\n",
+        "!chmod a+x cloudflared"
+      ],
+      "metadata": {
+        "id": "tpcfGFPzf1C0"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Download the model repository from the Hugging Face Hub\n",
+        "%cd /content/tabbyAPI/\n",
+        "\n",
+        "from huggingface_hub import snapshot_download\n",
+        "\n",
+        "# Repository and branch (revision) to fetch\n",
+        "repo_id = \"royallab/airoboros-mistral2.2-7b-exl2\"\n",
+        "revision = \"6bpw\"\n",
+        "\n",
+        "# Flatten 'owner/name' into a single folder name under ./models\n",
+        "local_dir = \"./models/\" + repo_id.replace(\"/\", \"_\")\n",
+        "snapshot_download(repo_id=repo_id, revision=revision, local_dir=local_dir)\n",
+        "\n",
+        "print(f\"Model dir: '{local_dir}'\")"
+      ],
+      "metadata": {
+        "id": "ZOZwXx0cc4l1"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Setup Config - edit parameters to fit your needs\n",
+        "%cd /content/tabbyAPI/\n",
+        "\n",
+        "# NOTE: this notebook requests a T4 GPU, which is older than Nvidia's 3000 series,\n",
+        "# so no_flash_attention is set to True below. Set it to False on newer GPUs.\n",
+        "# model_name must match the folder name printed by the model download cell.\n",
+        "write = '''\n",
+        "# Sample YAML file for configuration.\n",
+        "# Comment out values as needed. Every value has a default within the application.\n",
+        "\n",
+        "# Unless specified in the comments, DO NOT put these options in quotes!\n",
+        "# You can use https://www.yamllint.com/ if you want to check your YAML formatting.\n",
+        "\n",
+        "# Options for networking\n",
+        "network:\n",
+        "  # The IP to host on (default: 127.0.0.1).\n",
+        "  # Use 0.0.0.0 to expose on all network adapters\n",
+        "  host: 0.0.0.0\n",
+        "\n",
+        "  # The port to host on (default: 5000)\n",
+        "  port: 5000\n",
+        "\n",
+        "# Options for model overrides and loading\n",
+        "model:\n",
+        "  # Overrides the directory to look for models (default: models)\n",
+        "  # Windows users, DO NOT put this path in quotes! This directory will be invalid otherwise.\n",
+        "  model_dir: models\n",
+        "\n",
+        "  # An initial model to load. Make sure the model is located in the model directory!\n",
+        "  # A model can be loaded later via the API.\n",
+        "  model_name: royallab_airoboros-mistral2.2-7b-exl2\n",
+        "\n",
+        "  # Sends dummy model names when the models endpoint is queried\n",
+        "  # Enable this if the program is looking for a specific OAI model\n",
+        "  use_dummy_models: False\n",
+        "\n",
+        "  # The below parameters apply only if model_name is set\n",
+        "\n",
+        "  # Maximum model context length (default: 4096)\n",
+        "  max_seq_len: 4096\n",
+        "\n",
+        "  # Automatically allocate resources to GPUs (default: True)\n",
+        "  gpu_split_auto: True\n",
+        "\n",
+        "  # An integer array of GBs of vram to split between GPUs (default: [])\n",
+        "  # gpu_split: [20.6, 24]\n",
+        "\n",
+        "  # Rope scaling parameters (default: 1.0)\n",
+        "  rope_scale: 1.0\n",
+        "  rope_alpha: 1.0\n",
+        "\n",
+        "  # Disable Flash-attention 2. Set to True for GPUs lower than Nvidia's 3000 series. (default: False)\n",
+        "  no_flash_attention: True\n",
+        "\n",
+        "  # Enable 8 bit cache mode for VRAM savings (slight performance hit). Possible values FP16, FP8. (default: FP16)\n",
+        "  cache_mode: FP16\n",
+        "\n",
+        "  # Options for draft models (speculative decoding). This will use more VRAM!\n",
+        "  draft:\n",
+        "    # Overrides the directory to look for draft (default: models)\n",
+        "    # draft_model_dir: Your draft model directory path\n",
+        "\n",
+        "    # An initial draft model to load. Make sure this model is located in the model directory!\n",
+        "    # A draft model can be loaded later via the API.\n",
+        "    # draft_model_name: A model name\n",
+        "\n",
+        "    # Rope parameters for draft models (default: 1.0)\n",
+        "    # draft_rope_alpha: 1.0\n",
+        "'''\n",
+        "# Write the YAML text to config.yml in the tabbyAPI directory, then print it back for review\n",
+        "with open(\"./config.yml\", \"w\") as file:\n",
+        "    file.write(write)\n",
+        "!cat config.yml"
+      ],
+      "metadata": {
+        "id": "aD7Rz8ZCeShD"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Start API\n",
+        "%cd /content/tabbyAPI/\n",
+        "\n",
+        "# Launch the API server in the background (&), then run cloudflared in the foreground\n",
+        "# with a tunnel pointed at localhost:5000, the port set in config.yml.\n",
+        "!python main.py & ./cloudflared tunnel --url localhost:5000"
+      ],
+      "metadata": {
+        "id": "ZhSK71UeeMJi"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}