173 lines
No EOL
6.6 KiB
Text
173 lines
No EOL
6.6 KiB
Text
{
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0,
|
|
"metadata": {
|
|
"colab": {
|
|
"provenance": [],
|
|
"gpuType": "T4"
|
|
},
|
|
"kernelspec": {
|
|
"name": "python3",
|
|
"display_name": "Python 3"
|
|
},
|
|
"language_info": {
|
|
"name": "python"
|
|
},
|
|
"accelerator": "GPU"
|
|
},
|
|
"cells": [
|
|
{
  "cell_type": "code",
  "source": [
    "# Optional keep-alive cell, separate from the numbered Cell 1 / Cell 2 below: playing the audio clip is meant to stop Colab from disconnecting the session.\n",
    "#@title Keep this widget playing to prevent Colab from disconnecting you { display-mode: \"form\" }\n",
    "#@markdown Press play on the audio player that will appear below:\n",
    "%%html\n",
    "<audio src=\"https://oobabooga.github.io/silence.m4a\" controls>"
  ],
  "metadata": {
    "id": "zU0omxLLm8AN"
  },
  "execution_count": null,
  "outputs": []
},
|
|
{
  "cell_type": "code",
  "execution_count": null,
  "metadata": {
    "id": "ThqTbSnPalJl"
  },
  "outputs": [],
  "source": [
    "# @title # **Cell 1 - Installation w/ Model Downloading** { display-mode: \"form\" }\n",
    "# @markdown ---\n",
    "# @markdown # Download Model\n",
    "# Hugging Face repository and branch (revision) of the model to download.\n",
    "# `repo_id` is read again by Cell 2, so this cell has to run first in a fresh runtime.\n",
    "repo_id = \"royallab/airoboros-mistral2.2-7b-exl2\" # @param {type:\"string\"}\n",
    "revision = \"6bpw\" # @param {type:\"string\"}\n",
    "# @markdown ---\n",
    "\n",
    "# Folder name used under ./models: the repo id with '/' replaced by '_'.\n",
    "# Computed once so the download target and the printed path cannot drift apart.\n",
    "model_dirname = repo_id.replace('/', '_')\n",
    "\n",
    "# Install tabbyAPI.\n",
    "# The clone is skipped when the checkout already exists, so re-running this cell\n",
    "# (e.g. to fetch another model) does not make git abort on a non-empty directory.\n",
    "%cd /content/\n",
    "!test -d tabbyAPI || git clone https://github.com/theroyallab/tabbyAPI\n",
    "%cd tabbyAPI\n",
    "\n",
    "# Per their filenames, the prebuilt wheels target CPython 3.10 and CUDA 11.8\n",
    "# (the flash-attn wheel additionally torch 2.1). Install order is kept as-is:\n",
    "# exllamav2 wheel, tabbyAPI requirements, flash-attn wheel, huggingface-hub.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu118-cp310-cp310-linux_x86_64.whl -q\n",
    "%pip install -r requirements.txt -q\n",
    "%pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.6/flash_attn-2.3.6+cu118torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl -q\n",
    "%pip install huggingface-hub -q\n",
    "\n",
    "# Download the cloudflared binary that Cell 2 runs to open a tunnel to the API.\n",
    "# The working directory is still /content/tabbyAPI/ at this point.\n",
    "!wget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -O cloudflared\n",
    "!chmod a+x cloudflared\n",
    "\n",
    "# Download the selected model branch into ./models/<model_dirname>.\n",
    "# The import has to come after the pip install of huggingface-hub above.\n",
    "from huggingface_hub import snapshot_download\n",
    "snapshot_download(repo_id=repo_id, revision=revision, local_dir=f\"./models/{model_dirname}\")\n",
    "\n",
    "# Register NVIDIA's Ubuntu 22.04 apt repository via the cuda-keyring package and install the `cuda` package from it.\n",
    "# -nc keeps a re-run from saving a duplicate cuda-keyring_1.0-1_all.deb.1 next to the first download.\n",
    "!wget -nc https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb\n",
    "!dpkg -i cuda-keyring_1.0-1_all.deb\n",
    "!apt-get -qq update\n",
    "!apt-get -y -qq install cuda\n",
    "\n",
    "print(f\"Model dir: './models/{model_dirname}'\")"
  ]
},
|
|
{
  "cell_type": "code",
  "source": [
    "# @title # **Cell 2 - Edit Config and Start Tabby** { display-mode: \"form\" }\n",
    "# @markdown ---\n",
    "# @markdown # Edit Config\n",
    "\n",
    "# `repo_id` is defined by Cell 1; run that cell first in a fresh runtime.\n",
    "# The model folder name mirrors the one Cell 1 downloads into ('/' replaced by '_').\n",
    "model = repo_id.replace('/', '_')\n",
    "ContextSize = 4096 # @param {type:\"raw\"}\n",
    "RopeScale = 1.0 # @param {type:\"raw\"}\n",
    "RopeAlpha = 1.0 # @param {type:\"raw\"}\n",
    "# This notebook requests a T4 runtime, which is older than the RTX 3000 generation that the\n",
    "# config comment below names as the minimum for Flash-attention 2, so it is disabled by default.\n",
    "# Untick the box when running on a newer GPU.\n",
    "DisableFlashAttention = True # @param {type:\"boolean\"}\n",
    "\n",
    "# Setup Config - edit parameters to fit your needs\n",
    "%cd /content/tabbyAPI/\n",
    "\n",
    "# YAML written verbatim to config.yml; the {placeholders} are filled from the form values above.\n",
    "config_yaml = f'''\n",
    "# Sample YAML file for configuration.\n",
    "# Comment out values as needed. Every value has a default within the application.\n",
    "\n",
    "# Unless specified in the comments, DO NOT put these options in quotes!\n",
    "# You can use https://www.yamllint.com/ if you want to check your YAML formatting.\n",
    "\n",
    "# Options for networking\n",
    "network:\n",
    " # The IP to host on (default: 127.0.0.1).\n",
    " # Use 0.0.0.0 to expose on all network adapters\n",
    " host: 0.0.0.0\n",
    "\n",
    " # The port to host on (default: 5000)\n",
    " port: 5000\n",
    "\n",
    "# Options for model overrides and loading\n",
    "model:\n",
    " # Overrides the directory to look for models (default: models)\n",
    " # Windows users, DO NOT put this path in quotes! This directory will be invalid otherwise.\n",
    " model_dir: models\n",
    "\n",
    " # An initial model to load. Make sure the model is located in the model directory!\n",
    " # A model can be loaded later via the API.\n",
    " model_name: {model}\n",
    "\n",
    " # Sends dummy model names when the models endpoint is queried\n",
    " # Enable this if the program is looking for a specific OAI model\n",
    " use_dummy_models: False\n",
    "\n",
    " # The below parameters apply only if model_name is set\n",
    "\n",
    " # Maximum model context length (default: 4096)\n",
    " max_seq_len: {ContextSize}\n",
    "\n",
    " # Automatically allocate resources to GPUs (default: True)\n",
    " gpu_split_auto: True\n",
    "\n",
    " # An integer array of GBs of vram to split between GPUs (default: [])\n",
    " # gpu_split: [20.6, 24]\n",
    "\n",
    " # Rope scaling parameters (default: 1.0)\n",
    " rope_scale: {RopeScale}\n",
    " rope_alpha: {RopeAlpha}\n",
    "\n",
    " # Disable Flash-attention 2. Set to True for GPUs lower than Nvidia's 3000 series. (default: False)\n",
    " no_flash_attention: {DisableFlashAttention}\n",
    "\n",
    " # Enable 8 bit cache mode for VRAM savings (slight performance hit). Possible values FP16, FP8. (default: FP16)\n",
    " cache_mode: FP16\n",
    "\n",
    " # Options for draft models (speculative decoding). This will use more VRAM!\n",
    " draft:\n",
    " # Overrides the directory to look for draft (default: models)\n",
    " # draft_model_dir: Your draft model directory path\n",
    "\n",
    " # An initial draft model to load. Make sure this model is located in the model directory!\n",
    " # A draft model can be loaded later via the API.\n",
    " # draft_model_name: A model name\n",
    "\n",
    " # Rope parameters for draft models (default: 1.0)\n",
    " # draft_rope_alpha: 1.0\n",
    "'''\n",
    "with open(\"./config.yml\", \"w\") as config_file:\n",
    "    config_file.write(config_yaml)\n",
    "# Echo the generated file so the effective settings are visible in the cell output.\n",
    "!cat config.yml\n",
    "\n",
    "# Start the API server in the background and run cloudflared in the foreground,\n",
    "# tunnelling to localhost:5000 (the port set in the config above).\n",
    "!python main.py & ./cloudflared tunnel --url localhost:5000"
  ],
  "metadata": {
    "id": "aD7Rz8ZCeShD"
  },
  "execution_count": null,
  "outputs": []
}
|
|
]
|
|
} |