diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..79c53eb --- /dev/null +++ b/Dockerfile @@ -0,0 +1,42 @@ +# Use an official CUDA runtime with Ubuntu as a parent image +FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 + +# Install Python and pip +RUN apt-get update && apt-get install -y \ + python3.11 \ + python3-pip \ + && rm -rf /var/lib/apt/lists/* + +# Define a build-time argument for conditional installation +ARG INSTALL_FSCHAT=false + +# Set the environment variable based on the build argument +ENV INSTALL_FSCHAT=$INSTALL_FSCHAT + +# Set the working directory in the container +WORKDIR /usr/src/app + +# Copy the current directory contents into the container at /usr/src/app +COPY . . + +# Install torch with CUDA support and exllamav2 +RUN pip install torch --extra-index-url https://download.pytorch.org/whl/cu121 +RUN pip install exllamav2 + +# Install any other needed packages specified in requirements.txt +RUN pip install --no-cache-dir -r requirements.txt + +# Conditional installation of fschat[model_worker] +RUN if [ "$INSTALL_FSCHAT" = "true" ] ; then pip install fschat[model_worker] ; fi + +# Copy the sample config file to the main config +RUN cp config_sample.yml config.yml + +# Make port 5000 available to the world outside this container +EXPOSE 5000 + +# Define environment variable +ENV NAME World + +# Run main.py when the container launches +CMD ["python3", "main.py"] diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..0f6e281 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,15 @@ +version: '3.8' +services: + tabbyapi: + build: + context: . + args: + INSTALL_FSCHAT: "true" # Set this to "true" or "false" as needed + ports: + - "5000:5000" + runtime: nvidia + environment: + - NAME=TabbyAPI + - NVIDIA_VISIBLE_DEVICES=all + volumes: + - /mnt/nvme/models:/usr/src/app/models