diff --git a/endpoints/OAI/utils/chat_completion.py b/endpoints/OAI/utils/chat_completion.py
index c1e6681..7eb5bfe 100644
--- a/endpoints/OAI/utils/chat_completion.py
+++ b/endpoints/OAI/utils/chat_completion.py
@@ -286,6 +286,16 @@ async def apply_chat_template(
                     "add_generation_prompt is False"
                 )
 
+        # Strip a leading BOS token from the prompt when add_bos_token is set
+        # This prevents add_bos_token from producing a duplicate BOS token
+        bos_token = template_vars.get("bos_token")
+        if (
+            bos_token
+            and model.container.hf_model.add_bos_token()
+            and prompt.startswith(bos_token)
+        ):
+            prompt = prompt.removeprefix(bos_token)
+
         # Add template metadata
         await _append_template_metadata(data, template_vars)