diff --git a/endpoints/OAI/utils/chat_completion.py b/endpoints/OAI/utils/chat_completion.py
index 0c89f7e..11d4088 100644
--- a/endpoints/OAI/utils/chat_completion.py
+++ b/endpoints/OAI/utils/chat_completion.py
@@ -471,13 +471,13 @@ async def generate_tool_calls(
 
             if "text" in gen:
                 # non streaming, all generations will have the text they generated
-                pre_tool_prompt, mm_embeddings = await apply_chat_template(
+                pre_tool_prompt, embeddings = await apply_chat_template(
                     data, gen["text"]
                 )
             elif current_generations is not None:
                 # streaming, we wont have text in the generation,
                 # we'll have to use the current_generations
-                pre_tool_prompt, mm_embeddings = await apply_chat_template(
+                pre_tool_prompt, embeddings = await apply_chat_template(
                     data, current_generations
                 )
 
@@ -489,7 +489,7 @@ async def generate_tool_calls(
                         request_id,
                         pre_tool_prompt,
                         tool_data,
-                        embeddings=mm_embeddings,
+                        mm_embeddings=embeddings,
                     )
                 )
             )