OAI: Pass mm_embeddings to tool call generation

Don't exclude the vision embeddings when regenerating for a tool call.

Signed-off-by: kingbri <bdashore3@proton.me>
2024-11-28 23:27:59 -05:00 · 2024-11-28 23:27:59 -05:00 · 2e06fb01d3
commit 2e06fb01d3
parent a52610fb19
1 changed file with 10 additions and 3 deletions
--- a/endpoints/OAI/utils/chat_completion.py
+++ b/endpoints/OAI/utils/chat_completion.py
@@ -454,16 +454,23 @@ async def generate_tool_calls(
        if gen["stop_str"] in tool_data.tool_call_start:
            if "text" in gen:
                # non streaming, all generations will have the text they generated
-                pre_tool_prompt, _ = await apply_chat_template(data, gen["text"])
+                pre_tool_prompt, mm_embeddings = await apply_chat_template(
+                    data, gen["text"]
+                )
            elif current_generations is not None:
                # streaming, we wont have text in the generation,
                # we'll have to use the current_generations
-                pre_tool_prompt, _ = await apply_chat_template(data, current_generations)
+                pre_tool_prompt, mm_embeddings = await apply_chat_template(
+                    data, current_generations
+                )

            gen_tasks.append(
                asyncio.create_task(
                    model.container.generate(
-                        pre_tool_prompt, request.state.id, **gen_params
+                        pre_tool_prompt,
+                        request.state.id,
+                        embeddings=mm_embeddings,
+                        **gen_params,
                    )
                )
            )