OAI: Copy gen params for "n"

For multiple generations in the same request, nested arrays kept their
original reference, so a mutation made for one generation leaked into the
others and produced duplicated entries. This will happen with any
collection type, not just arrays.

As an optimization, a deepcopy isn't run for the first iteration, since
that iteration can safely consume the freshly created original reference.

This is not the most elegant solution, but it works for the described
cases.
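
To make the failure mode concrete, here is a minimal standalone sketch
(the dict below is illustrative, not the actual gen params structure):

    from copy import deepcopy

    gen_params = {"temperature": 0.7, "stop": ["</s>"]}

    # Naive reuse: every generation task shares the same nested list.
    naive = [gen_params for _ in range(3)]
    naive[0]["stop"].append("###")
    print(naive[1]["stop"])  # ['</s>', '###'] -- the mutation leaked across tasks

    # Pattern used in this commit: the first iteration keeps the original
    # reference, later iterations get an independent deep copy.
    gen_params = {"temperature": 0.7, "stop": ["</s>"]}
    per_task = [gen_params if n == 0 else deepcopy(gen_params) for n in range(3)]
    per_task[1]["stop"].append("###")
    print(per_task[0]["stop"])  # ['</s>'] -- the other tasks are unaffected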

Signed-off-by: kingbri <bdashore3@proton.me>
kingbri 2024-05-26 22:54:34 -04:00 committed by Brian Dashore
parent b944f8d756
commit c8371e0f50
2 changed files with 24 additions and 4 deletions


@@ -3,6 +3,7 @@
 import asyncio
 import pathlib
 from asyncio import CancelledError
+from copy import deepcopy
 from typing import List, Optional
 from uuid import uuid4
@@ -242,12 +243,21 @@ async def generate_chat_completion(
     prompt: str, data: ChatCompletionRequest, model_path: pathlib.Path
 ):
     gen_tasks: List[asyncio.Task] = []
+    gen_params = data.to_gen_params()
     try:
-        for _ in range(0, data.n):
+        for n in range(0, data.n):
+            # Deepcopy gen params above the first index
+            # to ensure nested structures aren't shared
+            if n > 0:
+                task_gen_params = deepcopy(gen_params)
+            else:
+                task_gen_params = gen_params
             gen_tasks.append(
                 asyncio.create_task(
-                    model.container.generate(prompt, **data.to_gen_params())
+                    model.container.generate(prompt, **task_gen_params)
                 )
             )


@@ -3,6 +3,7 @@
 import asyncio
 import pathlib
 from asyncio import CancelledError
+from copy import deepcopy
 from fastapi import HTTPException, Request
 from typing import List, Optional
@@ -111,12 +112,21 @@ async def generate_completion(data: CompletionRequest, model_path: pathlib.Path)
     """Non-streaming generate for completions"""
     gen_tasks: List[asyncio.Task] = []
+    gen_params = data.to_gen_params()
     try:
-        for _ in range(0, data.n):
+        for n in range(0, data.n):
+            # Deepcopy gen params above the first index
+            # to ensure nested structures aren't shared
+            if n > 0:
+                task_gen_params = deepcopy(gen_params)
+            else:
+                task_gen_params = gen_params
             gen_tasks.append(
                 asyncio.create_task(
-                    model.container.generate(data.prompt, **data.to_gen_params())
+                    model.container.generate(data.prompt, **task_gen_params)
                 )
             )
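
For context, a hypothetical client request that exercises the new path
(host, port, and model name are placeholders; any OAI-compatible request
with n > 1 and a nested parameter such as stop applies):

    import requests

    response = requests.post(
        "http://localhost:5000/v1/completions",  # placeholder host/port
        json={
            "model": "my-model",   # placeholder model name
            "prompt": "Hello",
            "n": 2,                # two generations -> two gen tasks from one request
            "stop": ["\n\n"],      # nested list that was previously shared across tasks
        },
    )
    print([choice["text"] for choice in response.json()["choices"]])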