OAI: Add response_prefix and fix BOS token issues in chat completions

response_prefix is used to add a prefix before generating the next
message. This is useful in many cases, such as continuing a prompt
(see #96).
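
For illustration, a minimal client-side sketch of how the field could be
sent to the OpenAI-compatible chat completions endpoint; the host, port,
model name, and prefix text below are placeholders, not values taken from
this change:

# Minimal sketch: passing response_prefix in a chat completion request.
# The URL, model name, and prefix text are placeholders.
import requests

payload = {
    "model": "my-model",
    "messages": [{"role": "user", "content": "Write a haiku about rain."}],
    "add_generation_prompt": True,  # prefix is only applied when this is True
    "response_prefix": "Sure, here is a haiku:\n",
}

resp = requests.post(
    "http://localhost:5000/v1/chat/completions",
    json=payload,
    timeout=60,
)
print(resp.json()["choices"][0]["message"]["content"])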

Also, if a template already specifies a BOS token, enabling
add_bos_token results in two BOS tokens. Add a check that strips a
leading BOS token from the prompt if one exists.
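
As a rough sketch of the problem being fixed (the BOS token and chat
markup here are illustrative, not tied to any particular model):

# Illustrative only: a template that already renders the BOS token, combined
# with add_bos_token=True at tokenization time, would give the model two BOS
# tokens. Stripping a leading BOS from the rendered prompt avoids that.
bos_token = "<s>"
prompt = "<s>[INST] Hello [/INST]"

if bos_token and prompt.startswith(bos_token):
    prompt = prompt.removeprefix(bos_token)

assert prompt == "[INST] Hello [/INST]"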

Signed-off-by: kingbri <bdashore3@proton.me>
kingbri 2024-04-25 00:54:43 -04:00
parent ed7cd3cb59
commit fb1d2f34c1
4 changed files with 20 additions and 1 deletion

@@ -8,6 +8,7 @@ from uuid import uuid4
 from fastapi import HTTPException
 from jinja2 import TemplateError
 from loguru import logger
+from common import model
 from common.networking import (
@@ -153,6 +154,22 @@ def format_prompt_with_template(data: ChatCompletionRequest):
             data.template_vars
         )
 
+        # Append response prefix if present
+        if data.response_prefix:
+            if data.add_generation_prompt:
+                prompt += data.response_prefix
+            else:
+                logger.warning(
+                    "Could not add response prefix because "
+                    "add_generation_prompt is False"
+                )
+
+        # Removes the starting BOS token if present
+        # This is to prevent add_bos_token from adding multiple bos tokens
+        bos_token = special_tokens_dict.get("bos_token")
+        if bos_token and prompt.startswith(bos_token):
+            prompt = prompt.removeprefix(bos_token)
+
         # Append template stop strings
         if isinstance(data.stop, str):
             data.stop = [data.stop] + template_stop_strings
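
For reference, a rough trace of what the prefix handling above does to a
rendered prompt; the chat markup below is made up for illustration and does
not come from a real template:

# Illustrative only: with add_generation_prompt=True, response_prefix is
# appended after the generation prompt so the model continues from it.
prompt = "<|user|>\nWrite a haiku about rain.\n<|assistant|>\n"
response_prefix = "Sure, here is a haiku:\n"
add_generation_prompt = True

if response_prefix:
    if add_generation_prompt:
        prompt += response_prefix
    # else: the server logs a warning and leaves the prompt unchanged

# Generation now continues from "Sure, here is a haiku:\n".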