Revert "Model: Skip empty token chunks"

This reverts commit 21516bd7b5.

The reverted commit also skips the EOS chunk, and implementing
empty-chunk skipping the proper way seems more costly than necessary.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri 2024-07-22 18:34:00 -04:00
parent 15f891b277
commit 191600a150

View file

@@ -1185,15 +1185,13 @@ class ExllamaV2Container:
result_id = result.get("identifier")
if stage == "streaming" and result_id == job_id:
chunk_tokens = result.get("token_ids")
if chunk_tokens is None:
continue
else:
generated_tokens += chunk_tokens.size(dim=0)
chunk = unwrap(result.get("text"), "")
full_response += chunk
chunk_tokens = result.get("token_ids")
if chunk_tokens is not None:
generated_tokens += chunk_tokens.size(dim=0)
generation = {
"text": chunk,
"prompt_tokens": context_len,