Revert "Model: Skip empty token chunks"

This reverts commit 21516bd7b5. The reverted change also skipped the EOS chunk, and implementing the skip properly seems more costly than necessary. Signed-off-by: kingbri <bdashore3@proton.me>
2024-07-22 18:34:00 -04:00 · 2024-07-22 18:34:00 -04:00 · 191600a150
commit 191600a150
parent 15f891b277
1 changed file with 4 additions and 6 deletions
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -1185,15 +1185,13 @@ class ExllamaV2Container:
                result_id = result.get("identifier")

                if stage == "streaming" and result_id == job_id:
-                    chunk_tokens = result.get("token_ids")
-                    if chunk_tokens is None:
-                        continue
-                    else:
-                        generated_tokens += chunk_tokens.size(dim=0)
-
                    chunk = unwrap(result.get("text"), "")
                    full_response += chunk

+                    chunk_tokens = result.get("token_ids")
+                    if chunk_tokens is not None:
+                        generated_tokens += chunk_tokens.size(dim=0)
+
                    generation = {
                        "text": chunk,
                        "prompt_tokens": context_len,