Model: Create generator on load
This commit is contained in:
parent
0d949d00b9
commit
58c380b8ca
1 changed file with 3 additions and 6 deletions
|
|
@@ -345,6 +345,9 @@ class ExllamaV3Container(BaseModelContainer):
|
|||
async for value in iterate_in_threadpool(generator):
|
||||
yield value
|
||||
|
||||
# Create async generator
|
||||
await self.create_generator()
|
||||
|
||||
# Clean up any extra vram usage from torch and cuda
|
||||
# (Helps reduce VRAM bottlenecking on Windows)
|
||||
gc.collect()
|
||||
|
|
@@ -774,12 +777,6 @@ class ExllamaV3Container(BaseModelContainer):
|
|||
f"max_seq_len {self.max_seq_len}"
|
||||
)
|
||||
|
||||
self.generator = AsyncGenerator(
|
||||
model=self.model,
|
||||
cache=self.cache,
|
||||
tokenizer=self.tokenizer,
|
||||
)
|
||||
|
||||
generation = {}
|
||||
job = AsyncJob(
|
||||
self.generator,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue