Model: Set default max_batch_size
This commit is contained in:
parent
8c75b29923
commit
0d949d00b9
1 changed file with 1 addition and 1 deletion
|
|
@ -180,7 +180,7 @@ class ExllamaV3Container(BaseModelContainer):
|
|||
self.cache = Cache(self.model, max_num_tokens=self.cache_size)
|
||||
|
||||
# Max batch size
|
||||
self.max_batch_size = kwargs.get("max_batch_size")
|
||||
self.max_batch_size = unwrap(kwargs.get("max_batch_size"), 256)
|
||||
|
||||
# Make sure chunk size is >= 256, keep near or below max seq len
|
||||
user_chunk_size = unwrap(kwargs.get("chunk_size"), 2048)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue