diff --git a/backends/exllamav3/model.py b/backends/exllamav3/model.py index ab308e5..ca8412f 100644 --- a/backends/exllamav3/model.py +++ b/backends/exllamav3/model.py @@ -62,6 +62,8 @@ class ExllamaV3Container(BaseModelContainer): config: Optional[Config] generator: Optional[AsyncGenerator] = None tokenizer_config: Optional[TokenizerConfig] = None + generator: Optional[AsyncGenerator] + tokenizer_config: Optional[TokenizerConfig] # Class-specific vars gpu_split: List[float] | None = None @@ -89,6 +91,13 @@ class ExllamaV3Container(BaseModelContainer): self = cls() + self.model = None + self.cache = None + self.tokenizer = None + self.config = None + self.generator = None + self.tokenizer_config = None + logger.warning( "ExllamaV3 is currently in an alpha state. " "Please note that all config options may not work."