diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py index 2cc57f6..554fd50 100644 --- a/backends/exllamav2/model.py +++ b/backends/exllamav2/model.py @@ -709,6 +709,7 @@ class ExllamaV2Container: gen_settings.tfs = unwrap(kwargs.get("tfs"), 1.0) gen_settings.typical = unwrap(kwargs.get("typical"), 1.0) gen_settings.mirostat = unwrap(kwargs.get("mirostat"), False) + gen_settings.skew = unwrap(kwargs.get("skew"), 0) # DynaTemp settings max_temp = unwrap(kwargs.get("max_temp"), 1.0) diff --git a/common/sampling.py b/common/sampling.py index cc42f55..7201417 100644 --- a/common/sampling.py +++ b/common/sampling.py @@ -75,6 +75,11 @@ class BaseSamplerRequest(BaseModel): examples=[1.0], ) + skew: Optional[float] = Field( + default_factory=lambda: get_default_sampler_value("skew", 0.0), + examples=[0.0], + ) + frequency_penalty: Optional[float] = Field( default_factory=lambda: get_default_sampler_value("frequency_penalty", 0.0) ) @@ -295,6 +300,7 @@ class BaseSamplerRequest(BaseModel): "typical": self.typical, "min_p": self.min_p, "tfs": self.tfs, + "skew": self.skew, "frequency_penalty": self.frequency_penalty, "presence_penalty": self.presence_penalty, "repetition_penalty": self.repetition_penalty, diff --git a/sampler_overrides/sample_preset.yml b/sampler_overrides/sample_preset.yml index f3dac71..ec33cac 100644 --- a/sampler_overrides/sample_preset.yml +++ b/sampler_overrides/sample_preset.yml @@ -73,6 +73,9 @@ tfs: typical: override: 1.0 force: false +skew: + override: 0.0 + force: false # MARK: Penalty settings frequency_penalty: