Patryk Studzinski committed
Commit 9153886 · 1 Parent(s): eaa2e37

Fix: Remove unsupported use_xformers_attention parameter

Files changed (1)
  1. app/models/huggingface_local.py +0 -1
app/models/huggingface_local.py CHANGED
@@ -149,7 +149,6 @@ class HuggingFaceLocal(BaseLLM):
     temperature=temperature,
     top_p=top_p,
     use_cache=True,  # CRITICAL: Enable KV cache
-    use_xformers_attention=False,  # CPU doesn't support this
     eos_token_id=self.tokenizer.eos_token_id,
     pad_token_id=self.tokenizer.eos_token_id if self.tokenizer.pad_token_id is None else self.tokenizer.pad_token_id,
     )
 
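For reference, below is a minimal, self-contained sketch of what a generate() call looks like without the removed keyword. It is an illustration only: the model name, prompt, and sampling values are placeholders, not taken from this repository. In the transformers library, generate() has no use_xformers_attention argument; memory-efficient attention is chosen when the model is loaded (for example via the attn_implementation option of from_pretrained), not per generation call, which is why passing the keyword here fails.

# Hypothetical standalone example; "gpt2" and the prompt are placeholders.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Hello, world", return_tensors="pt")
output_ids = model.generate(
    **inputs,
    max_new_tokens=32,
    do_sample=True,   # temperature/top_p only take effect when sampling
    temperature=0.7,
    top_p=0.9,
    use_cache=True,   # keep the KV cache enabled, as in the diff above
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id if tokenizer.pad_token_id is None else tokenizer.pad_token_id,
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))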