leonardlin committed on
Commit
3831a9c
•
1 Parent(s): 484081f

trying fa2 again

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -14,7 +14,7 @@ model_name = "augmxnt/shisa-7b-v1"
14
 
15
  # UI Settings
16
  title = "Shisa 7B"
17
- description = "Test out <a href='https://huggingface.co/augmxnt/shisa-7b-v1'>Shisa 7B</a> in either English or Japanese. If you aren't getting the right language outputs, you can try changing the system prompt to the appropriate language. Note, we are running `load_in_4bit` to fit in 16GB of VRAM."
18
  placeholder = "Type Here / ここに入力してください"
19
  examples = [
20
  ["What are the best slices of pizza in New York City?"],
@@ -35,6 +35,7 @@ model = AutoModelForCausalLM.from_pretrained(
35
  device_map="auto",
36
  # load_in_8bit=True,
37
  load_in_4bit=True,
 
38
  )
39
 
40
  def chat(message, history, system_prompt):
 
14
 
15
  # UI Settings
16
  title = "Shisa 7B"
17
+ description = "Test out <a href='https://huggingface.co/augmxnt/shisa-7b-v1'>Shisa 7B</a> in either English or Japanese. If you aren't getting the right language outputs, you can try changing the system prompt to the appropriate language.\n\nNote: we are running this model quantized at `load_in_4bit` to fit in 16GB of VRAM."
18
  placeholder = "Type Here / ここに入力してください"
19
  examples = [
20
  ["What are the best slices of pizza in New York City?"],
 
35
  device_map="auto",
36
  # load_in_8bit=True,
37
  load_in_4bit=True,
38
+ use_flash_attention_2=True,
39
  )
40
 
41
  def chat(message, history, system_prompt):