Spaces:
Runtime error
Runtime error
leonardlin
committed on
Commit
•
3831a9c
1
Parent(s):
484081f
trying fa2 again
Browse files
app.py
CHANGED
@@ -14,7 +14,7 @@ model_name = "augmxnt/shisa-7b-v1"
|
|
14 |
|
15 |
# UI Settings
|
16 |
title = "Shisa 7B"
|
17 |
-
description = "Test out <a href='https://huggingface.co/augmxnt/shisa-7b-v1'>Shisa 7B</a> in either English or Japanese. If you aren't getting the right language outputs, you can try changing the system prompt to the appropriate language
|
18 |
placeholder = "Type Here / ここに入力してください"
|
19 |
examples = [
|
20 |
["What are the best slices of pizza in New York City?"],
|
@@ -35,6 +35,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
35 |
device_map="auto",
|
36 |
# load_in_8bit=True,
|
37 |
load_in_4bit=True,
|
|
|
38 |
)
|
39 |
|
40 |
def chat(message, history, system_prompt):
|
|
|
14 |
|
15 |
# UI Settings
|
16 |
title = "Shisa 7B"
|
17 |
+
description = "Test out <a href='https://huggingface.co/augmxnt/shisa-7b-v1'>Shisa 7B</a> in either English or Japanese. If you aren't getting the right language outputs, you can try changing the system prompt to the appropriate language.\n\nNote: we are running this model quantized at `load_in_4bit` to fit in 16GB of VRAM."
|
18 |
placeholder = "Type Here / ここに入力してください"
|
19 |
examples = [
|
20 |
["What are the best slices of pizza in New York City?"],
|
|
|
35 |
device_map="auto",
|
36 |
# load_in_8bit=True,
|
37 |
load_in_4bit=True,
|
38 |
+
use_flash_attention_2=True,
|
39 |
)
|
40 |
|
41 |
def chat(message, history, system_prompt):
|