leonardlin committed
Commit: bd17394
1 Parent(s): 39c14f3

add flash attention, reorder examples

Files changed (1):
  app.py +2 -1
app.py CHANGED
@@ -18,8 +18,8 @@ description = "Test out Shisa 7B in either English or Japanese. If you aren't ge
 placeholder = "Type Here / ここに入力してください"
 examples = [
     ["What are the best slices of pizza in New York City?"],
-    ['How do I program a simple "hello world" in Python?'],
     ["東京でおすすめのラーメン屋ってどこ?"],
+    ['How do I program a simple "hello world" in Python?'],
     ["Pythonでシンプルな「ハローワールド」をプログラムするにはどうすればいいですか?"],
 ]
 
@@ -40,6 +40,7 @@ model = AutoModelForCausalLM.from_pretrained(
         bnb_4bit_use_double_quant=True,
         bnb_4bit_compute_dtype=torch.bfloat16
     ),
+    use_flash_attention_2=True
 )
 streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
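
For context, a minimal sketch of how the two changes above fit into the surrounding from_pretrained call, assuming stock transformers + bitsandbytes usage. The model id, device_map, and load_in_4bit lines are illustrative assumptions, not taken from app.py:

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TextIteratorStreamer,
)

MODEL_ID = "augmxnt/shisa-7b-v1"  # assumption: placeholder Shisa 7B checkpoint id

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",          # assumption: let accelerate place the quantized weights
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,      # assumption: implied by the bnb_4bit_* kwargs in the diff
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
    use_flash_attention_2=True,  # the flag added in this commit; requires flash-attn to be installed
)
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

Note that newer transformers releases prefer attn_implementation="flash_attention_2" over the boolean use_flash_attention_2 kwarg used here.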