aixsatoshi committed
Commit 405aa63
Parent(s): 58e53d1
Update app.py

app.py CHANGED
@@ -1,18 +1,18 @@
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-import spaces
+#import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import gradio as gr
 from threading import Thread
 
-model_id = "
+model_id = "llm-jp/llm-jp-3-1.8b-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    torch_dtype=torch.float16,
-    device_map="
-    use_flash_attention_2=True,
+    #torch_dtype=torch.float16,
+    device_map="cpu",
+    #use_flash_attention_2=True,
 )
 
 TITLE = "<h1><center>sudy-super/Yamase-12B Chat webui</center></h1>"
@@ -42,7 +42,7 @@ h3 {
 }
 """
 
-@spaces.GPU(duration=120)
+#@spaces.GPU(duration=120)
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
     print(f'Message: {message}')
     print(f'History: {history}')
@@ -65,7 +65,7 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
-        eos_token_id=[2],
+        #eos_token_id=[2],
     )
 
     thread = Thread(target=model.generate, kwargs=generate_kwargs)
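
For reference, this commit switches the Space from its GPU setup (flash-attn, fp16, the @spaces.GPU decorator) to plain CPU inference with the smaller llm-jp/llm-jp-3-1.8b-instruct model. Below is a minimal, self-contained sketch of the TextIteratorStreamer pattern that stream_chat relies on, under the CPU settings introduced here; the prompt text and sampling values are illustrative and not taken from the app.

from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "llm-jp/llm-jp-3-1.8b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")

inputs = tokenizer("Hello, please introduce yourself.", return_tensors="pt")
# skip_prompt=True streams only newly generated tokens, not the input
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

generate_kwargs = dict(
    **inputs,
    streamer=streamer,
    max_new_tokens=64,    # illustrative value
    do_sample=True,
    temperature=0.7,      # illustrative value
)

# model.generate blocks until generation finishes, so it runs in a
# background thread while the main thread consumes the streamer
thread = Thread(target=model.generate, kwargs=generate_kwargs)
thread.start()
for text_chunk in streamer:
    print(text_chunk, end="", flush=True)
thread.join()

With eos_token_id=[2] commented out, generation stops at the default eos_token_id from the model's generation config rather than a hard-coded token id.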