Commit 7d40869 by mrfakename
Parent(s): f7a5b40

Update app.py

app.py CHANGED
@@ -1,8 +1,5 @@
-import spaces
-
 #######################
 '''
-Name: Phine Inference
 License: MIT
 '''
 #######################
@@ -21,6 +18,7 @@ import transformers
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import random
+import spaces
 import re
 
 def cut_text_after_last_token(text, token):
@@ -60,18 +58,17 @@ class _SentinelTokenStoppingCriteria(transformers.StoppingCriteria):
 
 
 
-model_path = 'freecs/
+model_path = 'freecs/ArtificialThinker-Phi2'
 
-device = "cuda"
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
 model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=False, torch_dtype=torch.float16).to(device) #remove .to() if load_in_4/8bit = True
 
-sys_message = "You are an AI assistant named Phine developed by FreeCS.org. You are polite and smart." #System Message
-@spaces.GPU(enable_queue=True)
-def phine(message, history, temperature, top_p, top_k, repetition_penalty):
+
+@spaces.GPU(enable_queue=True)
+def phine(message, history, temperature, top_p, top_k, repetition_penalty, sys_message):
 
 
-
 n = 0
@@ -83,7 +80,9 @@ def phine(message, history, temperature, top_p, top_k, repetition_penalty):
 if n%2 == 0:
 context+=f"""\n<|prompt|>{h}\n"""
 else:
-
+pattern = re.compile(r'<details>.*?</details>')
+result = re.sub(pattern, '', h)
+context+=f"""<|response|>{result}"""
 n+=1
 else:
 
@@ -92,7 +91,7 @@ def phine(message, history, temperature, top_p, top_k, repetition_penalty):
 
 
 
-prompt = f"""\n<|system|>{sys_message}"""+context+"\n<|prompt|>"+message+"<|endoftext|>\n<|
+prompt = f"""\n<|system|>{sys_message}"""+context+"\n<|prompt|>"+message+"<|endoftext|>\n<|reasoning|>"
 tokenized = tokenizer(prompt, return_tensors="pt").to(device)
 
 
@@ -106,28 +105,44 @@ def phine(message, history, temperature, top_p, top_k, repetition_penalty):
 starting_idx=tokenized.input_ids.shape[-1])
 ])
 
-
+
 token = model.generate(**tokenized,
 stopping_criteria=stopping_criteria_list,
 do_sample=True,
 max_length=2048, temperature=temperature, top_p=top_p, top_k = top_k, repetition_penalty = repetition_penalty
 )
 
-completion = tokenizer.decode(token[0], skip_special_tokens=
+completion = tokenizer.decode(token[0], skip_special_tokens=True)
+
+token = "<|reasoning|>"
+reasoning = cut_text_after_last_token(completion, token)
+prompt = f"""\n<|system|>{sys_message}"""+context+"\n<|prompt|>"+message+"\n<|reasoning|>"+reasoning+"\n<|response|>"
+
+tokenized = tokenizer(prompt, return_tensors="pt").to(device)
+token = model.generate(**tokenized,
+stopping_criteria=stopping_criteria_list,
+do_sample=True,
+max_length=2048, temperature=temperature, top_p=top_p, top_k = top_k, repetition_penalty = repetition_penalty
+)
+completion = tokenizer.decode(token[0], skip_special_tokens=True)
 token = "<|response|>"
-
-
+response = cut_text_after_last_token(completion, token)
+
+
+res = f"""<details><summary>Reasoning</summary>{reasoning}</details>\n\n{response}"""
+
+return res
 
 
 demo = gr.ChatInterface(phine,
-
-
-
-
-
-
-
-
-
+additional_inputs=[
+gr.Slider(0.1, 2.0, label="temperature", value=0.5),
+gr.Slider(0.1, 2.0, label="Top P", value=0.9),
+gr.Slider(1, 500, label="Top K", value=50),
+gr.Slider(0.1, 2.0, label="Repetition Penalty", value=1.1),
+gr.Textbox(label="System Prompt",max_lines=1,interactive=True, value="You are an AI assistant named Phine developed by FreeCS.org. You are polite and smart.")
+]
+)
+
 if __name__ == "__main__":
-demo.queue().launch(share=True, debug=True) #If debug=True causes problems you can set it to False
+demo.queue().launch(share=True, debug=True) #If debug=True causes problems you can set it to False
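For context on what this commit changes: the updated phine() now makes two generation passes instead of one. Pass one ends the prompt with <|reasoning|> and cuts the model's reasoning out of the decoded text; pass two feeds that reasoning back and asks for the final <|response|>, which is shown inside a collapsible <details> block, and chat history has earlier <details> blocks stripped before being reused as context. Below is a minimal, model-free sketch of that prompt plumbing. It assumes cut_text_after_last_token returns whatever follows the last occurrence of the token (its body is not visible in this diff), and the history strings and stand-in completions are illustrative only; the real app replaces them with model.generate() plus tokenizer.decode().

import re

def cut_text_after_last_token(text, token):
    # Assumed behavior of the app.py helper: return the text after the last occurrence of `token`.
    idx = text.rfind(token)
    return text if idx == -1 else text[idx + len(token):].strip()

def strip_details(history_entry):
    # Earlier answers embed their reasoning in <details>...</details>; drop it before reuse,
    # mirroring the re.sub added to the history loop in this commit.
    return re.sub(r'<details>.*?</details>', '', history_entry)

sys_message = "You are an AI assistant named Phine developed by FreeCS.org. You are polite and smart."
message = "What is 2 + 2?"

# Rebuild the chat context the way the updated loop does: prompts verbatim, responses with reasoning stripped.
history = [
    "Hello!",  # hypothetical earlier user turn
    "<details><summary>Reasoning</summary>Greeting detected.</details>\n\nHi! How can I help?",  # hypothetical earlier bot turn
]
context = ""
for n, h in enumerate(history):
    if n % 2 == 0:
        context += f"\n<|prompt|>{h}\n"
    else:
        context += f"<|response|>{strip_details(h)}"

# Pass 1: ask for reasoning.
prompt = f"\n<|system|>{sys_message}" + context + "\n<|prompt|>" + message + "<|endoftext|>\n<|reasoning|>"
completion = prompt + " The user asks for a sum; 2 + 2 equals 4."  # stand-in for generate + decode
reasoning = cut_text_after_last_token(completion, "<|reasoning|>")

# Pass 2: feed the reasoning back and ask for the final answer.
prompt = f"\n<|system|>{sys_message}" + context + "\n<|prompt|>" + message + "\n<|reasoning|>" + reasoning + "\n<|response|>"
completion = prompt + " 2 + 2 = 4."  # stand-in for the second generate call
response = cut_text_after_last_token(completion, "<|response|>")

# The ChatInterface shows the reasoning in a collapsible block above the answer.
print(f"<details><summary>Reasoning</summary>{reasoning}</details>\n\n{response}")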