mrfakename committed
Commit 7d40869
1 Parent(s): f7a5b40

Update app.py

Files changed (1):
  1. app.py (+39 -24)
app.py CHANGED
@@ -1,8 +1,5 @@
-import spaces
-
 #######################
 '''
-Name: Phine Inference
 License: MIT
 '''
 #######################
@@ -21,6 +18,7 @@ import transformers
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import random
+import spaces
 import re
 
 def cut_text_after_last_token(text, token):
@@ -60,18 +58,17 @@ class _SentinelTokenStoppingCriteria(transformers.StoppingCriteria):
 
 
 
-model_path = 'freecs/phine-2-v0'
+model_path = 'freecs/ArtificialThinker-Phi2'
 
-device = "cuda"
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
 model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=False, torch_dtype=torch.float16).to(device) #remove .to() if load_in_4/8bit = True
 
-sys_message = "You are an AI assistant named Phine developed by FreeCS.org. You are polite and smart." #System Message
-@spaces.GPU(enable_queue=True)
-def phine(message, history, temperature, top_p, top_k, repetition_penalty):
+
+@spaces.GPU(enable_queue=True)
+def phine(message, history, temperature, top_p, top_k, repetition_penalty, sys_message):
 
 
-
     n = 0
@@ -83,7 +80,9 @@ def phine(message, history, temperature, top_p, top_k, repetition_penalty):
             if n%2 == 0:
                 context+=f"""\n<|prompt|>{h}\n"""
             else:
-                context+=f"""<|response|>{h}"""
+                pattern = re.compile(r'<details>.*?</details>')
+                result = re.sub(pattern, '', h)
+                context+=f"""<|response|>{result}"""
             n+=1
     else:
 
@@ -92,7 +91,7 @@ def phine(message, history, temperature, top_p, top_k, repetition_penalty):
 
 
 
-    prompt = f"""\n<|system|>{sys_message}"""+context+"\n<|prompt|>"+message+"<|endoftext|>\n<|response|>"
+    prompt = f"""\n<|system|>{sys_message}"""+context+"\n<|prompt|>"+message+"<|endoftext|>\n<|reasoning|>"
     tokenized = tokenizer(prompt, return_tensors="pt").to(device)
 
 
@@ -106,28 +105,44 @@ def phine(message, history, temperature, top_p, top_k, repetition_penalty):
                                       starting_idx=tokenized.input_ids.shape[-1])
     ])
 
-
+
     token = model.generate(**tokenized,
                         stopping_criteria=stopping_criteria_list,
                         do_sample=True,
                         max_length=2048, temperature=temperature, top_p=top_p, top_k = top_k, repetition_penalty = repetition_penalty
                         )
 
-    completion = tokenizer.decode(token[0], skip_special_tokens=False)
+    completion = tokenizer.decode(token[0], skip_special_tokens=True)
+
+    token = "<|reasoning|>"
+    reasoning = cut_text_after_last_token(completion, token)
+    prompt = f"""\n<|system|>{sys_message}"""+context+"\n<|prompt|>"+message+"\n<|reasoning|>"+reasoning+"\n<|response|>"
+
+    tokenized = tokenizer(prompt, return_tensors="pt").to(device)
+    token = model.generate(**tokenized,
+                        stopping_criteria=stopping_criteria_list,
+                        do_sample=True,
+                        max_length=2048, temperature=temperature, top_p=top_p, top_k = top_k, repetition_penalty = repetition_penalty
+                        )
+    completion = tokenizer.decode(token[0], skip_special_tokens=True)
     token = "<|response|>"
-    res = cut_text_after_last_token(completion, token)
-    return res.replace('<|endoftext|>', '')
+    response = cut_text_after_last_token(completion, token)
+
+
+    res = f"""<details><summary>Reasoning</summary>{reasoning}</details>\n\n{response}"""
+
+    return res
 
 
 demo = gr.ChatInterface(phine,
-    title="Phine Demo",
-    description="Demo of [Phine 2](https://huggingface.co/freecs/phine-2-v0). We are not affiliated with and do not endorse Phine 2.",
-    additional_inputs=[
-        gr.Slider(0.1, 2.0, label="temperature", value=0.5),
-        gr.Slider(0.1, 2.0, label="Top P", value=0.9),
-        gr.Slider(1, 500, label="Top K", value=50),
-        gr.Slider(0.1, 2.0, label="Repetition Penalty", value=1.15)
-    ]
-    )
+    additional_inputs=[
+        gr.Slider(0.1, 2.0, label="temperature", value=0.5),
+        gr.Slider(0.1, 2.0, label="Top P", value=0.9),
+        gr.Slider(1, 500, label="Top K", value=50),
+        gr.Slider(0.1, 2.0, label="Repetition Penalty", value=1.1),
+        gr.Textbox(label="System Prompt", max_lines=1, interactive=True, value="You are an AI assistant named Phine developed by FreeCS.org. You are polite and smart.")
+    ]
+    )
+
 if __name__ == "__main__":
-    demo.queue().launch(share=True, debug=True) #If debug=True causes problems you can set it to False
+    demo.queue().launch(share=True, debug=True) #If debug=True causes problems you can set it to False
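
The core behavioral change in this commit is a two-pass generation: the handler first prompts with a <|reasoning|> cue, then splices the captured reasoning back into a second prompt ending in <|response|>, and past responses have their collapsed <details> reasoning stripped before re-entering the context. Below is a minimal, self-contained sketch of that flow. fake_generate is a stand-in for model.generate plus tokenizer.decode, and cut_text_after_last_token is a hypothetical reimplementation, since its real body sits in unchanged code outside this diff.

import re

def cut_text_after_last_token(text, token):
    # Hypothetical reimplementation (the actual body is outside this diff):
    # return the text after the last occurrence of `token`, or None.
    idx = text.rfind(token)
    return text[idx + len(token):].strip() if idx != -1 else None

def sanitize_history(h):
    # Mirrors the new history step: drop the collapsed <details> reasoning
    # block before a past response re-enters the prompt context.
    return re.sub(r'<details>.*?</details>', '', h)

def fake_generate(prompt):
    # Stub standing in for model.generate + tokenizer.decode, so the
    # two-pass flow can be traced without loading the model.
    if prompt.endswith("<|reasoning|>"):
        return prompt + " The user greeted me; a greeting back is appropriate."
    return prompt + " Hello! How can I help you today?"

sys_message = "You are an AI assistant named Phine developed by FreeCS.org."
message = "Hi!"

# Context rebuilt from a (sanitized) earlier exchange:
past = "<details><summary>Reasoning</summary>...</details>\n\nSure!"
context = f"\n<|prompt|>Tell me a joke.\n<|response|>{sanitize_history(past)}"

# Pass 1: elicit reasoning.
prompt = f"\n<|system|>{sys_message}{context}\n<|prompt|>{message}<|endoftext|>\n<|reasoning|>"
reasoning = cut_text_after_last_token(fake_generate(prompt), "<|reasoning|>")

# Pass 2: splice the reasoning back in and elicit the final response.
prompt = f"\n<|system|>{sys_message}{context}\n<|prompt|>{message}\n<|reasoning|>{reasoning}\n<|response|>"
response = cut_text_after_last_token(fake_generate(prompt), "<|response|>")

# The UI shows the reasoning in a collapsed block above the response,
# which is exactly what sanitize_history() later strips out again.
print(f"<details><summary>Reasoning</summary>{reasoning}</details>\n\n{response}")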
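
The context lines around both generate calls reference _SentinelTokenStoppingCriteria and a stopping_criteria_list whose implementation is not part of this diff. For orientation, here is a hypothetical sketch of how such a sentinel-token criterion is commonly written against the transformers StoppingCriteria API; the names and the exact matching rule are assumptions, not the Space's actual code.

import torch
import transformers

class SentinelTokenStoppingCriteria(transformers.StoppingCriteria):
    # Hypothetical sketch; the Space's _SentinelTokenStoppingCriteria is
    # defined in unchanged code outside this diff.
    def __init__(self, sentinel_token_ids: torch.LongTensor, starting_idx: int):
        super().__init__()
        self.sentinel_token_ids = sentinel_token_ids  # shape (1, n), assumed
        self.starting_idx = starting_idx              # prompt length in tokens

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for sample in input_ids:
            generated = sample[self.starting_idx:]  # ignore the prompt itself
            n = self.sentinel_token_ids.shape[-1]
            if generated.shape[0] < n:
                continue
            # Stop once the sentinel sequence appears at the end of the output.
            if torch.equal(generated[-n:], self.sentinel_token_ids[0]):
                return True
        return False

# Usage mirroring the diff's context lines (tokenizer/tokenized assumed in scope):
# stopping_criteria_list = transformers.StoppingCriteriaList([
#     SentinelTokenStoppingCriteria(
#         sentinel_token_ids=tokenizer("<|endoftext|>", return_tensors="pt").input_ids,
#         starting_idx=tokenized.input_ids.shape[-1])
# ])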