Spaces:
Sleeping
Sleeping
abrakjamson
committed on
Commit
·
453c7fc
1
Parent(s):
9acb8e6
Corrected history and special tokens
Browse files
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import os
|
|
|
2 |
import torch
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
from repeng import ControlVector, ControlModel
|
@@ -48,16 +49,18 @@ def toggle_slider(checked):
|
|
48 |
return gr.update(visible=checked)
|
49 |
|
50 |
# Function to generate the model's response
|
51 |
-
def generate_response(system_prompt, user_message,
|
52 |
checkboxes = []
|
53 |
sliders = []
|
54 |
|
|
|
|
|
55 |
# Separate checkboxes and sliders based on type
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
|
62 |
if len(checkboxes) != len(control_vector_files) or len(sliders) != len(control_vector_files):
|
63 |
return history if history else [], history if history else []
|
@@ -76,21 +79,28 @@ def generate_response(system_prompt, user_message, *args, history=None, max_new_
|
|
76 |
except Exception as e:
|
77 |
print(f"Failed to set control vector {cv_file}: {e}")
|
78 |
|
79 |
-
# Initialize history if None
|
80 |
-
history = history or []
|
81 |
-
|
82 |
-
# Construct the formatted prompt based on history
|
83 |
formatted_prompt = ""
|
84 |
-
|
85 |
-
|
86 |
-
|
|
|
|
|
87 |
|
88 |
# Append the system prompt if provided
|
89 |
if system_prompt.strip():
|
90 |
-
formatted_prompt += f"[INST] {system_prompt}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
|
92 |
# Append the new user message
|
93 |
-
formatted_prompt += f"
|
94 |
|
95 |
# Tokenize the input
|
96 |
input_ids = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
|
@@ -99,23 +109,30 @@ def generate_response(system_prompt, user_message, *args, history=None, max_new_
|
|
99 |
"pad_token_id": tokenizer.eos_token_id,
|
100 |
"do_sample": default_generation_settings["do_sample"],
|
101 |
"max_new_tokens": int(max_new_tokens),
|
102 |
-
"repetition_penalty": repetition_penalty,
|
103 |
}
|
104 |
|
105 |
# Generate the response
|
106 |
-
output_ids = model.generate(**input_ids, **
|
107 |
-
response = tokenizer.decode(output_ids.squeeze(), skip_special_tokens=
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
# Update conversation history
|
114 |
-
history.append((user_message,
|
115 |
-
return history
|
116 |
|
117 |
# Function to reset the conversation history
|
118 |
def reset_chat():
|
|
|
119 |
return [], []
|
120 |
|
121 |
# Build the Gradio interface
|
@@ -198,20 +215,19 @@ with gr.Blocks() as demo:
|
|
198 |
submit_button = gr.Button("💬 Submit")
|
199 |
new_chat_button = gr.Button("🆕 New Chat")
|
200 |
|
201 |
-
|
202 |
-
state = gr.State()
|
203 |
|
204 |
# Define button actions
|
205 |
submit_button.click(
|
206 |
generate_response,
|
207 |
-
inputs=
|
208 |
-
outputs=[chatbot
|
209 |
)
|
210 |
|
211 |
new_chat_button.click(
|
212 |
reset_chat,
|
213 |
inputs=[],
|
214 |
-
outputs=[chatbot,
|
215 |
)
|
216 |
|
217 |
# Launch the Gradio app
|
|
|
1 |
import os
|
2 |
+
import re
|
3 |
import torch
|
4 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
5 |
from repeng import ControlVector, ControlModel
|
|
|
49 |
return gr.update(visible=checked)
|
50 |
|
51 |
# Function to generate the model's response
|
52 |
+
def generate_response(system_prompt, user_message, history, max_new_tokens, repitition_penalty, *args):
|
53 |
checkboxes = []
|
54 |
sliders = []
|
55 |
|
56 |
+
#inputs_list = [system_prompt, user_input, chatbot, max_new_tokens, repetition_penalty] + control_checks + control_sliders
|
57 |
+
|
58 |
# Separate checkboxes and sliders based on type
|
59 |
+
# The first x in args are the checkbox names (the file names)
|
60 |
+
# The second x in args are the slider values
|
61 |
+
for i in range(len(control_vector_files)):
|
62 |
+
checkboxes.append(args[i])
|
63 |
+
sliders.append(args[len(control_vector_files) + i])
|
64 |
|
65 |
if len(checkboxes) != len(control_vector_files) or len(sliders) != len(control_vector_files):
|
66 |
return history if history else [], history if history else []
|
|
|
79 |
except Exception as e:
|
80 |
print(f"Failed to set control vector {cv_file}: {e}")
|
81 |
|
|
|
|
|
|
|
|
|
82 |
formatted_prompt = ""
|
83 |
+
|
84 |
+
|
85 |
+
# Mistral expects the history to be wrapped in <s>history</s>
|
86 |
+
if len(history) > 0:
|
87 |
+
formatted_prompt += "<s>"
|
88 |
|
89 |
# Append the system prompt if provided
|
90 |
if system_prompt.strip():
|
91 |
+
formatted_prompt += f"[INST] {system_prompt} [/INST] "
|
92 |
+
|
93 |
+
# Construct the formatted prompt based on history
|
94 |
+
if len(history) > 0:
|
95 |
+
for turn in history:
|
96 |
+
user_msg, asst_msg = turn
|
97 |
+
formatted_prompt += f"{user_tag} {user_msg} {asst_tag} {asst_msg}"
|
98 |
+
|
99 |
+
if len(history) > 0:
|
100 |
+
formatted_prompt += "</s>"
|
101 |
|
102 |
# Append the new user message
|
103 |
+
formatted_prompt += f"{user_tag} {user_message} {asst_tag}"
|
104 |
|
105 |
# Tokenize the input
|
106 |
input_ids = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
|
|
|
109 |
"pad_token_id": tokenizer.eos_token_id,
|
110 |
"do_sample": default_generation_settings["do_sample"],
|
111 |
"max_new_tokens": int(max_new_tokens),
|
112 |
+
"repetition_penalty": repetition_penalty.value,
|
113 |
}
|
114 |
|
115 |
# Generate the response
|
116 |
+
output_ids = model.generate(**input_ids, **generation_settings)
|
117 |
+
response = tokenizer.decode(output_ids.squeeze(), skip_special_tokens=False)
|
118 |
+
|
119 |
+
def get_assistant_response(input_string):
    """Return the assistant text that follows the final ``[/INST]`` tag.

    The decoded model output still contains the prompt and special tokens,
    so the reply is whatever sits between the last ``[/INST]`` marker and
    the next ``</s>`` (or the end of the string when no ``</s>`` follows).

    Args:
        input_string: Decoded text containing zero or more ``[/INST]`` tags.

    Returns:
        The reply with surrounding whitespace stripped, or ``None`` when
        ``input_string`` contains no ``[/INST]`` tag at all.
    """
    # rpartition splits on the LAST occurrence in a single pass, which avoids
    # a regex lookahead that rescans the remainder at every [/INST] tag.
    _, marker, tail = input_string.rpartition("[/INST]")
    if not marker:
        return None
    # Keep only the text before the first end-of-sequence token, if any.
    reply, _, _ = tail.partition("</s>")
    return reply.strip()
|
126 |
+
|
127 |
+
assistant_response = get_assistant_response(response)
|
128 |
|
129 |
# Update conversation history
|
130 |
+
history.append((user_message, assistant_response))
|
131 |
+
return history
|
132 |
|
133 |
# Function to reset the conversation history
|
134 |
def reset_chat():
    """Clear the conversation and the message box to start a new chat.

    The New Chat button is wired with ``outputs=[chatbot, user_input]``, so
    the returned pair is applied in that order.

    Returns:
        A 2-tuple ``(history, user_text)``: an empty list for the chatbot
        history and an empty string for the user input.
    """
    # NOTE(review): assumes user_input is a text component, so it is cleared
    # with "" rather than [] (a list would be shown as its string form) --
    # confirm against the component definition.
    return [], ""
|
137 |
|
138 |
# Build the Gradio interface
|
|
|
215 |
submit_button = gr.Button("💬 Submit")
|
216 |
new_chat_button = gr.Button("🆕 New Chat")
|
217 |
|
218 |
+
inputs_list = [system_prompt, user_input, chatbot, max_new_tokens, repetition_penalty] + control_checks + control_sliders
|
|
|
219 |
|
220 |
# Define button actions
|
221 |
submit_button.click(
|
222 |
generate_response,
|
223 |
+
inputs=inputs_list,
|
224 |
+
outputs=[chatbot]
|
225 |
)
|
226 |
|
227 |
new_chat_button.click(
|
228 |
reset_chat,
|
229 |
inputs=[],
|
230 |
+
outputs=[chatbot, user_input]
|
231 |
)
|
232 |
|
233 |
# Launch the Gradio app
|