Testing Mistral receiving the response from Vectara and returning an answer
app.py
CHANGED
@@ -88,29 +88,30 @@ def launch_bot():
     with st.spinner("Thinking..."):
         prompt2 = prompt + master_prompt
         response = generate_response(prompt2)
-
+        # if response == 'The returned results did not contain sufficient information to be summarized into a useful answer for your query. Please try a different search or restate your query differently.':
         #st.write("reroute to LLM")
         #call in Mistral
-
-
+        prompt3 = prompt2 + "context:" + response
+        print("Called in Mistral")
+        device = "cuda" # the device to load the model onto


-
+        model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")

-
+        tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

         #from mistral docs: prompt = "My favourite condiment is"


-
+        model_inputs = tokenizer([prompt3], return_tensors="pt").to(device)

-
+        model.to(device)


-
-
-
-
+        generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
+        st.write(tokenizer.batch_decode(generated_ids)[0])
+        # else:
+        #     st.write(response)
         message = {"role": "assistant", "content": response}
         st.session_state.messages.append(message)

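For reference, a minimal sketch of the fallback flow this change is building toward, once the commented-out condition is enabled. It assumes the app's own generate_response (the Vectara query helper) and master_prompt defined elsewhere in app.py, plus the Vectara "insufficient information" string quoted in the commented-out check above; the wrapper name answer_with_fallback and anything else not visible in the diff are assumptions, not part of this commit.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# String Vectara returns when it cannot answer (taken from the commented-out check above).
NO_ANSWER = ("The returned results did not contain sufficient information to be "
             "summarized into a useful answer for your query. Please try a different "
             "search or restate your query differently.")

def answer_with_fallback(prompt, master_prompt):
    prompt2 = prompt + master_prompt
    response = generate_response(prompt2)    # Vectara helper defined elsewhere in app.py (assumed)
    if response != NO_ANSWER:
        return response                      # Vectara answered; no reroute needed
    # Reroute to Mistral, passing the original prompt plus whatever Vectara returned as context.
    prompt3 = prompt2 + "context:" + response
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
    model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1").to(device)
    model_inputs = tokenizer([prompt3], return_tensors="pt").to(device)
    generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

Note that loading the 7B model inside the spinner on every request would be slow; once the reroute is enabled, the model and tokenizer would more likely be loaded once and cached (for example with Streamlit's st.cache_resource).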