Update app.py
Browse files
app.py
CHANGED
@@ -102,110 +102,111 @@ def ask_ds(message, history):
|
|
102 |
global extractions
|
103 |
|
104 |
if len(message) == 0:
|
105 |
-
|
106 |
|
107 |
if authenticated == False:
|
108 |
|
109 |
if division == None:
|
110 |
if message.lower().strip() in ['ime', 'peer disability', 'pas']:
|
111 |
division = message.lower().strip().replace(' ', '_')
|
112 |
-
|
113 |
else:
|
114 |
-
|
115 |
elif employee_type == None:
|
116 |
if message.lower().strip() in ['csr', 'qa']:
|
117 |
employee_type = message.lower().strip()
|
118 |
authenticated = True
|
119 |
EXTRACTIONS_PATH = EXTRACTIONS_PATH.replace('{employee_type}', employee_type).replace('{division}', division[:3])
|
120 |
extractions = read_json_from_s3()
|
121 |
-
|
122 |
else:
|
123 |
-
|
124 |
-
|
125 |
-
question = message
|
126 |
-
|
127 |
-
# RAG
|
128 |
-
question_embedding = get_titan_embedding(bedrock_client, 'question', question)
|
129 |
-
|
130 |
-
similar_documents = []
|
131 |
-
for file, data in extractions.items():
|
132 |
-
similarity = cosine_similarity(question_embedding, np.array(data['embedding']))
|
133 |
-
similar_documents.append((file, similarity))
|
134 |
-
|
135 |
-
similar_documents.sort(key=lambda x: x[1], reverse=False)
|
136 |
-
top_docs = similar_documents[:5]
|
137 |
-
|
138 |
-
similar_content = ''
|
139 |
-
for file, _ in top_docs:
|
140 |
-
similar_content += extractions[file]['content'] + '\n'
|
141 |
-
|
142 |
-
# Invoke
|
143 |
-
response = bedrock_client.invoke_model_with_response_stream(
|
144 |
-
modelId="anthropic.claude-3-sonnet-20240229-v1:0",
|
145 |
-
body=json.dumps(
|
146 |
-
{
|
147 |
-
"anthropic_version": "bedrock-2023-05-31",
|
148 |
-
"max_tokens": 4096,
|
149 |
-
"system": f"""Here is some relevant information that may help answer the user's upcoming question:
|
150 |
-
|
151 |
-
<relevant_information>
|
152 |
-
{similar_content}
|
153 |
-
</relevant_information>
|
154 |
-
|
155 |
-
The user's question is:
|
156 |
-
<question>{question}</question>
|
157 |
-
|
158 |
-
Please carefully review the relevant information provided above.
|
159 |
-
|
160 |
-
Your task is to review the provided relevant information and answer the user's question to the best of your ability.
|
161 |
-
Aim to use information from the relevant information section to directly address the question asked, and refrain from saying
|
162 |
-
things like 'According to the relevant information provided'.
|
163 |
-
|
164 |
-
Format your output nicely with sentences that are not too long, in a professional and kind tone. You should prefer lists or
|
165 |
-
bullet points when applicable. Begin by thanking the user for their question, and at the end of your answer, say "Thank you for using Ask Dane Street!"
|
166 |
-
Remember, aim to only use information from the relevant information section in your response, without explicitly referring
|
167 |
-
to that section. Return your answer immediately and without preamble.
|
168 |
-
</Task>
|
169 |
-
|
170 |
-
<Relevant Information>
|
171 |
-
{similar_content}
|
172 |
-
</Relevant Information>""",
|
173 |
-
"messages": [
|
174 |
-
{
|
175 |
-
"role": "user",
|
176 |
-
"content": [
|
177 |
-
{
|
178 |
-
"type": "text",
|
179 |
-
"text": message
|
180 |
-
}
|
181 |
-
]
|
182 |
-
}
|
183 |
-
],
|
184 |
-
}
|
185 |
-
),
|
186 |
-
)
|
187 |
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
|
210 |
|
211 |
# Create necessary services and collect data
|
|
|
102 |
global extractions
|
103 |
|
104 |
if len(message) == 0:
|
105 |
+
yield None
|
106 |
|
107 |
if authenticated == False:
|
108 |
|
109 |
if division == None:
|
110 |
if message.lower().strip() in ['ime', 'peer disability', 'pas']:
|
111 |
division = message.lower().strip().replace(' ', '_')
|
112 |
+
yield "[1] CSR\n[2] QA"
|
113 |
else:
|
114 |
+
yield "Please select a valid choice."
|
115 |
elif employee_type == None:
|
116 |
if message.lower().strip() in ['csr', 'qa']:
|
117 |
employee_type = message.lower().strip()
|
118 |
authenticated = True
|
119 |
EXTRACTIONS_PATH = EXTRACTIONS_PATH.replace('{employee_type}', employee_type).replace('{division}', division[:3])
|
120 |
extractions = read_json_from_s3()
|
121 |
+
yield "Welcome to Ask Dane Street! Whether you're new to the team or just looking for some quick information, I'm here to guide you through our company's literature and platform. Simply ask your question, and I'll provide you with the most relevant information I can."
|
122 |
else:
|
123 |
+
yield "Please select a valid choice."
|
124 |
+
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
+
question = message
|
127 |
+
|
128 |
+
# RAG
|
129 |
+
question_embedding = get_titan_embedding(bedrock_client, 'question', question)
|
130 |
+
|
131 |
+
similar_documents = []
|
132 |
+
for file, data in extractions.items():
|
133 |
+
similarity = cosine_similarity(question_embedding, np.array(data['embedding']))
|
134 |
+
similar_documents.append((file, similarity))
|
135 |
+
|
136 |
+
similar_documents.sort(key=lambda x: x[1], reverse=False)
|
137 |
+
top_docs = similar_documents[:5]
|
138 |
+
|
139 |
+
similar_content = ''
|
140 |
+
for file, _ in top_docs:
|
141 |
+
similar_content += extractions[file]['content'] + '\n'
|
142 |
+
|
143 |
+
# Invoke
|
144 |
+
response = bedrock_client.invoke_model_with_response_stream(
|
145 |
+
modelId="anthropic.claude-3-sonnet-20240229-v1:0",
|
146 |
+
body=json.dumps(
|
147 |
+
{
|
148 |
+
"anthropic_version": "bedrock-2023-05-31",
|
149 |
+
"max_tokens": 4096,
|
150 |
+
"system": f"""Here is some relevant information that may help answer the user's upcoming question:
|
151 |
+
|
152 |
+
<relevant_information>
|
153 |
+
{similar_content}
|
154 |
+
</relevant_information>
|
155 |
+
|
156 |
+
The user's question is:
|
157 |
+
<question>{question}</question>
|
158 |
+
|
159 |
+
Please carefully review the relevant information provided above.
|
160 |
+
|
161 |
+
Your task is to review the provided relevant information and answer the user's question to the best of your ability.
|
162 |
+
Aim to use information from the relevant information section to directly address the question asked, and refrain from saying
|
163 |
+
things like 'According to the relevant information provided'.
|
164 |
+
|
165 |
+
Format your output nicely with sentences that are not too long, in a professional and kind tone. You should prefer lists or
|
166 |
+
bullet points when applicable. Begin by thanking the user for their question, and at the end of your answer, say "Thank you for using Ask Dane Street!"
|
167 |
+
Remember, aim to only use information from the relevant information section in your response, without explicitly referring
|
168 |
+
to that section. Return your answer immediately and without preamble.
|
169 |
+
</Task>
|
170 |
+
|
171 |
+
<Relevant Information>
|
172 |
+
{similar_content}
|
173 |
+
</Relevant Information>""",
|
174 |
+
"messages": [
|
175 |
+
{
|
176 |
+
"role": "user",
|
177 |
+
"content": [
|
178 |
+
{
|
179 |
+
"type": "text",
|
180 |
+
"text": message
|
181 |
+
}
|
182 |
+
]
|
183 |
+
}
|
184 |
+
],
|
185 |
+
}
|
186 |
+
),
|
187 |
+
)
|
188 |
+
|
189 |
+
# Stream the response
|
190 |
+
all_text = ''
|
191 |
+
stream = response.get('body')
|
192 |
+
if stream:
|
193 |
+
for event in stream:
|
194 |
+
chunk = event.get('chunk')
|
195 |
+
if chunk and json.loads(chunk.get('bytes').decode()):
|
196 |
+
# check if delta is present
|
197 |
+
try:
|
198 |
+
this_text = json.loads(chunk.get('bytes').decode()).get('delta').get('text')
|
199 |
+
all_text += this_text
|
200 |
+
yield all_text # Stream the text back to the UI
|
201 |
+
except:
|
202 |
+
pass
|
203 |
+
|
204 |
+
# Print relevant files
|
205 |
+
output = '\n\nCheck out the following documents for more information:\n'
|
206 |
+
for file, sim in top_docs:
|
207 |
+
output += f"\n{file.replace('.txt', '.pdf')}"
|
208 |
+
|
209 |
+
yield all_text + output
|
210 |
|
211 |
|
212 |
# Create necessary services and collect data
|