Update app.py
Browse files
app.py
CHANGED
@@ -23,9 +23,16 @@ model_name = "gpt-3.5-turbo"
|
|
23 |
# Define the template
|
24 |
template = PromptTemplate(
|
25 |
prompt="""
|
26 |
-
Answer the given question using the
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
)
|
30 |
|
31 |
# Create a list of options for the dropdown
|
@@ -44,21 +51,30 @@ def get_docs(input_query, country = None):
|
|
44 |
query = "For the country of "+country+", "+input_query
|
45 |
else:
|
46 |
query = input_query
|
47 |
-
# Get top 150 because we want to make sure we have 10 pertaining to the selected country
|
48 |
# TEMP SOLUTION: not ideal, but FAISS document store doesn't allow metadata filtering. Needs to be tested with the full dataset
|
49 |
docs = retriever.retrieve(query=query,top_k = 150)
|
50 |
-
# Break out the key fields and convert to pandas for filtering
|
51 |
docs = [{**x.meta,"score":x.score,"content":x.content} for x in docs]
|
52 |
df_docs = pd.DataFrame(docs)
|
53 |
if country:
|
54 |
df_docs = df_docs.query('country in @country')
|
55 |
# Take the top 10
|
56 |
df_docs = df_docs.head(10)
|
|
|
|
|
57 |
# Convert back to Document format
|
58 |
ls_dict = []
|
59 |
-
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
return(ls_dict)
|
63 |
|
64 |
def run_query(input_text):
|
|
|
23 |
# Define the template
|
24 |
template = PromptTemplate(
|
25 |
prompt="""
|
26 |
+
Answer the given question using the following documents \
|
27 |
+
Formulate your answer in the style of an academic report \
|
28 |
+
Provide example quotes and citations using extracted text from the documents. \
|
29 |
+
Use facts and numbers from the documents in your answer. \
|
30 |
+
Reference information used from documents at the end of each applicable sentence (ex: [source: document_name]), where 'document_name' is the text provided at the start of each document (demarcated by '- &&&' and '&&&:')'. \
|
31 |
+
If no relevant information to answer the question is present in the documents, just say you don't have enough information to answer. \
|
32 |
+
Format your response as a JSON object with "answer" and "sources" as the keys. \
|
33 |
+
The "answer" key is the response to the query and "sources" key is the reference information used from the documents. \
|
34 |
+
|
35 |
+
Context: {' - '.join(['&&& '+d.meta['document_name']+' ref. '+str(d.meta['ref_id'])+' &&&: '+d.content for d in documents])}; Question: {query}; Answer:""",
|
36 |
)
|
37 |
|
38 |
# Create a list of options for the dropdown
|
|
|
51 |
query = "For the country of "+country+", "+input_query
|
52 |
else:
|
53 |
query = input_query
|
54 |
+
# Get top 150 because we want to make sure we have 10 pertaining to the selected country
|
55 |
# TEMP SOLUTION: not ideal, but FAISS document store doesn't allow metadata filtering. Needs to be tested with the full dataset
|
56 |
docs = retriever.retrieve(query=query,top_k = 150)
|
57 |
+
# Break out the key fields and convert to pandas for filtering
|
58 |
docs = [{**x.meta,"score":x.score,"content":x.content} for x in docs]
|
59 |
df_docs = pd.DataFrame(docs)
|
60 |
if country:
|
61 |
df_docs = df_docs.query('country in @country')
|
62 |
# Take the top 10
|
63 |
df_docs = df_docs.head(10)
|
64 |
+
df_docs = df_docs.reset_index()
|
65 |
+
df_docs['ref_id'] = df_docs.index + 1
|
66 |
# Convert back to Document format
|
67 |
ls_dict = []
|
68 |
+
# Iterate over df and add relevant fields to the dict object
|
69 |
+
for index, row in df_docs.iterrows():
|
70 |
+
# Create a Document object for each row
|
71 |
+
doc = Document(
|
72 |
+
row['content'],
|
73 |
+
meta={'country': row['country'],'document_name': row['document'], 'ref_id': row['ref_id'], 'score': row['score']}
|
74 |
+
)
|
75 |
+
|
76 |
+
# Append the Document object to the documents list
|
77 |
+
ls_dict.append(doc)
|
78 |
return(ls_dict)
|
79 |
|
80 |
def run_query(input_text):
|