Spaces:
Sleeping
Sleeping
update keyword search
Browse files
- Home.py +1 -1
- src/chatbot.py +9 -6
Home.py
CHANGED
@@ -24,7 +24,7 @@ with gr.Blocks() as App:
|
|
24 |
#Row orientation
|
25 |
with gr.Row() as additional_input:
|
26 |
n_slider = gr.Slider(label="Number of Results", minimum=1, maximum=100, step=1, value=10)
|
27 |
-
party_dopdown = gr.Dropdown(choices=['CDU/CSU','SPD','FDP','Grüne','not found','DIE LINKE.','PDS','KPD'], label='Party')
|
28 |
|
29 |
search_btn = gr.Button('Search')
|
30 |
|
|
|
24 |
#Row orientation
|
25 |
with gr.Row() as additional_input:
|
26 |
n_slider = gr.Slider(label="Number of Results", minimum=1, maximum=100, step=1, value=10)
|
27 |
+
party_dopdown = gr.Dropdown(value='All', choices=['All','CDU/CSU','SPD','FDP','Grüne','not found','DIE LINKE.','PDS','KPD'], label='Party') #change to all possible options
|
28 |
|
29 |
search_btn = gr.Button('Search')
|
30 |
|
src/chatbot.py
CHANGED
@@ -61,15 +61,18 @@ def chatbot(message, history, db=db, llm=llm, prompt=prompt2):
|
|
61 |
return response
|
62 |
|
63 |
# Retrieve speech contents based on keywords
|
64 |
-
def keyword_search(query,n=10, db=db, embeddings=embeddings, method='ss', party_filter = ''):
|
65 |
query_embedding = embeddings.embed_query(query)
|
66 |
if method == 'mmr':
|
67 |
df_res = pd.DataFrame(columns=['Speech Content','Date', 'Party', 'Relevance']) # Add Date/Party/Politician
|
68 |
-
results = db.max_marginal_relevance_search_with_score_by_vector(query_embedding, k = n
|
69 |
for doc in results:
|
|
|
|
|
|
|
|
|
70 |
speech_content = doc[0].page_content
|
71 |
speech_date = doc[0].metadata["date"]
|
72 |
-
party = doc[0].metadata["party"]
|
73 |
score = round(doc[1], ndigits=2) # Relevance based on relevance search
|
74 |
df_res = pd.concat([df_res, pd.DataFrame({'Speech Content': [speech_content],
|
75 |
'Date': [speech_date],
|
@@ -78,12 +81,12 @@ def keyword_search(query,n=10, db=db, embeddings=embeddings, method='ss', party_
|
|
78 |
df_res.sort_values('Relevance', inplace=True, ascending=True)
|
79 |
else:
|
80 |
df_res = pd.DataFrame(columns=['Speech Content','Date', 'Party']) # Add Date/Party/Politician #Add filter
|
81 |
-
results = db.similarity_search_by_vector(query_embedding, k = n
|
82 |
for doc in results:
|
83 |
party = doc.metadata["party"]
|
84 |
#Filter by party input
|
85 |
-
|
86 |
-
|
87 |
speech_content = doc.page_content
|
88 |
speech_date = doc.metadata["date"]
|
89 |
|
|
|
61 |
return response
|
62 |
|
63 |
# Retrieve speech contents based on keywords
|
64 |
+
def keyword_search(query,n=10, db=db, embeddings=embeddings, method='ss', party_filter = 'All'):
|
65 |
query_embedding = embeddings.embed_query(query)
|
66 |
if method == 'mmr':
|
67 |
df_res = pd.DataFrame(columns=['Speech Content','Date', 'Party', 'Relevance']) # Add Date/Party/Politician
|
68 |
+
results = db.max_marginal_relevance_search_with_score_by_vector(query_embedding, k = n)
|
69 |
for doc in results:
|
70 |
+
party = doc[0].metadata["party"]
|
71 |
+
#Filter by party input
|
72 |
+
if party != party_filter and party_filter != 'All':
|
73 |
+
continue
|
74 |
speech_content = doc[0].page_content
|
75 |
speech_date = doc[0].metadata["date"]
|
|
|
76 |
score = round(doc[1], ndigits=2) # Relevance based on relevance search
|
77 |
df_res = pd.concat([df_res, pd.DataFrame({'Speech Content': [speech_content],
|
78 |
'Date': [speech_date],
|
|
|
81 |
df_res.sort_values('Relevance', inplace=True, ascending=True)
|
82 |
else:
|
83 |
df_res = pd.DataFrame(columns=['Speech Content','Date', 'Party']) # Add Date/Party/Politician #Add filter
|
84 |
+
results = db.similarity_search_by_vector(query_embedding, k = n)
|
85 |
for doc in results:
|
86 |
party = doc.metadata["party"]
|
87 |
#Filter by party input
|
88 |
+
if party != party_filter and party_filter != 'All':
|
89 |
+
continue
|
90 |
speech_content = doc.page_content
|
91 |
speech_date = doc.metadata["date"]
|
92 |
|