Spaces:

letrunglinh
/

chatbot_full

Runtime error

App Files Files Community

chatbot_full / app.py

letrunglinh

Update app.py

fef4425 over 1 year ago

raw

history blame

No virus

2.01 kB

	import json

	import nltk
	import numpy as np
	import pandas as pd

	from text_utils import *
	from qa_model import *
	from bm25_utils import *
	from pairwise_model import *
	# Fetch the NLTK "punkt" resource when this module is loaded. Presumably it
	# is needed by tokenization code in the star-imported helper modules (not
	# visible in this file) -- TODO confirm which module depends on it.
	nltk.download('punkt')


	# Two preprocessed Wikipedia tables are loaded once at start-up:
	#   * df_wiki_windows -- its `title` / `text` columns are indexed by the
	#     candidate positions returned from BM25 retrieval in get_answer_e2e.
	#   * df_wiki         -- its titles feed the title -> row-index lookup.
	df_wiki_windows = pd.read_csv("./processed/wikipedia_chungta_cleaned.csv")
	df_wiki = pd.read_csv("./processed/wikipedia_chungta_short.csv")

	# Force every title to `str` so the later `.strip()` call is safe even for
	# values that pandas parsed as numbers or NaN.
	df_wiki["title"] = df_wiki["title"].apply(str)


	# Load the entity lookup. The file is opened in a context manager so the
	# handle is closed deterministically, and decoded explicitly as UTF-8 so
	# the result does not depend on the platform's default locale encoding.
	with open("./processed/entities.json", encoding="utf-8") as entities_file:
	    entity_dict = json.load(entities_file)

	# Normalise every value in place ("wiki/Some_Title" -> "Some Title") and
	# collect an additional alias for each entry: its preprocessed, lower-cased
	# key. Inside the loop only existing keys are reassigned, so the dict never
	# changes size while it is being iterated; the aliases are merged afterwards.
	new_dict = {}
	for key, val in entity_dict.items():
	    val = val.replace("wiki/", "").replace("_", " ")
	    entity_dict[key] = val
	    key = preprocess(key)
	    new_dict[key.lower()] = val
	entity_dict.update(new_dict)

	# Map each whitespace-stripped article title to its row index in df_wiki.
	title2idx = {
	    title.strip(): row_idx
	    for title, row_idx in zip(df_wiki.title, df_wiki.index.values)
	}


	# Reader: called in get_answer_e2e as qa_model(question, texts, scores).
	# The first argument is presumably a Hugging Face model id -- TODO confirm.
	qa_model = QAEnsembleModel_modify(
	    "letrunglinh/qa_pnc",
	    entity_dict,
	)

	# Reranker: its stage1_ranking(question, passages) output is multiplied
	# with the BM25 scores in get_answer_e2e.
	pairwise_model_stage1 = PairwiseModel_modify(
	    "nguyenvulebinh/vi-mrc-base",
	)

	# Retriever: get_topk_stage1(query, topk=...) supplies candidate indices
	# and their BM25 scores.
	bm25_model_stage1 = BM25Gensim(
	    "./outputs/bm25_stage1/",
	    entity_dict,
	    title2idx,
	)


	def get_answer_e2e(question):
	    """Answer ``question`` end to end: retrieve, rerank, then read.

	    Steps:
	      1. BM25 retrieval of the top 200 candidate passages for the
	         preprocessed, lower-cased question.
	      2. Reranking: the pairwise model's predictions are multiplied with
	         the BM25 scores to give one combined score per candidate.
	      3. The QA model is run over the 10 candidates with the highest
	         combined score (passed in ascending score order).

	    Args:
	        question: Raw question text.

	    Returns:
	        Whatever ``qa_model`` returns for the selected passages, or, when
	        that is ``None``, the preprocessed text of the first retrieved
	        candidate.
	    """
	    # Preprocess once; the retriever gets the lower-cased form, while the
	    # reranker and the reader get the cased form.
	    question = preprocess(question)
	    query = question.lower()

	    # BM25 retrieval for the top-200 candidates.
	    top_n, bm25_scores = bm25_model_stage1.get_topk_stage1(query, topk=200)
	    window_texts = df_wiki_windows.text.values
	    pre_texts = [preprocess(window_texts[i]) for i in top_n]

	    # Rerank: combine the pairwise-model predictions with the BM25 scores.
	    ranking_preds = pairwise_model_stage1.stage1_ranking(question, pre_texts)
	    ranking_scores = np.asarray(ranking_preds * bm25_scores)

	    # Keep the 10 best candidates; argsort is ascending, so the strongest
	    # passage comes last.
	    best_idxs = np.argsort(ranking_scores)[-10:]
	    best_scores = ranking_scores[best_idxs]
	    texts = np.array(pre_texts)[best_idxs]

	    # Question answering over the selected passages.
	    best_answer = qa_model(question, texts, best_scores)
	    if best_answer is None:
	        # NOTE(review): this falls back to the first *retrieved* candidate,
	        # not the top *reranked* one (texts[-1]) -- confirm this is intended.
	        return pre_texts[0]
	    return best_answer

	if __name__ == "__main__":
	    # `gr` is not imported anywhere in this file, so the call below would
	    # raise NameError unless a star import happened to leak the name.
	    # Import gradio explicitly where it is used.
	    import gradio as gr

	    # Serve get_answer_e2e as a web demo: one text input, one textbox output.
	    gr.Interface(fn=get_answer_e2e, inputs=["text"], outputs=["textbox"]).launch()