Bullet_Point / app.py
jaimin's picture
Update app.py
cf2fd10
raw
history blame
2.17 kB
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
from gensim.parsing.preprocessing import STOPWORDS
import wikipedia
import gradio as gr
import nltk
from nltk.tokenize import word_tokenize
import re
# word_tokenize() relies on NLTK's Punkt tokenizer data. Older NLTK releases
# load it from the "punkt" package, while NLTK 3.9 and later look for
# "punkt_tab" instead, so both are fetched to keep tokenization working
# regardless of the installed version.
nltk.download('punkt')
nltk.download('punkt_tab')

# Question-answering checkpoint shared by the whole app. The model and the
# tokenizer are loaded once, at import time, and reused for every request.
model_name = "deepset/roberta-base-squad2"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
_BAD_QUESTION_MESSAGE = "Please write correct question"


def _wikipedia_query(text):
    """Reduce a question to the keyword string used for the Wikipedia lookup."""
    ignored = STOPWORDS.union(
        {'likes', 'play', '.', ',', 'like', "don't", '?', 'use', 'choose',
         'important', 'better'})
    keywords = [tok for tok in word_tokenize(text.lower()) if tok not in ignored]
    # Keywords are deliberately joined without a separator: the lookup has
    # always been issued with every space stripped from the query.
    return "".join(keywords)


def _first_paragraph_sentences(content):
    """Split the first paragraph of an article into non-blank '.'-separated pieces."""
    # Drop "== Heading ==" style markers before splitting into paragraphs.
    without_headings = re.sub(r'\=.+\=', '', content)
    paragraphs = [part for part in without_headings.split('\n\n') if part != '']
    if not paragraphs:
        return []
    # Blank pieces (e.g. the empty string left after a trailing '.') carry no
    # text to answer from, so they are never sent to the model.
    return [piece for piece in paragraphs[0].split('.') if piece.strip()]


def _extract_answers(question, sentences):
    """Ask the QA model the question against each sentence; return the answers."""
    answers = []
    if not sentences:
        return answers
    try:
        # Built once per call from the already-loaded module-level model and
        # tokenizer instead of reloading the checkpoint for every sentence.
        qa = pipeline('question-answering', model=model, tokenizer=tokenizer)
        for sentence in sentences:
            res = qa({'question': question, 'context': sentence})
            answers.append(res['answer'])
    except Exception as err:
        # Best effort: keep whatever was answered before the failure rather
        # than failing the whole request.
        print("Question answering stopped early:", repr(err))
    return answers


def _format_bullets(answers):
    """Render answers as one '* answer.' line each, collapsing repeated spaces."""
    bullets = "\n".join("* " + answer + "." for answer in answers)
    return re.sub(r" {2,}", " ", bullets)


def bullete(text, wikipedia_language="en"):
    """Answer a question as bullet points drawn from a Wikipedia article.

    The question is stripped of stop words and used to look up a Wikipedia
    page. The first paragraph of that page is split on '.', the
    question-answering model is run against every piece, and each extracted
    answer becomes one bullet line.

    Args:
        text: The user's question.
        wikipedia_language: Wikipedia language code passed to
            ``wikipedia.set_lang``; a falsy value leaves the current language
            unchanged.

    Returns:
        A newline-separated string of ``"* <answer>."`` lines (empty when no
        answer could be extracted), or ``"Please write correct question"``
        when the question is empty or the Wikipedia lookup fails.
    """
    if not isinstance(text, str) or not text.strip():
        return _BAD_QUESTION_MESSAGE

    try:
        query = _wikipedia_query(text)
        if wikipedia_language:
            wikipedia.set_lang(wikipedia_language)
        content = wikipedia.page(query).content
    except Exception:
        # UI boundary: any tokenization, lookup or network failure is reported
        # to the user as an invalid question instead of crashing the app.
        return _BAD_QUESTION_MESSAGE

    sentences = _first_paragraph_sentences(content)
    answers = _extract_answers(text, sentences)
    return _format_bullets(answers)
# Gradio UI: a single text box in, a single text box out, backed by bullete().
# Only the question is wired to the UI, so bullete() runs with its default
# wikipedia_language.
interface = gr.Interface(
    fn=bullete,
    inputs="text",
    outputs="text",
    title='Bullet Point',
)

# Start serving as soon as the script runs.
# NOTE(review): inline=False presumably disables inline (notebook iframe)
# rendering - confirm against the Gradio launch() documentation.
interface.launch(inline=False)