Bullet_Point / app.py
jaimin's picture
Update app.py
74ea174
raw
history blame
No virus
2.2 kB
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
from gensim.parsing.preprocessing import STOPWORDS
import wikipedia
import gradio as gr
import nltk
from nltk.tokenize import word_tokenize
import re
# Tokenizer data needed by nltk.word_tokenize.  Older NLTK releases read the
# 'punkt' resource; NLTK 3.9+ looks for 'punkt_tab' instead, so fetch both to
# work with whichever NLTK version is installed.
nltk.download('punkt')
nltk.download('punkt_tab')

# Question-answering checkpoint used by the app.
# Alternative checkpoint kept for reference: "deepset/roberta-base-squad2".
model_name = "jaimin/Bullet_Point"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
def _wikipedia_query(text):
    """Reduce a question to the keyword string used as the Wikipedia title.

    The question is lower-cased and tokenized, stop words and a few filler
    tokens are dropped, and the surviving tokens are concatenated without a
    separator.
    """
    ignored = STOPWORDS.union({
        'likes', 'play', '.', ',', 'like', "don't", '?', 'use', 'choose',
        'important', 'better',
    })
    keywords = [tok for tok in word_tokenize(text.lower()) if tok not in ignored]
    return "".join(keywords).replace(" ", "")


def _lead_paragraph(content):
    """Return the first non-empty paragraph of *content*, headings removed."""
    # Strips "== Section ==" style heading lines from the article text.
    without_headings = re.sub(r'\=.+\=', '', content)
    for paragraph in without_headings.split('\n\n'):
        if paragraph != '':
            return paragraph
    return ''


def _format_bullets(answers):
    """Render each answer as a ``* <answer>.`` line, one bullet per row."""
    bullets = "\n".join("* " + answer + "." for answer in answers)
    # Collapse doubled spaces that can survive inside extracted spans.
    return bullets.replace("  ", " ")


def bullete(text, wikipedia_language="en"):
    """Answer a question as bullet points drawn from a Wikipedia article.

    The question is reduced to keywords, which are used to look up a
    Wikipedia page.  Each sentence of the page's first paragraph is then
    given to the question-answering model as context, and every extracted
    answer becomes one bullet.

    Args:
        text: The user's question.
        wikipedia_language: Wikipedia language code to query.  A falsy value
            leaves the ``wikipedia`` module's current language unchanged.

    Returns:
        A newline-separated string of ``"* <answer>."`` lines (empty when no
        sentence produced an answer), or the message
        ``"Please write correct question"`` when the question yields no
        keywords or the page lookup fails.
    """
    try:
        query = _wikipedia_query(text)
        if not query:
            # Nothing left to search for; skip the network round trip.
            return "Please write correct question"
        if wikipedia_language:
            wikipedia.set_lang(wikipedia_language)
        content = wikipedia.page(query).content
    except Exception as err:
        # UI boundary: any lookup failure (missing page, disambiguation,
        # network error, bad input) is reported to the user, not raised.
        print(f"Wikipedia lookup failed: {err!r}")
        return "Please write correct question"

    # Splitting on '.' leaves empty pieces (trailing period, "..", etc.);
    # the QA pipeline rejects an empty context, so drop them up front.
    sentences = [s for s in _lead_paragraph(content).split('.') if s.strip()]
    if not sentences:
        return ""

    # Build the pipeline once per request from the already-loaded model and
    # tokenizer instead of reloading the checkpoint for every sentence.
    nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)

    answers = []
    for sentence in sentences:
        QA_input = {
            'question': text,
            'context': sentence
        }
        print(QA_input)
        try:
            answers.append(nlp(QA_input)['answer'])
        except Exception as err:
            # Best effort: one unanswerable sentence must not discard the
            # bullets gathered from the others.
            print(f"Skipping sentence, QA failed: {err!r}")

    return _format_bullets(answers)
# Gradio front end: one text box in, one text box out, served by `bullete`.
interface = gr.Interface(
    fn=bullete,
    inputs="text",
    outputs="text",
    title="Bullet Point",
)
interface.launch(inline=False)