# Spaces: Runtime error
import functools
import re

import gradio as gr
import nltk
import wikipedia
from gensim.parsing.preprocessing import STOPWORDS
from nltk.tokenize import word_tokenize
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
# Tokenizer data needed by nltk.tokenize.word_tokenize. Newer NLTK releases
# look up the "punkt_tab" resource instead of the older "punkt" models, so
# request both to stay compatible across NLTK versions.
nltk.download('punkt')
nltk.download('punkt_tab')

# Question-answering checkpoint, loaded once when the module is imported.
model_name = "deepset/roberta-base-squad2"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
@functools.lru_cache(maxsize=1)
def _qa_pipeline():
    """Build the question-answering pipeline once and reuse it on later calls."""
    # Reuse the module-level model/tokenizer objects rather than reloading the
    # checkpoint by name on every request.
    return pipeline('question-answering', model=model, tokenizer=tokenizer)


def _build_context(content, max_paragraphs=4, min_length=500, max_sentences=3):
    """Condense article text into a short context string for the QA model.

    Only the first ``max_paragraphs`` non-empty paragraphs are considered, and
    of those only the ones longer than ``min_length`` characters are kept. Each
    kept paragraph is cut after its first ``max_sentences`` "."-separated
    pieces. Returns an empty string when no paragraph qualifies.
    """
    # Remove any span running from one "=" to a later "=" on the same line
    # (e.g. section markers such as "== History ==").
    stripped = re.sub(r'\=.+\=', '', content)
    paragraphs = [part for part in stripped.split('\n\n') if part != '']

    summaries = []
    # Slicing (instead of indexing 0..3) keeps short pages from raising
    # IndexError when they have fewer than ``max_paragraphs`` paragraphs.
    for paragraph in paragraphs[:max_paragraphs]:
        if len(paragraph) <= min_length:
            continue
        # Re-join with "." so the sentence boundaries removed by split()
        # are preserved in the context.
        summary = ".".join(paragraph.split(".")[:max_sentences])
        if not summary.endswith("."):
            summary += "."
        summaries.append(summary)

    return ' '.join(summaries).replace("\n", "")


def bullete(text, wikipedia_language="en"):
    """Answer a question using the opening paragraphs of a Wikipedia page.

    The question is lower-cased and tokenized, stop words are removed, and the
    remaining tokens form the Wikipedia page lookup. A condensed version of the
    page content is then handed to the question-answering pipeline together
    with the original question.

    Args:
        text: The question, as typed by the user.
        wikipedia_language: Language code passed to ``wikipedia.set_lang``;
            a falsy value leaves the library's current language untouched.

    Returns:
        The ``answer`` string produced by the pipeline, or an empty string
        when no usable context could be extracted from the page.

    Exceptions from ``wikipedia.page`` (for example when no matching page is
    found) are not caught here and propagate to the caller.
    """
    question_words = STOPWORDS.union({
        'likes', 'play', '.', ',', 'like', "don't", '?',
        'use', 'choose', 'important', 'better',
    })
    tokens = word_tokenize(text.lower())
    keywords = [token for token in tokens if token not in question_words]

    # NOTE(review): the keywords are concatenated without separators (and any
    # remaining spaces stripped), exactly as before; confirm this is the
    # intended lookup string rather than a space-separated query.
    query = "".join(keywords).replace(" ", "")

    if wikipedia_language:
        wikipedia.set_lang(wikipedia_language)
    page = wikipedia.page(query)

    context = _build_context(page.content)
    print(context)
    if not context:
        # No paragraph was long enough to use, so there is nothing to query.
        return ""

    qa_input = {
        'question': text,
        'context': context,
    }
    print(qa_input)
    result = _qa_pipeline()(qa_input)
    # Read the answer by key instead of relying on the dict's value order.
    return result['answer']
# Wire bullete() into a plain text-in / text-out Gradio app and start it.
interface = gr.Interface(bullete, inputs="text", outputs="text", title='Bullet Point')
interface.launch(inline=False)