|
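"""Gradio demo: answer a question as bullet points extracted from Wikipedia.

The question is stripped of stopwords, the remaining keywords are used to
fetch a Wikipedia article, and the jaimin/Bullet_Point extractive QA model
pulls an answer span from each sentence of the article's first paragraph.
"""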
import re

import gradio as gr
import nltk
import wikipedia
from gensim.parsing.preprocessing import STOPWORDS
from nltk.tokenize import word_tokenize
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

# word_tokenize needs the punkt tokenizer; newer NLTK releases may also
# require nltk.download('punkt_tab').
nltk.download('punkt')

model_name = "jaimin/Bullet_Point"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Build the question-answering pipeline once at startup so it is not
# re-created for every sentence inside the request handler.
nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)

def bullet_points(text, wikipedia_language="en"):
    try:
        # Drop stopwords and question filler so that only the topic
        # keywords remain to use as the Wikipedia query.
        question_words = STOPWORDS.union(
            {'likes', 'play', '.', ',', 'like', "don't", '?', 'use',
             'choose', 'important', 'better'})
        tokens = word_tokenize(text.lower())
        keywords = [token for token in tokens if token not in question_words]
        query = " ".join(keywords)

        if wikipedia_language:
            wikipedia.set_lang(wikipedia_language)

        page = wikipedia.page(query)
        content = page.content
    except Exception:
        # Covers PageError, DisambiguationError, network failures, etc.
        return "Please enter a valid question."

    # Remove section headings such as "== History ==" and keep the
    # non-empty paragraphs.
    plain_text = re.sub(r'=.+=', '', content)
    paragraphs = [p for p in plain_text.split('\n\n') if p]
    if not paragraphs:
        return "Please enter a valid question."

    # Run extractive QA against each sentence of the lead paragraph and
    # collect the extracted answer spans.
    answers = []
    for sentence in paragraphs[0].split('.'):
        if not sentence.strip():
            continue
        try:
            result = nlp({'question': text, 'context': sentence})
            answers.append(result['answer'])
        except Exception:
            # Skip sentences the model cannot handle instead of aborting
            # the whole request.
            continue

    return "\n".join("* " + answer + "." for answer in answers)
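# Illustrative usage (output depends on the model and on which Wikipedia
# page the extracted keywords resolve to):
#   bullet_points("Why is the sky blue?")
# returns one "* <answer span>." line per sentence of the lead paragraph.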
|
interface = gr.Interface(fn=bullet_points,
                         inputs="text",
                         outputs="text",
                         title='Bullet Point')
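# launch() serves the app at http://127.0.0.1:7860 by default;
# inline=False skips embedding the UI inline when run inside a notebook.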
interface.launch(inline=False) |