from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
from gensim.parsing.preprocessing import STOPWORDS
import wikipedia
import gradio as gr
import nltk
from nltk.tokenize import word_tokenize
import re

nltk.download('punkt')

# model_name = "deepset/roberta-base-squad2"
model_name = "jaimin/Bullet_Point"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Build the question-answering pipeline once, rather than re-creating it for
# every sentence inside the loop below.
nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)


def bullete(text, wikipedia_language="en"):
    # Strip stopwords and question filler from the query, then look up the
    # remaining keywords on Wikipedia.
    try:
        question_words = STOPWORDS.union(
            set(['likes', 'play', '.', ',', 'like', "don't", '?', 'use',
                 'choose', 'important', 'better']))
        lower_text = word_tokenize(text.lower())
        new_text = [i for i in lower_text if i not in question_words]
        new_txt = " ".join(new_text)

        if wikipedia_language:
            wikipedia.set_lang(wikipedia_language)

        et_page = wikipedia.page(new_txt)
        content = et_page.content
    except Exception:
        return "Please write a correct question"

    # Remove section headings such as "== History ==" and keep the first
    # non-empty block, i.e. the article summary.
    final_out = re.sub(r'\=.+\=', '', content)
    result = list(filter(lambda x: x != '', final_out.split('\n\n')))

    # Run the QA model over each sentence of the summary and collect answers.
    answer = []
    try:
        for sentence in result[0].split('.'):
            QA_input = {'question': text, 'context': sentence}
            res = nlp(QA_input)
            answer.append(res['answer'])
    except Exception:
        pass

    # Format the collected answers as bullet points.
    gen_output = ["* " + a + "." for a in answer]
    final_answer = "\n".join(gen_output)
    return final_answer


interface = gr.Interface(fn=bullete, inputs="text", outputs="text", title='Bullet Point')
interface.launch(inline=False)
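
# Usage sketch (assumption, not part of the deployed Space): to test the
# function locally without blocking on the Gradio server, comment out the
# interface.launch(...) call above and invoke bullete() directly, e.g.:
#
#   print(bullete("What is photosynthesis?"))
#
# The example question is hypothetical; any short factual query that maps to
# a Wikipedia page should work the same way.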