File size: 2,201 Bytes
a55c9ac
05743fe
 
 
 
 
 
92824c7
05743fe
a55c9ac
74ea174
 
a55c9ac
 
 
92824c7
 
 
cf2fd10
 
92824c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bcdf874
 
92824c7
bcdf874
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05743fe
92824c7
 
05743fe
 
 
 
a55c9ac
4fb030b
92824c7
cf2fd10
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
from gensim.parsing.preprocessing import STOPWORDS
import wikipedia
import gradio as gr
import nltk
from nltk.tokenize import word_tokenize
import re

nltk.download('punkt')

#model_name = "deepset/roberta-base-squad2"
model_name="jaimin/Bullet_Point"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)


def bullete(text, wikipedia_language="en"):
    try:
        question_words = STOPWORDS.union(
            set(['likes', 'play', '.', ',', 'like', "don't", '?', 'use', 'choose', 'important', 'better', '?']))
        lower_text = text.lower()
        lower_text = word_tokenize(lower_text)
        new_text = [i for i in lower_text if i not in question_words]
        new_txt = "".join(new_text)
        if wikipedia_language:
            wikipedia.set_lang(wikipedia_language)

        et_page = wikipedia.page(new_txt.replace(" ", ""))
        title = et_page.title
        content = et_page.content
        page_url = et_page.url
        linked_pages = et_page.links

        text1 = content
    except:
        return "Please write correct question"
    final_out = re.sub(r'\=.+\=', '', text1)
    result = list(filter(lambda x: x != '', final_out.split('\n\n')))

    answer = []
    try:
        for i in range(len(result[0].split('.'))):
            nlp = pipeline('question-answering', model=model_name, tokenizer=model_name)
            QA_input = {
                'question': text,
                'context': result[0].split('.')[i]
            }
            res = nlp(QA_input)
            print(QA_input)
            values = list(res.values())[3]
            answer.append(values)
    except:
        gen_output = []
        for i in range(len(answer)):
            gen_output.append("* " + answer[i] + ".")
        paraphrase = "\n".join(gen_output)
        final_answer = paraphrase.replace(" ", " ")
        return final_answer



interface = gr.Interface(fn=bullete,
                         inputs="text",
                         outputs="text",
                         title='Bullet Point')

interface.launch(inline=False)