jaimin committed on
Commit
05743fe
1 Parent(s): f313f40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -16
app.py CHANGED
@@ -1,28 +1,65 @@
1
- import gradio as gr
2
  from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
 
 
 
 
 
 
 
3
 
4
  model_name = "deepset/roberta-base-squad2"
5
  model = AutoModelForQuestionAnswering.from_pretrained(model_name)
6
  tokenizer = AutoTokenizer.from_pretrained(model_name)
7
 
8
- def pred(que,cont):
9
- nlp = pipeline('question-answering', model=model_name, tokenizer=model_name)
10
- QA_input = {
11
- 'question': que,
12
- 'context': cont
13
- }
14
- res = nlp(QA_input)
15
- print(res)
16
- values = list(res.values())[3]
17
 
18
- return values
 
 
 
 
19
 
 
 
 
20
 
21
- interface = gr.Interface(fn=pred,
22
- inputs=["text", "text"],
23
- outputs="text",
24
- title='Bullet Point')
 
 
 
 
 
 
 
25
 
26
- interface.launch(inline=False)
 
 
 
 
 
 
 
 
 
 
27
 
 
 
 
 
 
 
 
28
 
 
 
1
  from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
2
+ from gensim.parsing.preprocessing import STOPWORDS
3
+ import wikipedia
4
+ import gradio as gr
5
+ import nltk
6
+ from nltk.tokenize import word_tokenize
7
+ import re
8
+ nltk.download('punkt')
9
 
# Extractive QA checkpoint fine-tuned on SQuAD 2.0.
model_name = "deepset/roberta-base-squad2"
# Loaded once at import time; the Gradio handler below reuses them per request.
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
 
14
def bullete(text, wikipedia_language="en"):
    """Answer *text* from a bullet-point summary of a matching Wikipedia page.

    Tokenizes the question, strips stop/filler words to build a Wikipedia
    lookup title, fetches the page, keeps the first three sentences of each
    long paragraph among the first four as QA context, and runs the
    extractive question-answering pipeline over that context.

    Parameters
    ----------
    text : str
        The user's question.
    wikipedia_language : str, optional
        Language code passed to ``wikipedia.set_lang``; falsy values skip
        the call (default ``"en"``).

    Returns
    -------
    str
        The answer span extracted by the QA model.

    Raises
    ------
    wikipedia.exceptions.PageError / DisambiguationError
        Propagated from ``wikipedia.page`` when no unique page matches.
    """
    # Words carrying no topical information for the page lookup.
    # (The original listed '?' twice; set literal de-duplicates it.)
    question_words = STOPWORDS.union(
        {'likes', 'play', '.', ',', 'like', "don't", '?', 'use',
         'choose', 'important', 'better'}
    )
    tokens = word_tokenize(text.lower())
    keywords = [tok for tok in tokens if tok not in question_words]
    # NOTE(review): tokens are concatenated without separators, matching the
    # original "".join(...).replace(" ", "") behavior — confirm this yields
    # usable page titles for multi-word topics.
    new_txt = "".join(keywords)
    if wikipedia_language:
        wikipedia.set_lang(wikipedia_language)

    et_page = wikipedia.page(new_txt.replace(" ", ""))
    content = et_page.content

    # Drop section headings like "== History ==" and split into paragraphs.
    final_out = re.sub(r'\=.+\=', '', content)
    result = [p for p in final_out.split('\n\n') if p != '']

    # Keep the first three sentences of each long paragraph among the first
    # four. BUG FIX: the original iterated range(4) unconditionally and
    # raised IndexError when the page had fewer than four paragraphs.
    answer = [p.split(".")[0:3] for p in result[:4] if len(p) > 500]
    gen_output = ["".join(points) + "." for points in answer]

    context = ' '.join(str(elem) for elem in gen_output).replace("\n", "")
    print(context)

    # BUG FIX: reuse the module-level model/tokenizer objects instead of
    # passing the checkpoint name, which re-resolved the weights on every
    # request.
    nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)
    QA_input = {
        'question': text,
        'context': context,
    }
    print(QA_input)
    res = nlp(QA_input)
    # BUG FIX: index the answer by key — list(res.values())[3] silently
    # depended on the pipeline's dict insertion order.
    return res['answer']
58
+
59
+
60
# Single-textbox UI: the question goes in, the extracted answer comes out.
ui_config = dict(
    fn=bullete,
    inputs="text",
    outputs="text",
    title='Bullet Point',
)
interface = gr.Interface(**ui_config)

# inline=False: open in a separate browser tab rather than embedding.
interface.launch(inline=False)