ashishraics commited on
Commit
1d43b95
1 Parent(s): 8ea8337

final basic app

Browse files
Files changed (9) hide show
  1. .gitignore +5 -0
  2. .streamlit/config.toml +34 -0
  3. app.py +164 -0
  4. config.yaml +0 -0
  5. flagged/log.csv +3 -0
  6. hf_space1.png +0 -0
  7. requirements.txt +4 -0
  8. test.py +98 -0
  9. test2.py +38 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ flagged/
2
+ venv/
3
+
4
+ test2.py
5
+ .idea/
.streamlit/config.toml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [theme]
2
+ #theme primary
3
+ base="light"
4
+ # Primary accent color for interactive elements.
5
+ #primaryColor =
6
+
7
+ # Background color for the main content area.
8
+ #backgroundColor =
9
+
10
+ # Background color used for the sidebar and most interactive widgets.
11
+ #secondaryBackgroundColor ='grey'
12
+
13
+ # Color used for almost all text.
14
+ #textColor ='blue'
15
+
16
+ # Font family for all text in the app, except code blocks. One of "sans serif", "serif", or "monospace".
17
+ # Default: "sans serif"
18
+ font = "sans serif"
19
+
20
+ # [logger]
21
+ # level='info'
22
+ # messageFormat = "%(message)s"
23
+ #messageFormat="%(asctime)s %(message)s"
24
+
25
+ [global]
26
+
27
+ # By default, Streamlit checks if the Python watchdog module is available and, if not, prints a warning asking for you to install it. The watchdog module is not required, but highly recommended. It improves Streamlit's ability to detect changes to files in your filesystem.
28
+ # If you'd like to turn off this warning, set this to True.
29
+ # Default: false
30
+ disableWatchdogWarning = false
31
+
32
+ # If True, will show a warning when you run a Streamlit-enabled script via "python my_script.py".
33
+ # Default: true
34
+ showWarningOnDirectExecution = false
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import gradio as gr
3
+ import streamlit as st
4
+ import onnxruntime as ort
5
+ import pke
6
+ import nltk
7
+ nltk.download('stopwords')
8
+ nltk.download('wordnet')
9
+ nltk.download('punkt')
10
+ from nltk.corpus import stopwords,wordnet
11
+ from nltk.tokenize import sent_tokenize
12
+ from flashtext import KeywordProcessor
13
+ import regex as re
14
+ import string
15
+ import subprocess
16
+ from PIL import Image
17
+ import multiprocessing
18
+ total_threads=multiprocessing.cpu_count()
19
+
20
# Page-level Streamlit settings; this is the first Streamlit call in the script.
# NOTE(review): page_title is the literal string 'None', not the None object —
# confirm this is intended (set_page_title() further down rewrites the tab title).
st.set_page_config(  # Alternate names: setup_page, page, layout
    layout="wide",  # Can be "centered" or "wide". In the future also "dashboard", etc.
    initial_sidebar_state="auto",  # Can be "auto", "expanded", "collapsed"
    page_title='None',  # String or None. Strings get appended with "• Streamlit".
)
25
+
26
def set_page_title(title):
    """Force the browser tab title to *title* through an injected script.

    Renders a zero-height iframe in the sidebar. Its ``srcdoc`` script looks
    up the parent document's ``<title>`` element, disconnects any observer a
    previous run stored on ``window.parent.titleObserver``, installs a new
    ``MutationObserver`` that resets the title text whenever it differs from
    *title*, and finally sets the title text once.

    Args:
        title: Text to show as the page title. It is interpolated into the
            script unescaped between single quotes, so a quote character in
            it would break the generated markup.
    """
    # Doubled braces are literal JS braces inside the f-string. Each trailing
    # backslash is a Python line continuation that swallows the newline before
    # the empty line that follows it — presumably so the rendered markup
    # contains no blank lines (TODO confirm).
    st.sidebar.markdown(unsafe_allow_html=True, body=f"""
<iframe height=0 srcdoc="<script>
const title = window.parent.document.querySelector('title') \

const oldObserver = window.parent.titleObserver
if (oldObserver) {{
oldObserver.disconnect()
}} \

const newObserver = new MutationObserver(function(mutations) {{
const target = mutations[0].target
if (target.text !== '{title}') {{
target.text = '{title}'
}}
}}) \

newObserver.observe(title, {{ childList: true }})
window.parent.titleObserver = newObserver \

title.text = '{title}'
</script>" />
""")
49
+
50
+
51
+ set_page_title('Fill Blanks')
52
+
53
def tokenize_sentence(text):
    """Split *text* into sentences and keep only the longer ones.

    A sentence is kept when its raw (un-stripped) length exceeds 20
    characters; kept sentences are returned with surrounding whitespace
    removed, in their original order.
    """
    long_enough = (raw for raw in sent_tokenize(text) if len(raw) > 20)
    return [raw.strip() for raw in long_enough]
57
+
58
+
59
def get_noun_adj_verb(text):
    """Extract up to five noun/verb/adjective keyphrases from *text*.

    Runs pke's ``MultipartiteRank`` extractor on the raw text (no word
    normalization), restricted to NOUN, VERB and ADJ candidates.

    Args:
        text: The paragraph to analyse.

    Returns:
        A list of keyphrase strings in the order the extractor ranks them.
        Extraction is best-effort: if anything fails, the failure is logged
        and whatever was collected so far (usually an empty list) is returned.
    """
    output = []
    try:
        extractor = pke.unsupervised.MultipartiteRank()
        extractor.load_document(input=text, language='en', normalization=None)

        # keyphrase candidate selection
        # available tags: 'ADJ' 'ADP' 'ADV' 'AUX' 'DET' 'NOUN' 'NUM' 'PART' 'PROPN' 'PUNCT' 'VERB'
        extractor.candidate_selection(pos={'NOUN', 'VERB', 'ADJ'})

        # candidate weighting
        extractor.candidate_weighting(threshold=0.74, method='average', alpha=1.1)

        # keep the top 5; the first element of each result is the phrase text
        output.extend(candidate[0] for candidate in extractor.get_n_best(n=5))
    except Exception:
        # Keep the app alive, but record why extraction failed instead of
        # silently discarding the error.
        logging.exception("Keyphrase extraction failed")
    return output
79
+
80
+
81
+
82
def get_keywords_sentence(keywords, tokenized_sent):
    """Map every keyword to the sentences that contain it, longest first.

    Args:
        keywords: Keyword strings to look up.
        tokenized_sent: Sentences to search; a keyword matches a sentence
            when it occurs in it as a (case-sensitive) substring.

    Returns:
        A dict with one entry per keyword. Each value is the list of matching
        sentences sorted by decreasing length (empty when nothing matches).
    """
    return {
        keyword: sorted(
            (sentence for sentence in tokenized_sent if keyword in sentence),
            key=len,
            reverse=True,
        )
        for keyword in keywords
    }
98
+
99
+
100
+
101
def create_blanks(keyword_sentence_dict):
    """Build fill-in-the-blank questions from a keyword -> sentences mapping.

    For each keyword the first sentence in its list is used and every
    occurrence of the keyword in it is replaced by a blank.

    Args:
        keyword_sentence_dict: Maps a keyword to the list of sentences that
            contain it. Keywords whose list is empty are skipped.

    Returns:
        A tuple ``(answer, fib)`` of two parallel lists: the keywords that
        produced a question, and the corresponding blanked sentences.
    """
    answer = []
    fib = []
    for keyword, sentences in keyword_sentence_dict.items():
        if not sentences:
            # No sentence mentions this keyword, so there is nothing to blank.
            continue
        # Literal replacement: the keyword is plain text, not a regex pattern,
        # so characters such as '+' or '(' must not be interpreted.
        blanked = sentences[0].replace(keyword, '____________')
        answer.append(keyword)
        fib.append(blanked)
    return answer, fib
110
+
111
# Main page heading, rendered as raw HTML so it can be centered and colored,
# followed by a horizontal rule.
st.markdown("<h1 style='text-align: center; color: #3366ff;'>Create Fill The Blanks Questions</h1>", unsafe_allow_html=True)
st.markdown("---")
with st.sidebar:
    # Sidebar heading, again as raw HTML.
    # NOTE(review): the `color:` declaration has no value here — confirm
    # whether a color was meant to be set.
    st.markdown("<h1 style='text-align: left; color: ;'>NLP Tasks</h1>", unsafe_allow_html=True)
    # Task picker; the selected option decides which section is rendered below.
    select_task=st.selectbox(label="Select task from drop down menu",
                             options=['README',
                                      'Basic Fill Blanks'])
120
+
121
+ default_paratext = """On May 4, the Red Planet was rocked by a roughly magnitude 5 temblor, the largest Marsquake detected to date, NASA’s Jet Propulsion Laboratory in Pasadena, Calif., reports. The shaking lasted for more than six hours and released more than 10 times the energy of the previous record-holding quake.The U.S. space agency’s InSight lander, which has been studying Mars’ deep interior since touching down on the planet in 2018 (SN: 11/26/18), recorded the event. The quake probably originated near the Cerberus Fossae region, which is more than 1,000 kilometers from the lander.Cerberus Fossae is known for its fractured surface and frequent rockfalls. It makes sense that the ground would be shifting there, says geophysicist Philippe Lognonné, principal investigator of the Seismic Experiment for Interior Structure, InSight’s seismometer. “It’s an ancient volcanic bulge.Just like earthquakes reveal information about our planet’s interior structure, Marsquakes can be used to probe what lies beneath Mars’ surface (SN: 7/22/21). And a lot can be learned from studying this whopper of a quake, says Lognonné, of the Institut de Physique du Globe de Paris. “The signal is so good, we’ll be able to work on the details."""
122
+
123
+
124
+
125
# Render the section that matches the task chosen in the sidebar.
if select_task == 'README':
    st.header("Summary")
    st.write(f"The App gives you ability to create *Fill in the blanks* Capability just like Ed-Techs.Currently.It has {total_threads} CPU cores but only 1 is available per user so "
             f"inference time will be on the higher side.")
    st.markdown("---")
    # Open the screenshot only in the branch that actually displays it.
    img = Image.open("hf_space1.png")
    st.image(img)
elif select_task == 'Basic Fill Blanks':
    input_text = st.text_area(label='Input paragraph', height=500, max_chars=2000, value=default_paratext)
    create_fib = st.button("Create Questions")

    # Streamlit re-executes the whole script on every interaction, so the
    # keyword extraction is deferred until the button has been pressed.
    if create_fib:
        tokenized_sent = tokenize_sentence(input_text)
        keywords_noun_adj_verb = get_noun_adj_verb(input_text)
        keyword_sent_noun_verb_adj = get_keywords_sentence(keywords=keywords_noun_adj_verb,
                                                           tokenized_sent=tokenized_sent)

        answers, fibs = create_blanks(keyword_sentence_dict=keyword_sent_noun_verb_adj)

        # Distinct loop names: the lists being iterated are not rebound.
        for answer, fib in zip(answers, fibs):
            st.markdown(f"* {fib} | **Answer is *{answer}* ** ", unsafe_allow_html=True)
146
+
147
+
148
+
149
+ # demo = gr.Interface(fn=get_noun_adj_verb,
150
+ # inputs=gr.inputs.Textbox(lines=10,default=default_paratext),
151
+ # outputs=gr.outputs.Textbox(),
152
+ # allow_flagging='never',
153
+ # layout='vertical',
154
+ # title="Make Fill in the Blanks using your text",
155
+ # )
156
+ #
157
+ # if __name__ == "__main__":
158
+ # demo.launch()
159
+
160
+
161
+
162
+
163
+
164
+
config.yaml ADDED
File without changes
flagged/log.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 'name','output','timestamp'
2
+ '','','2022-05-16 01:13:57.520111'
3
+ 'On May 4, the Red Planet was rocked by a roughly magnitude 5 temblor, the largest Marsquake detected to date, NASA’s Jet Propulsion Laboratory in Pasadena, Calif., reports. The shaking lasted for more than six hours and released more than 10 times the energy of the previous record-holding quake.The U.S. space agency’s InSight lander, which has been studying Mars’ deep interior since touching down on the planet in 2018 (SN: 11/26/18), recorded the event. The quake probably originated near the Cerberus Fossae region, which is more than 1,000 kilometers from the lander.Cerberus Fossae is known for its fractured surface and frequent rockfalls. It makes sense that the ground would be shifting there, says geophysicist Philippe Lognonné, principal investigator of the Seismic Experiment for Interior Structure, InSight’s seismometer. “It’s an ancient volcanic bulge.Just like earthquakes reveal information about our planet’s interior structure, Marsquakes can be used to probe what lies beneath Mars’ surface (SN: 7/22/21). And a lot can be learned from studying this whopper of a quake, says Lognonné, of the Institut de Physique du Globe de Paris. “The signal is so good, we’ll be able to work on the details.','','2022-05-16 01:28:38.173283'
hf_space1.png ADDED
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit~=1.9.0
2
+ onnxruntime~=1.11.1
3
+ Pillow~=9.1.0
4
+ git+https://github.com/boudinfl/pke.git
test.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import defaultdict
2
+
3
+ from pke.data_structures import Candidate
4
+ from pke.readers import RawTextReader, SpacyDocReader, PreprocessedReader
5
+
6
+ from nltk import RegexpParser
7
+ from nltk.stem.snowball import SnowballStemmer
8
+ import pke
9
+ from pke.lang import stopwords, langcodes
10
+
11
+ from string import punctuation
12
+ import os
13
+ import logging
14
+ import spacy
15
+
16
+ nlp = spacy.load("en_core_web_sm")
17
+
18
+ from pke.base import LoadFile
19
class LoadFileNew(LoadFile):
    """``LoadFile`` subclass whose ``load_document`` accepts a preloaded spaCy model."""

    def load_document(self, input, language=None, stoplist=None,
                      normalization='stemming', spacy_model=nlp):
        """Read *input* into ``self.sentences`` and fill each sentence's stems.

        Args:
            input: A spaCy ``Doc``, a raw text string, or a list of lists
                (already pre-processed sentences).
            language: Language code of the document; ``'en'`` when omitted.
            stoplist: Custom stop words; when falsy, the stop words for
                *language* from ``pke.lang`` are used.
            normalization: ``'stemming'`` stems and lower-cases every word;
                any other value only lower-cases the words.
            spacy_model: spaCy pipeline passed to the raw-text reader.

        Returns:
            The populated sentence list, or ``None`` when *input* has an
            unsupported type (an error is logged in that case).
        """
        # Reset object for new document
        self.__init__()

        # set the language of the document (default: English)
        self.language = 'en' if language is None else language

        # word normalization (filling self.sentences[].stems)
        self.normalization = normalization

        # initialize the stoplist
        self.stoplist = stoplist if stoplist else stopwords.get(self.language)

        # Pick the reader that matches the input type. These trace messages
        # are emitted on the success path, so they are debug-level.
        if isinstance(input, spacy.tokens.doc.Doc):
            parser = SpacyDocReader()
            sents = parser.read(spacy_doc=input)
            logging.debug('check whether input is a spacy doc object instance')
        elif isinstance(input, str):
            parser = RawTextReader(language=self.language)
            sents = parser.read(text=input, spacy_model=spacy_model)
            logging.debug('check whether input is a string')
        elif isinstance(input, list) and all(isinstance(item, list) for item in input):
            parser = PreprocessedReader()
            sents = parser.read(list_of_sentence_tuples=input)
            logging.debug('check whether input is processed text')
        else:
            logging.error('Cannot process input. It is neither a spacy doc or a string: {}'.format(type(input)))
            # TODO raise TypeError('Cannot process input. It is neither a spacy doc, a string or a list of tuple: {}'.format(type(input)))) ?
            return

        # populate the sentences
        self.sentences = sents

        # TODO: this code could go into Reader.normalize ? Hum, not sure
        if self.normalization == 'stemming':
            # fall back to porter if english language (or unavailable languages) is used
            try:
                langcode = langcodes.get(self.language)
                if langcode == "english":
                    langcode = 'porter'
                stemmer = SnowballStemmer(langcode)
            except ValueError:
                logging.error('No stemmer available for \'{}\' language -> fall back to porter.'.format(self.language))
                stemmer = SnowballStemmer("porter")

            # populate Sentence.stems
            for sentence in self.sentences:
                sentence.stems = [stemmer.stem(w).lower() for w in sentence.words]
        else:
            for sentence in self.sentences:
                sentence.stems = [w.lower() for w in sentence.words]

        return self.sentences
86
+
87
+ #
88
+ test = LoadFileNew()
89
+ text="On May 4, the Red Planet was rocked by a roughly magnitude 5 temblor, the largest Marsquake detected to date, NASA’s Jet Propulsion Laboratory in Pasadena, Calif., reports. The shaking lasted for more than six hours and released more than 10 times the energy of the previous record-holding quake.The U.S. space agency’s InSight lander, which has been studying Mars’ deep interior since touching down on the planet in 2018 (SN: 11/26/18), recorded the event. The quake probably originated near the Cerberus Fossae region, which is more than 1,000 kilometers from the lander.Cerberus Fossae is known for its fractured surface and frequent rockfalls. It makes sense that the ground would be shifting there, says geophysicist Philippe Lognonné, principal investigator of the Seismic Experiment for Interior Structure, InSight’s seismometer. “It’s an ancient volcanic bulge.Just like earthquakes reveal information about our planet’s interior structure, Marsquakes can be used to probe what lies beneath Mars’ surface (SN: 7/22/21). And a lot can be learned from studying this whopper of a quake, says Lognonné, of the Institut de Physique du Globe de Paris. “The signal is so good, we’ll be able to work on the details."
90
+ out=test.load_document(input=text)
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
test2.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pke
2
+ import nltk
3
+ nltk.download('stopwords')
4
+ nltk.download('wordnet')
5
+ nltk.download('punkt')
6
+ from nltk.corpus import stopwords,wordnet
7
+ import string
8
+
9
+
10
def get_noun_adj_verb(text):
    """Extract up to five noun/proper-noun/adjective keyphrases from *text*.

    Runs pke's ``MultipartiteRank`` extractor on the raw text (no word
    normalization), restricted to NOUN, PROPN and ADJ candidates.

    Args:
        text: The paragraph to analyse.

    Returns:
        Always a list of keyphrase strings. If extraction fails, the error is
        logged and the list collected so far (usually empty) is returned,
        rather than handing the exception object back to the caller.
    """
    # Local import: this script does not import logging at module level.
    import logging

    output = []
    try:
        # initialize keyphrase extraction model, MultipartiteRank is the most recent model
        extractor = pke.unsupervised.MultipartiteRank()
        extractor.load_document(input=text, language='en', normalization=None)

        # keyphrase candidate selection
        # available tags: 'ADJ' 'ADP' 'ADV' 'AUX' 'DET' 'NOUN' 'NUM' 'PART' 'PROPN' 'PUNCT' 'VERB'
        extractor.candidate_selection(pos={'NOUN', 'PROPN', 'ADJ'})
        # candidate weighting
        extractor.candidate_weighting(threshold=0.74, alpha=1.1, method='average')

        # keep the top 5; the first element of each result is the phrase text
        output.extend(candidate[0] for candidate in extractor.get_n_best(n=5))
    except Exception:
        logging.exception("Keyphrase extraction failed")

    return output
35
+
36
+ default_paratext = """On May 4, the Red Planet was rocked by a roughly magnitude 5 temblor, the largest Marsquake detected to date, NASA’s Jet Propulsion Laboratory in Pasadena, Calif., reports. The shaking lasted for more than six hours and released more than 10 times the energy of the previous record-holding quake.The U.S. space agency’s InSight lander, which has been studying Mars’ deep interior since touching down on the planet in 2018 (SN: 11/26/18), recorded the event. The quake probably originated near the Cerberus Fossae region, which is more than 1,000 kilometers from the lander.Cerberus Fossae is known for its fractured surface and frequent rockfalls. It makes sense that the ground would be shifting there, says geophysicist Philippe Lognonné, principal investigator of the Seismic Experiment for Interior Structure, InSight’s seismometer. “It’s an ancient volcanic bulge.Just like earthquakes reveal information about our planet’s interior structure, Marsquakes can be used to probe what lies beneath Mars’ surface (SN: 7/22/21). And a lot can be learned from studying this whopper of a quake, says Lognonné, of the Institut de Physique du Globe de Paris. “The signal is so good, we’ll be able to work on the details."""
37
+
38
+ print(type(get_noun_adj_verb(default_paratext)))