ashishraics committed on
Commit
ae34bdf
1 Parent(s): 593d5c4

bug fix spacy

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +34 -25
.gitignore CHANGED
@@ -3,3 +3,4 @@ venv/
3
 
4
  test2.py/
5
  .idea/
 
 
3
 
4
  test2.py/
5
  .idea/
6
+ .gitmodules/
app.py CHANGED
@@ -11,11 +11,18 @@ import regex as re
11
  import string
12
  import subprocess
13
  from PIL import Image
 
14
  import multiprocessing
15
  total_threads=multiprocessing.cpu_count()
16
 
17
- subprocess.run(['pip3', 'install','git+https://github.com/boudinfl/pke.git'])
18
- import pke
 
 
 
 
 
 
19
 
20
  st.set_page_config( # Alternate names: setup_page, page, layout
21
  layout="wide", # Can be "centered" or "wide". In the future also "dashboard", etc.
@@ -50,18 +57,27 @@ def set_page_title(title):
50
 
51
  set_page_title('Fill Blanks')
52
 
 
 
 
53
  def tokenize_sentence(text):
54
  sentences=sent_tokenize(text)
55
  sentences=[s.strip().lstrip().rstrip() for s in sentences if len(s) > 20]
56
  return sentences
57
 
58
 
 
 
 
 
 
 
 
59
  def get_noun_adj_verb(text):
60
  output = []
61
  try:
62
  extractor = pke.unsupervised.MultipartiteRank()
63
  extractor.load_document(input=text, language='en',normalization=None)
64
-
65
  # keyphrase candidate selection #'ADJ' 'ADP' 'ADV' 'AUX' 'DET' 'NOUN' 'NUM' 'PART' 'PROPN' 'PUNCT' 'VERB'
66
  extractor.candidate_selection(pos={'NOUN', 'VERB', 'ADJ'})
67
 
@@ -74,11 +90,9 @@ def get_noun_adj_verb(text):
74
  for val in keyphrases:
75
  output.append(val[0])
76
  except Exception as e:
77
- pass
78
  return output
79
 
80
-
81
-
82
  def get_keywords_sentence(keywords,tokenized_sent):
83
  keyword_sent_dict = {}
84
 
@@ -96,8 +110,6 @@ def get_keywords_sentence(keywords,tokenized_sent):
96
 
97
  return keyword_sent_dict
98
 
99
-
100
-
101
  def create_blanks(keyword_sentence_dict):
102
  answer=[]
103
  fib=[]
@@ -108,6 +120,14 @@ def create_blanks(keyword_sentence_dict):
108
  fib.append(sent)
109
  return answer,fib
110
 
 
 
 
 
 
 
 
 
111
  #title using markdown
112
  st.markdown("<h1 style='text-align: center; color: #3366ff;'>Create Fill The Blanks Questions</h1>", unsafe_allow_html=True)
113
  st.markdown("---")
@@ -118,9 +138,6 @@ with st.sidebar:
118
  options=['README',
119
  'Basic Fill Blanks'])
120
 
121
- default_paratext = """On May 4, the Red Planet was rocked by a roughly magnitude 5 temblor, the largest Marsquake detected to date, NASA’s Jet Propulsion Laboratory in Pasadena, Calif., reports. The shaking lasted for more than six hours and released more than 10 times the energy of the previous record-holding quake.The U.S. space agency’s InSight lander, which has been studying Mars’ deep interior since touching down on the planet in 2018 (SN: 11/26/18), recorded the event. The quake probably originated near the Cerberus Fossae region, which is more than 1,000 kilometers from the lander.Cerberus Fossae is known for its fractured surface and frequent rockfalls. It makes sense that the ground would be shifting there, says geophysicist Philippe Lognonné, principal investigator of the Seismic Experiment for Interior Structure, InSight’s seismometer. “It’s an ancient volcanic bulge.Just like earthquakes reveal information about our planet’s interior structure, Marsquakes can be used to probe what lies beneath Mars’ surface (SN: 7/22/21). And a lot can be learned from studying this whopper of a quake, says Lognonné, of the Institut de Physique du Globe de Paris. “The signal is so good, we’ll be able to work on the details."""
122
-
123
-
124
 
125
  img = Image.open("hf_space1.png")
126
  if select_task=='README':
@@ -133,22 +150,14 @@ if select_task=='README':
133
  if select_task=='Basic Fill Blanks':
134
  input_text = st.text_area(label='Input paragraph', height=500, max_chars=2000, value=default_paratext)
135
  create_fib=st.button("Create Questions")
136
- tokenized_sent = tokenize_sentence(input_text)
137
- keywords_noun_adj_verb = get_noun_adj_verb(input_text)
138
- keyword_sent_noun_verb_adj = get_keywords_sentence(keywords=keywords_noun_adj_verb, tokenized_sent=tokenized_sent)
139
- answer, fib = create_blanks(keyword_sentence_dict=keyword_sent_noun_verb_adj)
140
- for i, (answer, fib) in enumerate(zip(answer, fib)):
141
- st.markdown(f"* {fib} | **Answer is *{answer}* ** ", unsafe_allow_html=True)
142
 
143
  if create_fib:
144
- st.write(1)
145
- with st.spinner("Creating"):
146
- tokenized_sent = tokenize_sentence(input_text)
147
- keywords_noun_adj_verb = get_noun_adj_verb(input_text)
148
- keyword_sent_noun_verb_adj = get_keywords_sentence(keywords=keywords_noun_adj_verb,tokenized_sent=tokenized_sent)
149
- answer, fib = create_blanks(keyword_sentence_dict=keyword_sent_noun_verb_adj)
150
- for i,(answer,fib) in enumerate(zip(answer,fib)):
151
- st.markdown(f"* {fib} | **Answer is *{answer}* ** ",unsafe_allow_html=True)
152
 
153
 
154
 
 
11
  import string
12
  import subprocess
13
  from PIL import Image
14
+ import pke
15
  import multiprocessing
16
  total_threads=multiprocessing.cpu_count()
17
 
18
+ try:
19
+ import pke
20
+ logging.error("importing pke info")
21
+ except:
22
+ logging.error("installing pke info")
23
+ subprocess.run(['pip3', 'install','git+https://github.com/boudinfl/pke.git'])
24
+ subprocess.run(['python3' ,'-m' ,'spacy' ,'download' ,'en-core-web-sm-3.3.0'])
25
+ import pke
26
 
27
  st.set_page_config( # Alternate names: setup_page, page, layout
28
  layout="wide", # Can be "centered" or "wide". In the future also "dashboard", etc.
 
57
 
58
  set_page_title('Fill Blanks')
59
 
60
+ default_paratext = """On May 4, the Red Planet was rocked by a roughly magnitude 5 temblor, the largest Marsquake detected to date, NASA’s Jet Propulsion Laboratory in Pasadena, Calif., reports. The shaking lasted for more than six hours and released more than 10 times the energy of the previous record-holding quake.The U.S. space agency’s InSight lander, which has been studying Mars’ deep interior since touching down on the planet in 2018 (SN: 11/26/18), recorded the event. The quake probably originated near the Cerberus Fossae region, which is more than 1,000 kilometers from the lander.Cerberus Fossae is known for its fractured surface and frequent rockfalls. It makes sense that the ground would be shifting there, says geophysicist Philippe Lognonné, principal investigator of the Seismic Experiment for Interior Structure, InSight’s seismometer. “It’s an ancient volcanic bulge.Just like earthquakes reveal information about our planet’s interior structure, Marsquakes can be used to probe what lies beneath Mars’ surface (SN: 7/22/21). And a lot can be learned from studying this whopper of a quake, says Lognonné, of the Institut de Physique du Globe de Paris. “The signal is so good, we’ll be able to work on the details."""
61
+
62
+
63
  def tokenize_sentence(text):
64
  sentences=sent_tokenize(text)
65
  sentences=[s.strip().lstrip().rstrip() for s in sentences if len(s) > 20]
66
  return sentences
67
 
68
 
69
+ # extractor = pke.unsupervised.MultipartiteRank()
70
+ # extractor.load_document(input=default_paratext, language='en', normalization=None)
71
+ # extractor.candidate_selection(pos={'NOUN', 'VERB', 'ADJ'})
72
+ # extractor.candidate_weighting(threshold=0.74, method='average', alpha=1.1)
73
+ # keyphrases = extractor.get_n_best(n=5)
74
+ # print('keyphrases', keyphrases)
75
+
76
  def get_noun_adj_verb(text):
77
  output = []
78
  try:
79
  extractor = pke.unsupervised.MultipartiteRank()
80
  extractor.load_document(input=text, language='en',normalization=None)
 
81
  # keyphrase candidate selection #'ADJ' 'ADP' 'ADV' 'AUX' 'DET' 'NOUN' 'NUM' 'PART' 'PROPN' 'PUNCT' 'VERB'
82
  extractor.candidate_selection(pos={'NOUN', 'VERB', 'ADJ'})
83
 
 
90
  for val in keyphrases:
91
  output.append(val[0])
92
  except Exception as e:
93
+ print("found exception",e)
94
  return output
95
 
 
 
96
  def get_keywords_sentence(keywords,tokenized_sent):
97
  keyword_sent_dict = {}
98
 
 
110
 
111
  return keyword_sent_dict
112
 
 
 
113
  def create_blanks(keyword_sentence_dict):
114
  answer=[]
115
  fib=[]
 
120
  fib.append(sent)
121
  return answer,fib
122
 
123
+ # default_paratext = """On May 4, the Red Planet was rocked by a roughly magnitude 5 temblor, the largest Marsquake detected to date, NASA’s Jet Propulsion Laboratory in Pasadena, Calif., reports. The shaking lasted for more than six hours and released more than 10 times the energy of the previous record-holding quake.The U.S. space agency’s InSight lander, which has been studying Mars’ deep interior since touching down on the planet in 2018 (SN: 11/26/18), recorded the event. The quake probably originated near the Cerberus Fossae region, which is more than 1,000 kilometers from the lander.Cerberus Fossae is known for its fractured surface and frequent rockfalls. It makes sense that the ground would be shifting there, says geophysicist Philippe Lognonné, principal investigator of the Seismic Experiment for Interior Structure, InSight’s seismometer. “It’s an ancient volcanic bulge.Just like earthquakes reveal information about our planet’s interior structure, Marsquakes can be used to probe what lies beneath Mars’ surface (SN: 7/22/21). And a lot can be learned from studying this whopper of a quake, says Lognonné, of the Institut de Physique du Globe de Paris. “The signal is so good, we’ll be able to work on the details."""
124
+ # input_text=default_paratext
125
+ # tokenized_sent = tokenize_sentence(input_text)
126
+ # keywords_noun_adj_verb = get_noun_adj_verb(input_text)
127
+ # keyword_sent_noun_verb_adj = get_keywords_sentence(keywords=keywords_noun_adj_verb, tokenized_sent=tokenized_sent)
128
+ # answer, fib = create_blanks(keyword_sentence_dict=keyword_sent_noun_verb_adj)
129
+
130
+
131
  #title using markdown
132
  st.markdown("<h1 style='text-align: center; color: #3366ff;'>Create Fill The Blanks Questions</h1>", unsafe_allow_html=True)
133
  st.markdown("---")
 
138
  options=['README',
139
  'Basic Fill Blanks'])
140
 
 
 
 
141
 
142
  img = Image.open("hf_space1.png")
143
  if select_task=='README':
 
150
  if select_task=='Basic Fill Blanks':
151
  input_text = st.text_area(label='Input paragraph', height=500, max_chars=2000, value=default_paratext)
152
  create_fib=st.button("Create Questions")
 
 
 
 
 
 
153
 
154
  if create_fib:
155
+ tokenized_sent = tokenize_sentence(input_text)
156
+ keywords_noun_adj_verb = get_noun_adj_verb(input_text)
157
+ keyword_sent_noun_verb_adj = get_keywords_sentence(keywords=keywords_noun_adj_verb,tokenized_sent=tokenized_sent)
158
+ answer, fib = create_blanks(keyword_sentence_dict=keyword_sent_noun_verb_adj)
159
+ for i,(answer,fib) in enumerate(zip(answer,fib)):
160
+ st.write(f"* {fib} | **Answer is *{answer}* ** ")
 
 
161
 
162
 
163