umair894 and Technozam committed
Commit b2d0b8d (0 parents)

Duplicate from Technozam/mcqs

Co-authored-by: Muzammil <Technozam@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,38 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ s2v_old/freqs.json filter=lfs diff=lfs merge=lfs -text
+ s2v_old/key2row filter=lfs diff=lfs merge=lfs -text
+ s2v_old/strings.json filter=lfs diff=lfs merge=lfs -text
+ s2v_old/vectors filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: Mcqs
+ emoji: 💻
+ colorFrom: pink
+ colorTo: yellow
+ sdk: gradio
+ sdk_version: 3.29.0
+ app_file: app.py
+ pinned: false
+ duplicated_from: Technozam/mcqs
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,548 @@
+
+ from textwrap3 import wrap
+
+ text = """Elon Musk has shown again he can influence the digital currency market with just his tweets. After saying that his electric vehicle-making company
+ Tesla will not accept payments in Bitcoin because of environmental concerns, he tweeted that he was working with developers of Dogecoin to improve
+ system transaction efficiency. Following the two distinct statements from him, the world's largest cryptocurrency hit a two-month low, while Dogecoin
+ rallied by about 20 percent. The SpaceX CEO has in recent months often tweeted in support of Dogecoin, but rarely for Bitcoin. In a recent tweet,
+ Musk put out a statement from Tesla that it was “concerned” about the rapidly increasing use of fossil fuels for Bitcoin (price in India) mining and
+ transaction, and hence was suspending vehicle purchases using the cryptocurrency. A day later he again tweeted saying, “To be clear, I strongly
+ believe in crypto, but it can't drive a massive increase in fossil fuel use, especially coal”. It triggered a downward spiral for Bitcoin value but
+ the cryptocurrency has stabilised since. A number of Twitter users welcomed Musk's statement. One of them said it's time people started realising
+ that Dogecoin “is here to stay” and another referred to Musk's previous assertion that crypto could become the world's future currency."""
+
+ for wrp in wrap(text, 150):
+     print(wrp)
+ print("\n")
+
+ """## Example 2"""
+
+ import torch
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
+ summary_model = T5ForConditionalGeneration.from_pretrained('t5-base')
+ summary_tokenizer = T5Tokenizer.from_pretrained('t5-base')
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ summary_model = summary_model.to(device)
+
+ import random
+ import numpy as np
+
+ def set_seed(seed: int):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+
+ set_seed(42)
+
+ import nltk
+ nltk.download('punkt')
+ nltk.download('brown')
+ nltk.download('wordnet')
+ from nltk.corpus import wordnet as wn
+ from nltk.tokenize import sent_tokenize
+
+ def postprocesstext(content):
+     final = ""
+     for sent in sent_tokenize(content):
+         sent = sent.capitalize()
+         final = final + " " + sent
+     return final
+
+
+ def summarizer(text, model, tokenizer):
+     text = text.strip().replace("\n", " ")
+     text = "summarize: " + text
+     # print (text)
+     max_len = 512
+     encoding = tokenizer.encode_plus(text, max_length=max_len, pad_to_max_length=False, truncation=True, return_tensors="pt").to(device)
+
+     input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]
+
+     outs = model.generate(input_ids=input_ids,
+                           attention_mask=attention_mask,
+                           early_stopping=True,
+                           num_beams=3,
+                           num_return_sequences=1,
+                           no_repeat_ngram_size=2,
+                           min_length=75,
+                           max_length=300)
+
+     dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
+     summary = dec[0]
+     summary = postprocesstext(summary)
+     summary = summary.strip()
+
+     return summary
+
+
+ summarized_text = summarizer(text, summary_model, summary_tokenizer)
+
+
+ print("\nOriginal Text >>")
+ for wrp in wrap(text, 150):
+     print(wrp)
+ print("\n")
+ print("Summarized Text >>")
+ for wrp in wrap(summarized_text, 150):
+     print(wrp)
+ print("\n")
+
+ """# **Answer Span Extraction (Keywords and Noun Phrases)**"""
+
+ total = 10
+
+ import nltk
+ nltk.download('stopwords')
+ from nltk.corpus import stopwords
+ import string
+ import pke
+ import traceback
+
+ def get_nouns_multipartite(content):
+     out = []
+     try:
+         extractor = pke.unsupervised.MultipartiteRank()
+         extractor.load_document(input=content, language='en')
+         # select noun and proper-noun candidates that do not contain
+         # punctuation marks or stopwords.
+         pos = {'PROPN', 'NOUN'}
+         stoplist = list(string.punctuation)
+         stoplist += ['-lrb-', '-rrb-', '-lcb-', '-rcb-', '-lsb-', '-rsb-']
+         stoplist += stopwords.words('english')
+         # extractor.candidate_selection(pos=pos, stoplist=stoplist)
+         extractor.candidate_selection(pos=pos)
+         # build the Multipartite graph and rank candidates using random walk;
+         # alpha controls the weight adjustment mechanism, see TopicRank for
+         # threshold/method parameters.
+         extractor.candidate_weighting(alpha=1.1,
+                                       threshold=0.75,
+                                       method='average')
+         keyphrases = extractor.get_n_best(n=15)
+
+         for val in keyphrases:
+             out.append(val[0])
+     except Exception:
+         out = []
+         traceback.print_exc()
+
+     return out
+
+ from flashtext import KeywordProcessor
+
+
+ def get_keywords(originaltext, summarytext, total):
+     keywords = get_nouns_multipartite(originaltext)
+     print("keywords unsummarized: ", keywords)
+     keyword_processor = KeywordProcessor()
+     for keyword in keywords:
+         keyword_processor.add_keyword(keyword)
+
+     keywords_found = keyword_processor.extract_keywords(summarytext)
+     keywords_found = list(set(keywords_found))
+     print("keywords_found in summarized: ", keywords_found)
+
+     important_keywords = []
+     for keyword in keywords:
+         if keyword in keywords_found:
+             important_keywords.append(keyword)
+
+     return important_keywords[:total]
+
+
+ imp_keywords = get_keywords(text, summarized_text, total)
+ print(imp_keywords)
+
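+ # Note on the pipeline above: keyphrases are ranked on the full input
+ # text with MultipartiteRank, then flashtext keeps only the ones that
+ # also occur in the summary, so every answer span handed to question
+ # generation is grounded in the summarized passage.
+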
+ """# **Question generation with T5**"""
+
+ question_model = T5ForConditionalGeneration.from_pretrained('ramsrigouthamg/t5_squad_v1')
+ question_tokenizer = T5Tokenizer.from_pretrained('ramsrigouthamg/t5_squad_v1')
+ question_model = question_model.to(device)
+
+ def get_question(context, answer, model, tokenizer):
+     text = "context: {} answer: {}".format(context, answer)
+     encoding = tokenizer.encode_plus(text, max_length=384, pad_to_max_length=False, truncation=True, return_tensors="pt").to(device)
+     input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]
+
+     outs = model.generate(input_ids=input_ids,
+                           attention_mask=attention_mask,
+                           early_stopping=True,
+                           num_beams=5,
+                           num_return_sequences=1,
+                           no_repeat_ngram_size=2,
+                           max_length=72)
+
+     dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
+
+     Question = dec[0].replace("question:", "")
+     Question = Question.strip()
+     return Question
+
+
+ for wrp in wrap(summarized_text, 150):
+     print(wrp)
+ print("\n")
+
+ for answer in imp_keywords:
+     ques = get_question(summarized_text, answer, question_model, question_tokenizer)
+     print(ques)
+     print(answer.capitalize())
+     print("\n")
+
+ """# **Gradio UI Visualization**"""
+
+ # !wget https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz
+ # !tar -xvf s2v_reddit_2015_md.tar.gz
+
+ import numpy as np
+ from sense2vec import Sense2Vec
+ s2v = Sense2Vec().from_disk('s2v_old')
+
+ from sentence_transformers import SentenceTransformer
+ # paraphrase-distilroberta-base-v1
+ sentence_transformer_model = SentenceTransformer('msmarco-distilbert-base-v3')
+
+ from similarity.normalized_levenshtein import NormalizedLevenshtein
+ normalized_levenshtein = NormalizedLevenshtein()
+
+ def filter_same_sense_words(original, wordlist):
+     filtered_words = []
+     base_sense = original.split('|')[1]
+     print(base_sense)
+     for eachword in wordlist:
+         if eachword[0].split('|')[1] == base_sense:
+             filtered_words.append(eachword[0].split('|')[0].replace("_", " ").title().strip())
+     return filtered_words
+
+ def get_highest_similarity_score(wordlist, wrd):
+     score = []
+     for each in wordlist:
+         score.append(normalized_levenshtein.similarity(each.lower(), wrd.lower()))
+     return max(score)
+
+ def sense2vec_get_words(word, s2v, topn, question):
+     output = []
+     print("word ", word)
+     try:
+         sense = s2v.get_best_sense(word, senses=["NOUN", "PERSON", "PRODUCT", "LOC", "ORG", "EVENT", "NORP", "WORK OF ART", "FAC", "GPE", "NUM", "FACILITY"])
+         most_similar = s2v.most_similar(sense, n=topn)
+         # print (most_similar)
+         output = filter_same_sense_words(sense, most_similar)
+         print("Similar ", output)
+     except Exception:
+         output = []
+
+     threshold = 0.6
+     final = [word]
+     checklist = question.split()
+     for x in output:
+         if get_highest_similarity_score(final, x) < threshold and x not in final and x not in checklist:
+             final.append(x)
+
+     return final[1:]
+
+ def mmr(doc_embedding, word_embeddings, words, top_n, lambda_param):
+
+     # Extract similarity within words, and between words and the document
+     word_doc_similarity = cosine_similarity(word_embeddings, doc_embedding)
+     word_similarity = cosine_similarity(word_embeddings)
+
+     # Initialize candidates and already choose best keyword/keyphrase
+     keywords_idx = [np.argmax(word_doc_similarity)]
+     candidates_idx = [i for i in range(len(words)) if i != keywords_idx[0]]
+
+     for _ in range(top_n - 1):
+         # Extract similarities within candidates and
+         # between candidates and selected keywords/phrases
+         candidate_similarities = word_doc_similarity[candidates_idx, :]
+         target_similarities = np.max(word_similarity[candidates_idx][:, keywords_idx], axis=1)
+
+         # Calculate MMR
+         mmr = (lambda_param) * candidate_similarities - (1 - lambda_param) * target_similarities.reshape(-1, 1)
+         mmr_idx = candidates_idx[np.argmax(mmr)]
+
+         # Update keywords & candidates
+         keywords_idx.append(mmr_idx)
+         candidates_idx.remove(mmr_idx)
+
+     return [words[idx] for idx in keywords_idx]
+
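+ # How mmr() picks distractors: the first pick is the candidate closest to
+ # the document embedding; each later pick maximizes
+ # lambda_param * relevance - (1 - lambda_param) * similarity_to_picked,
+ # so the low lambdaval used below (0.2) favors diverse distractors over
+ # near-duplicates of the answer.
+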
+ from collections import OrderedDict
+ from sklearn.metrics.pairwise import cosine_similarity
+
+ def get_distractors_wordnet(word):
+     distractors = []
+     try:
+         syn = wn.synsets(word, 'n')[0]
+
+         word = word.lower()
+         orig_word = word
+         if len(word.split()) > 0:
+             word = word.replace(" ", "_")
+         hypernym = syn.hypernyms()
+         if len(hypernym) == 0:
+             return distractors
+         for item in hypernym[0].hyponyms():
+             name = item.lemmas()[0].name()
+             # print ("name ", name, " word", orig_word)
+             if name == orig_word:
+                 continue
+             name = name.replace("_", " ")
+             name = " ".join(w.capitalize() for w in name.split())
+             if name is not None and name not in distractors:
+                 distractors.append(name)
+     except Exception:
+         print("Wordnet distractors not found")
+     return distractors
+
+ def get_distractors(word, origsentence, sense2vecmodel, sentencemodel, top_n, lambdaval):
+     distractors = sense2vec_get_words(word, sense2vecmodel, top_n, origsentence)
+     print("distractors ", distractors)
+     if len(distractors) == 0:
+         return distractors
+     distractors_new = [word.capitalize()]
+     distractors_new.extend(distractors)
+     # print ("distractors_new .. ", distractors_new)
+
+     embedding_sentence = origsentence + " " + word.capitalize()
+     # embedding_sentence = word
+     keyword_embedding = sentencemodel.encode([embedding_sentence])
+     distractor_embeddings = sentencemodel.encode(distractors_new)
+
+     # filtered_keywords = mmr(keyword_embedding, distractor_embeddings, distractors, 4, 0.7)
+     max_keywords = min(len(distractors_new), 5)
+     filtered_keywords = mmr(keyword_embedding, distractor_embeddings, distractors_new, max_keywords, lambdaval)
+     # filtered_keywords = filtered_keywords[1:]
+     final = [word.capitalize()]
+     for wrd in filtered_keywords:
+         if wrd.lower() != word.lower():
+             final.append(wrd.capitalize())
+     final = final[1:]
+     return final
+
+ sent = "What cryptocurrency did Musk rarely tweet about?"
+ keyword = "Bitcoin"
+
+ # sent = "What did Musk say he was working with to improve system transaction efficiency?"
+ # keyword = "Dogecoin"
+
+ # sent = "What company did Musk say would not accept bitcoin payments?"
+ # keyword = "Tesla"
+
+ # sent = "What has Musk often tweeted in support of?"
+ # keyword = "Cryptocurrency"
+
+ print(get_distractors(keyword, sent, s2v, sentence_transformer_model, 40, 0.2))
+
+ """# **Gradio Visualization with MCQs**"""
+
+ # import mysql.connector
+ # import datetime
+
+ # mydb = mysql.connector.connect(
+ #     host="qtechdb-1.cexugk1h8rui.ap-northeast-1.rds.amazonaws.com",
+ #     user="admin",
+ #     password="F3v2vGWzb8vaniE3nqzi",
+ #     database="spring_social"
+ # )
+
+ import gradio as gr
+ import re
+
+
+ context = gr.Textbox(lines=10, placeholder="Enter paragraph/content here...", label="Enter your content (input must be more than 150 words).")
+ total = gr.Slider(1, 10, value=1, step=1, label="Total Number Of Questions")
+ subject = gr.Textbox(placeholder="Enter subject/title here...", label="Enter your title (title must contain at least 1 word).")
+
+ output = gr.HTML(label="Question and Answers")
+
+
+ def generate_question_text(context, subject, total):
+
+     words_text = len(re.findall(r'\w+', context))
+     words_subject = len(re.findall(r'\w+', subject))
+
+     if words_text < 150:
+         raise gr.Error("Invalid input (content must be more than 150 words).")
+         # print("Number of words:", words_text)
+
+     elif words_subject < 1:
+         raise gr.Error("Invalid input (title must contain at least one word).")
+
+     else:
+         summary_text = summarizer(context, summary_model, summary_tokenizer)
+         for wrp in wrap(summary_text, 150):
+             print(wrp)
+         # np = getnounphrases(summary_text, sentence_transformer_model, 3)
+         np = get_keywords(context, summary_text, total)
+         random.shuffle(np)
+         print("\n\nNoun phrases", np)
+
+         output = "<b style='color:black;'>Select/Tick the correct answer.</b><br><br>"
+         i = 1
+         for answer in np:
+             ques = get_question(summary_text, answer, question_model, question_tokenizer)
+
+             distractors = get_distractors(answer.capitalize(), ques, s2v, sentence_transformer_model, 40, 0.2)
+             # output = output + ques + "\n" + "Ans: " + answer.capitalize() + "\n\n"
+             output = output + "<b style='color:black;'>Q" + str(i) + ") " + ques + "</b><br/>"
+             # output = output + "<br>"
+             i += 1
+
+             answerlist = [answer.capitalize()]
+
+             # output = output + "<br><b> ▪ " + answer.capitalize() + "</b>"
+             for distractor in distractors[:3]:
+                 answerlist.append(distractor)
+             random.shuffle(answerlist)
+
+             print(answerlist)
+
+             for answer in answerlist:
+                 output = output + answer.capitalize() + "<br/>"
+
+         output = output + "<br><b style='color:black;'>" + "Correct Answer Key:</b><br>"
+
+         i = 1
+         for answer in np:
+             output = output + "<b style='color:green;'>Ans" + str(i) + ") " + answer.capitalize() + "</b>"
+             output = output + "<br>"
+             i += 1
+
+         # mycursor = mydb.cursor()
+         # timedate = datetime.datetime.now()
+
+         # sql = "INSERT INTO mcqstexts (subject, input, output, timedate) VALUES (%s, %s, %s, %s)"
+         # val = (subject, context, output, timedate)
+         # mycursor.execute(sql, val)
+
+         # mydb.commit()
+
+         # print(mycursor.rowcount, "record inserted.")
+
+         return output
+
+ iface = gr.Interface(
+     fn=generate_question_text,
+     inputs=[context, subject, total],
+     outputs=output,
+     allow_flagging="never", flagging_options=["Save Data"])
+
+ # iface.launch(debug=True, share=True)
+
+ def generate_question(context, subject, total):
+     summary_text = summarizer(context, summary_model, summary_tokenizer)
+     for wrp in wrap(summary_text, 150):
+         print(wrp)
+     # np = getnounphrases(summary_text, sentence_transformer_model, 3)
+     np = get_keywords(context, summary_text, total)
+     random.shuffle(np)
+
+     print("\n\nNoun phrases", np)
+
+     output = "<b style='color:black;'>Select/Tick the correct answer.</b><br><br>"
+     i = 1
+     for answer in np:
+         ques = get_question(summary_text, answer, question_model, question_tokenizer)
+
+         distractors = get_distractors(answer.capitalize(), ques, s2v, sentence_transformer_model, 40, 0.2)
+         # output = output + ques + "\n" + "Ans: " + answer.capitalize() + "\n\n"
+         output = output + "<b style='color:black;'>Q" + str(i) + ") " + ques + "</b><br/>"
+         # output = output + "<br>"
+         i += 1
+
+         answerlist = [answer.capitalize()]
+
+         # output = output + "<br><b> ▪ " + answer.capitalize() + "</b>"
+         for distractor in distractors[:3]:
+             answerlist.append(distractor)
+         random.shuffle(answerlist)
+
+         # print(answerlist)
+
+         for answer in answerlist:
+             output = output + answer.capitalize() + "<br/>"
+
+     output = output + "<br><b style='color:black;'>" + "Correct Answer Key:</b><br>"
+
+     i = 1
+     for answer in np:
+         output = output + "<b style='color:green;'>Ans" + str(i) + ") " + answer.capitalize() + "</b>"
+         output = output + "<br/>"
+         i += 1
+
+     # mycursor = mydb.cursor()
+     # timedate = datetime.datetime.now()
+
+     # sql = "INSERT INTO mcqstexts (subject, input, output, timedate) VALUES (%s, %s, %s, %s)"
+     # val = (subject, context, output, timedate)
+     # mycursor.execute(sql, val)
+
+     # mydb.commit()
+
+     # print(mycursor.rowcount, "record inserted.")
+
+     return output
+
+ import pandas as pd
+
+ file = None
+
+ def filecreate(x, subject, total):
+
+     with open(x.name) as fo:
+         text = fo.read()
+     # print(text)
+
+     words_text = len(re.findall(r'\w+', text))
+     words_subject = len(re.findall(r'\w+', subject))
+
+     if words_text < 150:
+         raise gr.Error("Invalid input (file must contain more than 150 words).")
+         # print("Number of words:", words_text)
+
+     elif words_subject < 1:
+         raise gr.Error("Invalid input (title must contain at least one word).")
+
+     else:
+         generated = generate_question(text, subject, total)
+
+     return generated
+
+ # filecreate(file, 2)
+
+ import gradio as gr
+
+ context = gr.HTML(label="Text")
+ file = gr.File(label="Upload your *.txt file (file must contain more than 150 words).")
+ total = gr.Slider(1, 10, value=1, step=1, label="Total Number Of Questions")
+ subject = gr.Textbox(placeholder="Enter subject/title here...", label="Enter your title (title must contain at least 1 word).")
+
+
+ fface = gr.Interface(
+     fn=filecreate,
+     inputs=[file, subject, total],
+     outputs=context,
+     # css=".gradio-container {background-image: url('file=blue.jpg')}",
+     allow_flagging="never", flagging_options=["Save Data"])
+
+ # fface.launch(debug=True, share=True)
+
+ demo = gr.TabbedInterface([iface, fface], ["Text", "Upload File"])
+ demo.launch(debug=True, show_api=False)
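
`app.py` expects the sense2vec vectors to already be on disk in `s2v_old` (the `wget`/`tar` comments in the Gradio section show where they come from). A minimal fetch-and-extract sketch, assuming the v1.0.0 release URL from those comments is still live and that the tarball unpacks to an `s2v_old/` directory:

```python
# Sketch: fetch the sense2vec archive that app.py loads with from_disk("s2v_old").
import tarfile
import urllib.request

URL = ("https://github.com/explosion/sense2vec/releases/download/"
       "v1.0.0/s2v_reddit_2015_md.tar.gz")

urllib.request.urlretrieve(URL, "s2v_reddit_2015_md.tar.gz")
with tarfile.open("s2v_reddit_2015_md.tar.gz", "r:gz") as tar:
    tar.extractall(".")  # assumed to produce the s2v_old/ directory
```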
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ textwrap3
+ transformers
+ nltk
+ torch
+ sentencepiece
+ git+https://github.com/boudinfl/pke.git
+ flashtext
+ spacy
+ https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
+ sense2vec
+ sentence-transformers
+ strsim
+ ipython-autotime
+ cosine-similarity
+
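
Several of these pip names differ from the names imported in `app.py` (`strsim` provides the `similarity` package, and the spaCy wheel installs as `en_core_web_sm`), so a quick environment smoke test, assuming the requirements above installed cleanly, is to import everything the app touches:

```python
# Smoke test: import the modules app.py depends on.
import nltk
import pke            # installed from the boudinfl/pke git URL above
import spacy
import torch
from flashtext import KeywordProcessor
from sense2vec import Sense2Vec
from sentence_transformers import SentenceTransformer
from similarity.normalized_levenshtein import NormalizedLevenshtein  # from strsim
from textwrap3 import wrap
from transformers import T5ForConditionalGeneration, T5Tokenizer

print("All imports resolved.")
```

Note that `gradio` itself is not pinned here; on Hugging Face Spaces it is supplied according to the `sdk`/`sdk_version` fields in `README.md`.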
s2v_old/._cfg ADDED
Binary file (174 Bytes)
s2v_old/._freqs.json ADDED
Binary file (174 Bytes)
s2v_old/._key2row ADDED
Binary file (174 Bytes)
s2v_old/._strings.json ADDED
Binary file (174 Bytes)
s2v_old/._vectors ADDED
Binary file (174 Bytes)
s2v_old/PaxHeader/cfg ADDED
@@ -0,0 +1 @@
+ 30 mtime=1574030044.191361533
s2v_old/cfg ADDED
@@ -0,0 +1,36 @@
+ {
+   "senses":[
+     "PUNCT",
+     "SYM",
+     "MONEY",
+     "PERCENT",
+     "PRODUCT",
+     "X",
+     "LANGUAGE",
+     "DET",
+     "LOC",
+     "CARDINAL",
+     "CONJ",
+     "LAW",
+     "ORG",
+     "PART",
+     "VERB",
+     "NUM",
+     "EVENT",
+     "ADP",
+     "PERSON",
+     "QUANTITY",
+     "INTJ",
+     "TIME",
+     "SPACE",
+     "DATE",
+     "ADJ",
+     "NOUN",
+     "NORP",
+     "ORDINAL",
+     "WORK OF ART",
+     "ADV",
+     "FAC",
+     "GPE"
+   ]
+ }
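
This `senses` list is the inventory that the `word|SENSE` keys in the vector table are tagged with, and it matches what `app.py` filters on in `get_best_sense` and `filter_same_sense_words`. A short inspection sketch, assuming the vectors have been extracted to `s2v_old` as above:

```python
# Sketch: look up a sense-tagged key in the extracted s2v_old vectors.
from sense2vec import Sense2Vec

s2v = Sense2Vec().from_disk("s2v_old")
sense = s2v.get_best_sense("bitcoin", senses=["NOUN", "PRODUCT", "ORG"])
print(sense)                        # e.g. "bitcoin|NOUN"
print(s2v.most_similar(sense, n=5))
```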
s2v_old/freqs.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb75f4bbf927c536d808426c6e9f55ef1f69ab44e473c460b8e13274eab97241
+ size 49969681
s2v_old/key2row ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:29690c5ab1c96b6f9061b25bf737fee04540187328a3857cea0f9a1b4da46614
+ size 16492891
s2v_old/strings.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f1ecd6b643475b42d153c74515cba54c12e28e1edac8abbd51794a6ca4a105e0
+ size 26188439
s2v_old/vectors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:290724e713d3e8da2ed0f82ab2ad1a1aeaa9d5fe1330baccd26b62a7399f6d71
+ size 611973760