AneriThakkar committed
Commit 126de2b
Parent: dbb2b74

added email mechanism

Files changed (1)
  1. app.py +84 -89
app.py CHANGED
@@ -11,8 +11,7 @@ from functools import lru_cache
11
  nltk.download('punkt')
12
  nltk.download('stopwords')
13
  nltk.download('brown')
14
- from nltk.tokenize import sent_tokenize, word_tokenize
15
- from nltk.tag import pos_tag
16
  nltk.download('wordnet')
17
  from nltk.corpus import wordnet
18
  import random
@@ -31,6 +30,13 @@ import uuid
31
  import time
32
  import asyncio
33
  import aiohttp
34
  print("***************************************************************")
35
 
36
  st.set_page_config(
@@ -107,7 +113,6 @@ elif select_model == "T5-small":
107
  nlp, s2v = load_nlp_models()
108
  similarity_model, spell = load_qa_models()
109
  context_model = similarity_model
110
- sentence_model = similarity_model
111
  model, tokenizer = load_model(modelname)
112
  # Info Section
113
  def display_info():
@@ -148,6 +153,7 @@ def get_pdf_text(pdf_file):
148
  page = doc.load_page(page_num)
149
  text += page.get_text()
150
  return text
 
151
  def save_feedback(question, answer, rating, options, context):
152
  feedback_file = 'question_feedback.json'
153
  if os.path.exists(feedback_file):
@@ -164,10 +170,38 @@ def save_feedback(question, answer, rating, options, context):
164
  }
165
  # feedback_data[question] = rating
166
  feedback_data.append(tpl)
167
-
168
  with open(feedback_file, 'w') as f:
169
  json.dump(feedback_data, f)
170
-
171
 
172
  # Function to clean text
173
  def clean_text(text):
@@ -253,7 +287,7 @@ def get_synonyms(word, n=3):
253
  return synonyms
254
  return synonyms
255
 
256
- def get_fallback_options(answer, context, n=3):
257
  options = [answer]
258
 
259
  # Add contextually relevant words using a pre-trained model
@@ -294,84 +328,6 @@ def get_fallback_options(answer, context, n=3):
294
 
295
  return options
296
 
297
- def get_semantic_similarity(word1, word2):
298
- embeddings = sentence_model.encode([word1, word2])
299
- return util.pytorch_cos_sim(embeddings[0], embeddings[1]).item()
300
-
301
- def ensure_grammatical_consistency(question, answer, option):
302
- question_pos = pos_tag(word_tokenize(question))
303
- answer_pos = pos_tag(word_tokenize(answer))
304
- option_pos = pos_tag(word_tokenize(option))
305
-
306
- # Check if the answer and option have the same part of speech
307
- if answer_pos[-1][1] != option_pos[-1][1]:
308
- return False
309
-
310
- # Check if the option fits grammatically in the question
311
- question_template = question.replace(answer, "PLACEHOLDER")
312
- option_question = question_template.replace("PLACEHOLDER", option)
313
- option_question_pos = pos_tag(word_tokenize(option_question))
314
-
315
- return question_pos == option_question_pos
316
-
317
- def get_word_type(word):
318
- doc = nlp(word)
319
- return doc[0].pos_
320
-
321
- async def generate_options_async(answer, context, question, n=4):
322
- try:
323
- options = [answer]
324
-
325
- # Get context words
326
- doc = nlp(context)
327
- context_words = [token.text for token in doc if token.is_alpha and token.text.lower() != answer.lower()]
328
-
329
- # Get answer type
330
- answer_type = get_word_type(answer)
331
- print(answer_type,"\n")
332
-
333
- # Get semantically similar words
334
- similar_words = []
335
- for word in context_words:
336
- if get_word_type(word) == answer_type:
337
- similarity = get_semantic_similarity(answer, word)
338
- if 0.3 < similarity < 0.8: # Adjust these thresholds as needed
339
- similar_words.append((word, similarity))
340
-
341
- # Sort by similarity (descending) and take top n-1
342
- similar_words.sort(key=lambda x: x[1], reverse=True)
343
- top_similar_words = [word for word, _ in similar_words[:n-1]]
344
-
345
- # Ensure grammatical consistency
346
- consistent_options = []
347
- for word in top_similar_words:
348
- if ensure_grammatical_consistency(question, answer, word):
349
- consistent_options.append(word)
350
- if len(consistent_options) == n-1:
351
- break
352
-
353
- options.extend(consistent_options)
354
-
355
- # If we don't have enough options, fall back to original method
356
- while len(options) < n:
357
- fallback_options = get_fallback_options(answer, context, 3)
358
- for option in fallback_options:
359
- if option not in options and ensure_grammatical_consistency(question, answer, option):
360
- options.append(option)
361
- break
362
-
363
- # Shuffle the options
364
- random.shuffle(options)
365
- print(options)
366
- st.write("All possibel options are: ", options, "\n")
367
- return options
368
-
369
- except Exception as e:
370
- raise QuestionGenerationError(f"Error in generating options: {str(e)}")
371
-
372
-
373
-
374
-
375
  # Function to map keywords to sentences with customizable context window size
376
  def map_keywords_to_sentences(text, keywords, context_window_size):
377
  sentences = sent_tokenize(text)
@@ -411,8 +367,38 @@ async def generate_question_async(context, answer, num_beams):
411
  except Exception as e:
412
  raise QuestionGenerationError(f"Error in question generation: {str(e)}")
413
 
414
 
415
 
416
 
417
 
418
  # Function to generate questions using beam search
@@ -451,7 +437,7 @@ async def process_batch(batch, keywords, context_window_size, num_beams):
451
  keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
452
  for keyword, context in keyword_sentence_mapping.items():
453
  question = await generate_question_async(context, keyword, num_beams)
454
- options = await generate_options_async(keyword, context, question)
455
  overall_score, relevance_score, complexity_score, spelling_correctness = assess_question_quality(context, question, keyword)
456
  if overall_score >= 0.5:
457
  questions.append({
@@ -548,7 +534,6 @@ def main():
548
  num_beams = st.slider("Select number of beams for question generation", min_value=2, max_value=10, value=2)
549
  context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
550
  num_questions = st.slider("Select number of questions to generate", min_value=1, max_value=1000, value=5)
551
- use_llm_for_options = st.toggle("Use AI for Advanced option generation", False)
552
  col1, col2 = st.columns(2)
553
  with col1:
554
  extract_all_keywords = st.toggle("Extract Max Keywords",value=False)
@@ -569,7 +554,7 @@ def main():
569
  if text:
570
  text = clean_text(text)
571
  generate_questions_button = st.button("Generate Questions")
572
- # st.markdown('<span aria-label="Generate questions button">Above is the generate questions button</span>', unsafe_allow_html=True)
573
 
574
  # if generate_questions_button:
575
  if generate_questions_button and text:
@@ -625,10 +610,20 @@ def main():
625
  q['question'] = st.text_input(f"Edit Question {i+1}:", value=q['question'], key=f"question_{i}")
626
  q['rating'] = st.select_slider(f"Rate this question (1-5)", options=[1, 2, 3, 4, 5], key=f"rating_{i}")
627
  if st.button(f"Submit Feedback for Question {i+1}", key=f"submit_{i}"):
628
- save_feedback(q['question'], q['answer'], q['rating'], q['options'], q['context'])
629
  st.success(f"Feedback submitted for Question {i+1}")
 
630
  st.write("---")
631
-
 
632
  # Export buttons
633
  # if st.session_state.generated_questions:
634
  if state['generated_questions']:
 
11
  nltk.download('punkt')
12
  nltk.download('stopwords')
13
  nltk.download('brown')
14
+ from nltk.tokenize import sent_tokenize
 
15
  nltk.download('wordnet')
16
  from nltk.corpus import wordnet
17
  import random
 
30
  import time
31
  import asyncio
32
  import aiohttp
33
+ # '-----------------'
34
+ import smtplib
35
+ from email.mime.multipart import MIMEMultipart
36
+ from email.mime.text import MIMEText
37
+ from email.mime.base import MIMEBase
38
+ from email import encoders
39
+ # '------------------'
40
  print("***************************************************************")
41
 
42
  st.set_page_config(
 
113
  nlp, s2v = load_nlp_models()
114
  similarity_model, spell = load_qa_models()
115
  context_model = similarity_model
 
116
  model, tokenizer = load_model(modelname)
117
  # Info Section
118
  def display_info():
 
153
  page = doc.load_page(page_num)
154
  text += page.get_text()
155
  return text
156
+
157
  def save_feedback(question, answer, rating, options, context):
158
  feedback_file = 'question_feedback.json'
159
  if os.path.exists(feedback_file):
 
170
  }
171
  # feedback_data[question] = rating
172
  feedback_data.append(tpl)
173
+ print(feedback_data)
174
  with open(feedback_file, 'w') as f:
175
  json.dump(feedback_data, f)
176
+
177
+ return feedback_file
178
+ # -----------------------------------------------------------------------------------------
179
+ def send_email_with_attachment(email_subject, email_body, recipient_emails, sender_email, sender_password, attachment_path):
180
+ msg = MIMEMultipart()
181
+ msg['From'] = sender_email
182
+ msg['To'] = ", ".join(recipient_emails) # Join the list of recipients with commas
183
+
184
+ msg['Subject'] = email_subject
185
+
186
+ msg.attach(MIMEText(email_body, 'plain'))
187
+
188
+ attachment = open(attachment_path, 'rb')
189
+ part = MIMEBase('application', 'octet-stream')
190
+ part.set_payload(attachment.read())
191
+ encoders.encode_base64(part)
192
+ part.add_header('Content-Disposition', f'attachment; filename={os.path.basename(attachment_path)}')
193
+
194
+ msg.attach(part)
195
+ attachment.close()
196
+
197
+ with smtplib.SMTP('smtp.gmail.com', 587) as server:
198
+ server.starttls()
199
+ print(sender_email)
200
+ print(sender_password)
201
+ server.login(sender_email, sender_password)
202
+ text = msg.as_string()
203
+ server.sendmail(sender_email, recipient_emails, text)
204
+ # ----------------------------------------------------------------------------------
205
 
206
  # Function to clean text
207
  def clean_text(text):
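The two additions above are meant to work together: save_feedback now returns the path of the JSON file it writes, and send_email_with_attachment mails that file as an attachment via Gmail SMTP (STARTTLS on port 587, which typically requires a Gmail app password rather than the account password). A minimal sketch of the call shape, assuming both functions are importable from app.py (importing it also runs the top-level Streamlit setup); every address and credential below is a placeholder:

# Illustrative only: placeholder addresses and password, not real credentials.
from app import save_feedback, send_email_with_attachment

feedback_file = save_feedback(
    question="What is the capital of France?",
    answer="Paris",
    rating=5,
    options=["Paris", "Lyon", "Nice", "Lille"],
    context="Paris is the capital of France.",
)

send_email_with_attachment(
    email_subject="feedback from QGen",
    email_body="Please find the attached feedback JSON file.",
    recipient_emails=["admin@example.com"],        # placeholder recipient
    sender_email="sender@example.com",             # placeholder Gmail account
    sender_password="gmail-app-password",          # placeholder app password
    attachment_path=feedback_file,
)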
 
287
  return synonyms
288
  return synonyms
289
 
290
+ def generate_options(answer, context, n=3):
291
  options = [answer]
292
 
293
  # Add contextually relevant words using a pre-trained model
 
328
 
329
  return options
330
 
331
  # Function to map keywords to sentences with customizable context window size
332
  def map_keywords_to_sentences(text, keywords, context_window_size):
333
  sentences = sent_tokenize(text)
 
367
  except Exception as e:
368
  raise QuestionGenerationError(f"Error in question generation: {str(e)}")
369
 
370
+ async def generate_options_async(answer, context, n=3):
371
+ try:
372
+ options = [answer]
373
+
374
+ # Add contextually relevant words using a pre-trained model
375
+ context_embedding = await asyncio.to_thread(context_model.encode, context)
376
+ answer_embedding = await asyncio.to_thread(context_model.encode, answer)
377
+ context_words = [token.text for token in nlp(context) if token.is_alpha and token.text.lower() != answer.lower()]
378
 
379
+ # Compute similarity scores and sort context words
380
+ similarity_scores = [util.pytorch_cos_sim(await asyncio.to_thread(context_model.encode, word), answer_embedding).item() for word in context_words]
381
+ sorted_context_words = [word for _, word in sorted(zip(similarity_scores, context_words), reverse=True)]
382
+ options.extend(sorted_context_words[:n])
383
 
384
+ # Try to get similar words based on sense2vec
385
+ similar_words = await asyncio.to_thread(get_similar_words_sense2vec, answer, n)
386
+ options.extend(similar_words)
387
+
388
+ # If we don't have enough options, try synonyms
389
+ if len(options) < n + 1:
390
+ synonyms = await asyncio.to_thread(get_synonyms, answer, n - len(options) + 1)
391
+ options.extend(synonyms)
392
+
393
+ # Ensure we have the correct number of unique options
394
+ options = list(dict.fromkeys(options))[:n+1]
395
+
396
+ # Shuffle the options
397
+ random.shuffle(options)
398
+
399
+ return options
400
+ except Exception as e:
401
+ raise QuestionGenerationError(f"Error in generating options: {str(e)}")
402
 
403
 
404
  # Function to generate questions using beam search
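The rewritten generate_options_async above moves the blocking sentence-transformer encode calls off the event loop with asyncio.to_thread (Python 3.9+). A self-contained sketch of that offloading pattern, with a dummy blocking_encode standing in for context_model.encode; the sketch gathers the calls concurrently for illustration, whereas the committed code awaits them one by one inside comprehensions:

import asyncio
import time

def blocking_encode(text):
    # Stand-in for context_model.encode: CPU-bound work that would block the loop.
    time.sleep(0.1)
    return [float(len(text))]

async def encode_all(texts):
    # Run each blocking call in a worker thread and await the results together.
    return await asyncio.gather(*(asyncio.to_thread(blocking_encode, t) for t in texts))

if __name__ == "__main__":
    print(asyncio.run(encode_all(["answer", "context word"])))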
 
437
  keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
438
  for keyword, context in keyword_sentence_mapping.items():
439
  question = await generate_question_async(context, keyword, num_beams)
440
+ options = await generate_options_async(keyword, context)
441
  overall_score, relevance_score, complexity_score, spelling_correctness = assess_question_quality(context, question, keyword)
442
  if overall_score >= 0.5:
443
  questions.append({
 
534
  num_beams = st.slider("Select number of beams for question generation", min_value=2, max_value=10, value=2)
535
  context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
536
  num_questions = st.slider("Select number of questions to generate", min_value=1, max_value=1000, value=5)
 
537
  col1, col2 = st.columns(2)
538
  with col1:
539
  extract_all_keywords = st.toggle("Extract Max Keywords",value=False)
 
554
  if text:
555
  text = clean_text(text)
556
  generate_questions_button = st.button("Generate Questions")
557
+ st.markdown('<span aria-label="Generate questions button">Above is the generate questions button</span>', unsafe_allow_html=True)
558
 
559
  # if generate_questions_button:
560
  if generate_questions_button and text:
 
610
  q['question'] = st.text_input(f"Edit Question {i+1}:", value=q['question'], key=f"question_{i}")
611
  q['rating'] = st.select_slider(f"Rate this question (1-5)", options=[1, 2, 3, 4, 5], key=f"rating_{i}")
612
  if st.button(f"Submit Feedback for Question {i+1}", key=f"submit_{i}"):
613
+ feedback_file=save_feedback(q['question'], q['answer'], q['rating'], q['options'], q['context'])
614
  st.success(f"Feedback submitted for Question {i+1}")
615
+ pswd = st.secrets['EMAIL_PASSWORD']
616
+ send_email_with_attachment(
617
+ email_subject='feedback from QGen',
618
+ email_body='Please find the attached feedback JSON file.',
619
+ recipient_emails=['apjc01unique@gmail.com', 'channingfisher7@gmail.com'],
620
+ sender_email='apjc01unique@gmail.com',
621
+ sender_password=pswd,
622
+ attachment_path=feedback_file)
623
+ st.write("Feedback sent to admin")
624
  st.write("---")
625
+
626
+
627
  # Export buttons
628
  # if st.session_state.generated_questions:
629
  if state['generated_questions']:
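The feedback handler above reads the sender password from Streamlit secrets (st.secrets['EMAIL_PASSWORD'], normally defined in .streamlit/secrets.toml or in the host's secrets settings). A hedged sketch of a guarded lookup; only the key name comes from the diff, and the fallback behaviour is an illustrative assumption, not part of the commit:

import streamlit as st

def get_email_password():
    # st.secrets raises if the key (or the secrets file itself) is missing,
    # so catch broadly and let the app fall back to "feedback saved, not emailed".
    try:
        return st.secrets["EMAIL_PASSWORD"]
    except Exception:
        st.warning("EMAIL_PASSWORD secret not configured; feedback email skipped.")
        return None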