DevBM committed
Commit dbb2b74 · 1 parent: 26381d0

reversing last update

Files changed (1): app.py (+5, −83)
app.py CHANGED
@@ -1,6 +1,5 @@
 import streamlit as st
 from transformers import T5ForConditionalGeneration, T5Tokenizer
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, LlamaForCausalLM
 import spacy
 import nltk
 from sklearn.feature_extraction.text import TfidfVectorizer
@@ -32,8 +31,6 @@ import uuid
 import time
 import asyncio
 import aiohttp
-import torch
-from dotenv import load_dotenv
 print("***************************************************************")
 
 st.set_page_config(
@@ -47,8 +44,6 @@ st.set_page_config(
 
 st.set_option('deprecation.showPyplotGlobalUse',False)
 
-HF_TOKEN = st.secrets['HF_TOKEN']
-
 class QuestionGenerationError(Exception):
     """Custom exception for question generation errors."""
     pass
@@ -90,7 +85,7 @@ def load_model(modelname):
 # Load Spacy Model
 @st.cache_resource
 def load_nlp_models():
-    nlp = spacy.load("en_core_web_lg")
+    nlp = spacy.load("en_core_web_md")
     s2v = sense2vec.Sense2Vec().from_disk('s2v_old')
     return nlp, s2v
 
@@ -103,13 +98,6 @@ def load_qa_models():
     spell = SpellChecker()
     return similarity_model, spell
 
-@st.cache_resource
-def load_llm_model():
-    model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = LlamaForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
-    return tokenizer, model
-
 with st.sidebar:
     select_model = st.selectbox("Select Model", ("T5-large","T5-small"))
     if select_model == "T5-large":
@@ -121,10 +109,6 @@ similarity_model, spell = load_qa_models()
 context_model = similarity_model
 sentence_model = similarity_model
 model, tokenizer = load_model(modelname)
-# llm_tokenizer, llm_model = load_llm_model()
-llm_tokenizer, llm_model = "meta-llama/Meta-Llama-3-8B-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct"
-pipe = pipeline("text-generation", model=llm_model, tokenizer=llm_tokenizer, max_new_tokens=200)
-
 # Info Section
 def display_info():
     st.sidebar.title("Information")
@@ -334,65 +318,7 @@ def get_word_type(word):
     doc = nlp(word)
     return doc[0].pos_
 
-def generate_text_with_llama(prompt):
-    full_prompt = f"""[INST] {prompt} [/INST]"""
-    result = pipe(prompt, temperature=0.7, do_sample=True)[0]['generated_text']
-    # Extract the generated part after the prompt
-    # return result.split('[/INST]')[-1].strip()
-    return result
-
-async def generate_options_with_llm(answer, context, question, n=4):
-    prompt = f"""Given the following context, question, and correct answer, generate {n-1} incorrect but plausible answer options. The options should be:
-    1. Contextually related to the given context
-    2. Grammatically consistent with the question
-    3. Different from the correct answer
-    4. Not explicitly mentioned in the given context
-
-    Context: {context}
-    Question: {question}
-    Correct Answer: {answer}
-
-    Provide the options in a comma-separated list.
-    """
-
-    try:
-        response = await asyncio.to_thread(generate_text_with_llama, prompt)
-        options = [option.strip() for option in response.split(',')]
-        options = [option for option in options if option.lower() != answer.lower()]
-        print(f"\n\nLLM Options are: {options}\n\n")
-        return options[:n-1]  # Ensure we only return n-1 options
-    except Exception as e:
-        st.error(f"Error generating options with LLM: {e}")
-        return []
-
-
 async def generate_options_async(answer, context, question, n=4):
-    options = [answer]
-
-    # Generate options using the language model
-    llm_options = await generate_options_with_llm(answer, context, question, n)
-    options.extend(llm_options)
-
-    # If we don't have enough options, fall back to previous methods
-    if len(options) < n:
-        semantic_options = await generate_semantic_options(answer, context, question, n - len(options))
-        options.extend(semantic_options)
-
-    # If we still don't have enough options, use the fallback method
-    while len(options) < n:
-        fallback_options = await get_fallback_options(answer, context)
-        for option in fallback_options:
-            if option not in options and ensure_grammatical_consistency(question, answer, option):
-                options.append(option)
-            if len(options) == n:
-                break
-
-    # Shuffle the options
-    random.shuffle(options)
-
-    return options
-
-async def generate_semantic_options(answer, context, question, n=4):
     try:
         options = [answer]
 
@@ -409,7 +335,7 @@ async def generate_semantic_options(answer, context, question, n=4):
         for word in context_words:
             if get_word_type(word) == answer_type:
                 similarity = get_semantic_similarity(answer, word)
-                if 0.2 < similarity < 0.8:  # Adjust these thresholds as needed
+                if 0.3 < similarity < 0.8:  # Adjust these thresholds as needed
                     similar_words.append((word, similarity))
 
         # Sort by similarity (descending) and take top n-1
@@ -519,16 +445,13 @@ async def generate_questions_async(text, num_questions, context_window_size, num
         st.error(f"An unexpected error occurred: {str(e)}")
         return []
 
-async def process_batch(batch, keywords, context_window_size, num_beams, use_llm_options):
+async def process_batch(batch, keywords, context_window_size, num_beams):
     questions = []
     for text in batch:
         keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
        for keyword, context in keyword_sentence_mapping.items():
            question = await generate_question_async(context, keyword, num_beams)
-            if use_llm_options:
-                options = await generate_options_async(keyword, context, question)
-            else:
-                options = await generate_semantic_options(keyword, context, question)
+            options = await generate_options_async(keyword, context, question)
            overall_score, relevance_score, complexity_score, spelling_correctness = assess_question_quality(context, question, keyword)
            if overall_score >= 0.5:
                questions.append({
@@ -604,7 +527,6 @@ def assess_question_quality(context, question, answer):
     return overall_score, relevance_score, complexity_score, spelling_correctness
 
 def main():
-    # load_dotenv()
     # Streamlit interface
     st.title(":blue[Question Generator System]")
     session_id = get_session_id()
@@ -654,7 +576,7 @@ def main():
         start_time = time.time()
         with st.spinner("Generating questions..."):
             try:
-                state['generated_questions'] = asyncio.run(generate_questions_async(text, num_questions, context_window_size, num_beams, extract_all_keywords, use_llm_for_options))
+                state['generated_questions'] = asyncio.run(generate_questions_async(text, num_questions, context_window_size, num_beams, extract_all_keywords))
                 if not state['generated_questions']:
                     st.warning("No questions were generated. The text might be too short or lack suitable content.")
                 else:
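
With the LLM branch reverted, distractor options come entirely from the semantic-similarity filter restored above. For reviewers who want that logic in isolation, here is a minimal standalone sketch; the embedding model name ("all-MiniLM-L6-v2") and the helper names are illustrative assumptions standing in for the cached similarity_model that app.py already loads, while the (0.3, 0.8) band matches the diff:

# Minimal sketch of the semantic distractor filter this commit falls back to.
# Assumption: sentence-transformers is installed, and "all-MiniLM-L6-v2"
# stands in for app.py's cached similarity_model.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")

def semantic_similarity(a: str, b: str) -> float:
    # Cosine similarity between the two word/phrase embeddings
    emb = model.encode([a, b], convert_to_tensor=True)
    return util.cos_sim(emb[0], emb[1]).item()

def pick_distractors(answer: str, candidates: list[str], n: int = 3) -> list[str]:
    scored = []
    for word in candidates:
        sim = semantic_similarity(answer, word)
        # Keep words related enough to be plausible (> 0.3) but not so close
        # to the answer (>= 0.8) that they are near-synonyms.
        if 0.3 < sim < 0.8:
            scored.append((word, sim))
    scored.sort(key=lambda pair: pair[1], reverse=True)  # most similar first
    return [word for word, _ in scored[:n]]

pick_distractors(answer, context_words) mirrors the similar_words loop in the hunk above: candidates too close to the answer are rejected as giveaways, candidates too distant as implausible.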
 
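The restored process_batch also collapses the options branch to a single await, so each batch item flows keyword → question → options → quality score sequentially on the event loop. A minimal sketch of that control flow, under stated assumptions: the signature is simplified (app.py additionally threads keywords, context_window_size, and num_beams through), and the two generate_* coroutines are stubs for the real T5 call and the semantic filter shown earlier.

# Sketch of the restored process_batch control flow (LLM branch removed).
import asyncio

async def generate_question_async(context: str, keyword: str) -> str:
    # Offload a blocking model call to a worker thread so the event loop stays free.
    return await asyncio.to_thread(lambda: f"What does '{keyword}' mean here: {context}?")

async def generate_options_async(keyword: str, context: str, question: str) -> list[str]:
    # Stub for the semantic distractor filter; the answer is always option one.
    return await asyncio.to_thread(lambda: [keyword, "distractor 1", "distractor 2", "distractor 3"])

async def process_batch(batch: list[str], keyword_map) -> list[dict]:
    questions = []
    for text in batch:
        for keyword, context in keyword_map(text).items():
            question = await generate_question_async(context, keyword)
            options = await generate_options_async(keyword, context, question)  # single path, no use_llm_options flag
            questions.append({"question": question, "answer": keyword, "options": options})
    return questions

if __name__ == "__main__":
    demo_map = lambda text: {"asyncio": text}  # trivial keyword -> context mapping for the demo
    print(asyncio.run(process_batch(["asyncio drives coroutines on an event loop."], demo_map)))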