# Option generator: builds MCQ answer options (correct answer + distractors) using NLTK/WordNet.
import random

import nltk
from nltk.corpus import wordnet
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize
class OptionGenerator:
    """Generate multiple-choice options (correct answer plus distractors) using WordNet."""

    def __init__(self):
        """Initialize the option generator, downloading required NLTK resources.

        Raises:
            Exception: Re-raised after logging if any NLTK resource fails to load.
        """
        try:
            # Download required NLTK data with explicit resource names
            nltk.download('punkt', quiet=True)
            nltk.download('averaged_perceptron_tagger', quiet=True)
            nltk.download('wordnet', quiet=True)
            nltk.download('stopwords', quiet=True)
            nltk.download('universal_tagset', quiet=True)
            nltk.download('tagsets', quiet=True)
            self.stop_words = set(nltk.corpus.stopwords.words('english'))
            self.word_net_lemmatizer = nltk.WordNetLemmatizer()
            # Map Penn Treebank POS tags to WordNet POS constants (n/v/a/r)
            self.pos_mapping = {
                'NN': 'n', 'NNS': 'n', 'NNP': 'n', 'NNPS': 'n',
                'VB': 'v', 'VBD': 'v', 'VBG': 'v', 'VBN': 'v', 'VBP': 'v', 'VBZ': 'v',
                'JJ': 'a', 'JJR': 'a', 'JJS': 'a',
                'RB': 'r', 'RBR': 'r', 'RBS': 'r'
            }
        except Exception as e:
            print(f"Error initializing OptionGenerator: {str(e)}")
            raise

    def _get_synonyms(self, word, pos=None):
        """Get up to 10 single-word synonyms for *word* using WordNet.

        Args:
            word (str): Word to look up.
            pos (str, optional): Penn Treebank POS tag (e.g. 'NN', 'VBD').

        Returns:
            list[str]: Lowercased synonyms, at most 10; empty on failure,
            for stop words, or for words shorter than 3 characters.
        """
        synonyms = set()
        word_lower = word.lower()
        # Skip if word is too short or a stop word
        if len(word) < 3 or word_lower in self.stop_words:
            return []
        try:
            wordnet_pos = self.pos_mapping.get(pos) if pos else None

            def collect(synsets):
                # Keep lowercase single-word lemmas that differ from the input word.
                # Compare against word_lower (not word): the original compared a
                # lowercased candidate to the raw word, so e.g. 'Paris' never
                # matched 'paris' and could be returned as its own synonym.
                for syn in synsets:
                    for lemma in syn.lemmas():
                        candidate = lemma.name().replace('_', ' ').lower()
                        if candidate != word_lower and len(candidate.split()) == 1:
                            synonyms.add(candidate)

            # Try with the provided POS tag first
            if wordnet_pos:
                collect(wordnet.synsets(word, pos=wordnet_pos))
            # If no synonyms found, try without POS tag
            if not synonyms:
                collect(wordnet.synsets(word))
            # If still no synonyms, try lemmatizing verbs and looking up the lemma
            if not synonyms and pos and pos.startswith('VB'):
                lemma_form = self.word_net_lemmatizer.lemmatize(word, pos='v')
                if lemma_form != word:
                    collect(wordnet.synsets(lemma_form, pos='v'))
        except Exception as e:
            print(f"Error getting synonyms for '{word}': {str(e)}")
        return list(synonyms)[:10]  # Return at most 10 synonyms

    def _get_distractors(self, word, pos=None, num=3):
        """Generate up to *num* distractor strings for *word*.

        Synonyms are preferred; other lemmas from the word's synsets pad out
        the list if needed.

        Args:
            word (str): The correct answer to build distractors for.
            pos (str, optional): Penn Treebank POS tag of *word*.
            num (int): Maximum number of distractors to return.

        Returns:
            list[str]: At most *num* lowercased distractors; empty on failure.
        """
        distractors = set()
        word_lower = word.lower()
        try:
            # Get synonyms first — the most plausible distractors
            synonyms = self._get_synonyms(word, pos)
            distractors.update(synonyms[:num])
            # If not enough synonyms, add other lemmas from the word's synsets
            if len(distractors) < num:
                wordnet_pos = self.pos_mapping.get(pos) if pos else None
                for syn in wordnet.synsets(word, pos=wordnet_pos):
                    for lemma in syn.lemmas():
                        candidate = lemma.name().replace('_', ' ').lower()
                        # Case-insensitive comparison so the answer itself
                        # never appears among its distractors
                        if candidate != word_lower and candidate not in distractors:
                            distractors.add(candidate)
                            if len(distractors) >= num:
                                break
                    if len(distractors) >= num:
                        break
        except Exception as e:
            print(f"Error generating distractors for '{word}': {str(e)}")
        return list(distractors)[:num]

    def extract_answer_from_context(self, question, context):
        """
        Extract the most likely answer from the context based on the question.
        This version uses simple string matching instead of POS tagging.

        Args:
            question (str): Generated question
            context (str): Source sentence/context

        Returns:
            str: Extracted answer ("Unknown" when context is empty)
        """
        try:
            q_lower = question.lower()
            context_lower = context.lower()
            # (question prefix, linking verb expected in the context)
            patterns = [
                ('what is', 'is'),
                ('what are', 'are'),
                ('what was', 'was'),
                ('what were', 'were'),
                ('who is', 'is'),
                ('who are', 'are'),
                ('who was', 'was'),
                ('who were', 'were'),
                ('where is', 'is'),
                ('where are', 'are'),
                ('when is', 'is'),
                ('when was', 'was')
            ]
            # Try to find a direct answer using common patterns
            for q_pattern, verb in patterns:
                if q_lower.startswith(q_pattern):
                    # Look for the verb as a WHOLE word. A bare find(verb) matched
                    # the verb inside other words — e.g. 'is' inside 'Paris' —
                    # which returned garbage answers. Padding both the haystack
                    # and the needle with spaces enforces word boundaries; the
                    # padded match index equals the verb's index in `context`.
                    verb_pos = f" {context_lower} ".find(f" {verb} ")
                    if verb_pos != -1:
                        # Get the text after the verb
                        answer_part = context[verb_pos + len(verb):].strip(' ,.?!')
                        # Return the first comma/period-delimited phrase
                        return answer_part.split(',')[0].split('.')[0].strip()
            # Fallback: return the first proper noun or capitalized word not in the question
            words = context.split()
            for word in words:
                # Skip short words and words that are in the question
                if (len(word) > 2 and word[0].isupper() and
                        word.lower() not in q_lower and
                        word.lower() not in self.stop_words):
                    return word.strip(',.!?;:')
            # Last resort: return the first noun-like word
            for word in words:
                if len(word) > 3 and word.lower() not in q_lower and word.lower() not in self.stop_words:
                    return word.strip(',.!?;:')
            # If all else fails, return the first word that's not a stop word
            for word in words:
                if word.lower() not in self.stop_words and len(word) > 2:
                    return word.strip(',.!?;:')
            # Final fallback
            return context.split()[0] if context else "Unknown"
        except Exception as e:
            print(f"Error extracting answer: {str(e)}")
            # Return the first word as fallback
            return context.split()[0] if context else "Unknown"

    def create_mcq_options(self, question, context, num_options=4, correct_answer=None, global_keywords=None):
        """
        Create multiple choice options for a given question and context.

        Args:
            question (str): The question text
            context (str): The context from which the question was generated
            num_options (int): Number of options to generate (including correct answer)
            correct_answer (str, optional): The correct answer if known
            global_keywords (list, optional): List of keywords from the entire document to use as distractors

        Returns:
            dict: {'options': list[str], 'correct_index': int, 'correct_answer': str}
        """
        try:
            # Extract the correct answer from context if not provided
            if not correct_answer:
                correct_answer = self.extract_answer_from_context(question, context)
            # If we couldn't extract a good answer, use a fallback option set
            if not correct_answer or correct_answer == "Unknown":
                return {
                    'options': ['Option A', 'Option B', 'Option C', 'Option D'],
                    'correct_index': 0,
                    'correct_answer': 'Option A'
                }
            # Generate more distractors than needed so weak ones can be filtered
            distractors = self._get_distractors(
                correct_answer,
                num=min(10, num_options * 2)
            )
            # Deduplicate and drop anything equal to the correct answer
            distractors = list(set(d for d in distractors if d.lower() != correct_answer.lower()))
            # If we don't have enough distractors, fall back to document keywords
            if len(distractors) < num_options - 1 and global_keywords:
                # Filter keywords to ensure they are not the correct answer
                potential_distractors = [k for k in global_keywords if k.lower() != correct_answer.lower()]
                # Shuffle to get random ones
                random.shuffle(potential_distractors)
                for kw in potential_distractors:
                    if kw not in distractors:
                        distractors.append(kw)
                        if len(distractors) >= num_options + 2:  # Get a few extra
                            break
            # If we still don't have enough distractors, add some generic ones
            generic_distractors = [
                'True', 'False', 'Yes', 'No', 'Maybe', 'Always', 'Never',
                'Sometimes', 'Often', 'Rarely', 'All of the above', 'None of the above'
            ]
            while len(distractors) < num_options - 1 and generic_distractors:
                distractor = generic_distractors.pop(0)
                if distractor.lower() != correct_answer.lower() and distractor not in distractors:
                    distractors.append(distractor)
            # Assemble and shuffle the final option list
            options = [correct_answer] + distractors[:(num_options - 1)]
            random.shuffle(options)
            # Find the index of the correct answer after shuffling
            correct_index = options.index(correct_answer) if correct_answer in options else 0
            return {
                'options': options,
                'correct_index': correct_index,
                'correct_answer': correct_answer
            }
        except Exception as e:
            print(f"Error generating options: {str(e)}")
            # Fallback options
            return {
                'options': ['Option A', 'Option B', 'Option C', 'Option D'],
                'correct_index': 0,
                'correct_answer': 'Option A'
            }
# Example usage / smoke test when run as a script
if __name__ == "__main__":
    generator = OptionGenerator()
    question = "What is the capital of France?"
    context = "Paris is the capital of France, known for its art, fashion, and culture."
    print(f"Question: {question}")
    print(f"Context: {context}")
    result = generator.create_mcq_options(question, context)
    print("\nOptions:")
    for idx, choice in enumerate(result['options']):
        if idx == result['correct_index']:
            marker = "✓"
        else:
            marker = " "
        print(f"{marker} {chr(65 + idx)}. {choice}")