# Bloomsphere-app / helper.py
import pdfplumber
from langchain.text_splitter import CharacterTextSplitter
from openai import OpenAI
import json
import numpy as np
import time
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from dotenv import load_dotenv
import os
load_dotenv()
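
# load_dotenv() pulls the generator configuration from a local .env file.
# A minimal sketch of the variables this module reads via os.getenv below
# (the values shown are placeholders, not real credentials):
#
#   API_KEY=<provider API key>
#   GENERATOR_BASE_URL=<OpenAI-compatible endpoint, e.g. https://api.openai.com/v1>
#   MODEL_NAME=<chat model name>
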
def process_file(filepath, query=""):
    """Extract text from a PDF, split it into chunks, and (optionally) rank the
    chunks against a query using TF-IDF cosine similarity."""
    content = []
    with pdfplumber.open(filepath) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if text:  # Avoid NoneType errors on pages with no extractable text
                content.append(text)

    # Join extracted text with proper spacing
    full_text = "\n\n".join(content)

    # Apply chunking
    text_splitter = CharacterTextSplitter(
        chunk_size=50000,
        chunk_overlap=10
    )
    chunks = text_splitter.split_text(full_text)

    # Without a meaningful query there is nothing to rank against,
    # so return every chunk as-is.
    if len(query) < 5:
        return chunks

    # Vectorize the query and chunks, then rank chunks by cosine similarity
    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform([query] + chunks)
    cosine_similarities = cosine_similarity(vectors[0:1], vectors[1:]).flatten()

    # Select top chunks: keep the top half when there are many chunks
    top_n = int(len(chunks) / 2) if len(chunks) > 8 else len(chunks)
    top_indices = cosine_similarities.argsort()[-top_n:][::-1]
    return [chunks[i] for i in top_indices]
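
# Hypothetical usage sketch, relying on the optional `query` parameter of
# process_file above: rank the PDF's chunks against an invented topic query
# and keep only the most relevant ones. The query string is illustrative only.
def _example_topical_chunks(filepath="data/eco.pdf"):
    return process_file(filepath, query="causes and effects of inflation")
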
def givemcqquestion(chunks, create, evaluate, analyze, apply, understand, remember, level, questions):
    if level == 1:
        difficulty = "easy and non-tricky, with simple options"
    elif level == 2:
        difficulty = "tricky, medium-level, and fairly lengthy"
    elif level == 3:
        difficulty = "hard, tricky, and lengthy"
    else:
        raise ValueError("Invalid level. Choose 1 (easy), 2 (medium), or 3 (hard).")

    prompt = f"""You are an AI designed to generate high-quality multiple-choice questions (MCQs) for educational assessments, following Bloom’s Taxonomy. The questions should be {difficulty}. Your task is to create a well-structured question that aligns with the given cognitive level, ensuring it accurately reflects the required depth of understanding.
Instructions:
For each Bloom’s Taxonomy level, follow the guidelines below to ensure appropriate question complexity:
- **Knowledge (Remembering)**: Formulate a factual or recall-based question.
- **Comprehension (Understanding)**: Create a question that requires explanation or interpretation.
- **Application (Applying)**: Develop a question that applies knowledge to a new situation.
- **Analysis (Analyzing)**: Design a question that encourages breaking down concepts.
- **Synthesis (Creating)**: Construct a question requiring idea combination or new approaches.
- **Evaluation (Evaluating)**: Generate a question that involves judgment or assessment.
STRICT RULES:
- Generate exactly **{questions} MCQ** based on the given context and Bloom’s Taxonomy level.
- Return the response as a **structured JSON object** without any additional text.
- The question should reflect the complexity required for the given cognitive level.
- Options should be **plausible, with only one correct answer** clearly identifiable.
- Ensure a structured rubric to evaluate student responses.
Input Parameters:
- **Context**: {chunks} (Relevant learning material)
- **Bloom’s Taxonomy Distribution**:
- Understanding: {understand*100}%
- Analysis: {analyze*100}%
- Evaluation: {evaluate*100}%
- Synthesis: {create*100}%
- Application: {apply*100}%
- Knowledge: {remember*100}%
Expected JSON Output Format:
{{
"question": "<Your MCQ Question>",
"options": {{
"A": "<Option A>",
"B": "<Option B>",
"C": "<Option C>",
"D": "<Option D>"
}},
"correct_answer": "<Correct Option>",
"rubric": {{
"key_concept_assessed": "<Briefly explain what is being tested>",
"criteria_for_correct_answer": "<Explain why the correct answer is correct>",
"common_misconceptions": "<List potential incorrect assumptions>",
"cognitive_skill_tested": "<Describe how it aligns with Bloom’s Taxonomy>"
}}
}}
"""
    # Debug output for the generator configuration; the raw API key is not printed.
    print("API KEY set:", bool(os.getenv("API_KEY")))
    print("BASE URL", os.getenv("GENERATOR_BASE_URL"))
    print("MODEL NAME", os.getenv("MODEL_NAME"))
    client = OpenAI(
        api_key=os.getenv("API_KEY"),
        base_url=os.getenv("GENERATOR_BASE_URL")
    )
    response = client.chat.completions.create(
        model=os.getenv("MODEL_NAME"),
        messages=[{"role": "user", "content": prompt}]
    )
    mcq = response.choices[0].message.content

    if "```json" in mcq:
        mcq = mcq.replace("```json", "")
        mcq = mcq.replace("```", "")
    mcq = mcq.replace("\n", "")
    mcq = json.loads(mcq)
    return mcq, prompt
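
# Hypothetical usage sketch for givemcqquestion: one level-2 MCQ from a single
# invented chunk, using the same even Bloom's weights as the __main__ block
# below. Note the function returns both the parsed MCQ and the prompt it sent.
def _example_single_mcq():
    chunk = "Inflation is a sustained rise in the general price level."
    mcq, _prompt = givemcqquestion(chunk, create=0.2, evaluate=0.2, analyze=0.2,
                                   apply=0.2, understand=0.2, remember=0.2,
                                   level=2, questions=1)
    return mcq
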
def givetruefalsequestion(chunks, create, evaluate, analyze, apply, understand, remember, level, questions):
    if level == 1:
        difficulty = "easy and straightforward"
    elif level == 2:
        difficulty = "of moderate complexity, with slight trickiness"
    elif level == 3:
        difficulty = "complex and tricky, requiring deep understanding"
    else:
        raise ValueError("Invalid level. Choose 1 (easy), 2 (medium), or 3 (hard).")

    prompt = f"""You are an AI designed to generate high-quality **True/False** questions for educational assessments, following Bloom’s Taxonomy. The statements should be {difficulty}. Your task is to create a well-structured **True/False** question that aligns with the given cognitive level, ensuring it accurately reflects the required depth of understanding.
### **Instructions:**
For each Bloom’s Taxonomy level, follow the guidelines below to ensure appropriate question complexity:
- **Knowledge (Remembering)**: Generate a straightforward fact-based statement.
- **Comprehension (Understanding)**: Formulate a statement that requires explanation or interpretation.
- **Application (Applying)**: Develop a statement that applies knowledge to a new situation.
- **Analysis (Analyzing)**: Design a statement that involves breaking down concepts.
- **Synthesis (Creating)**: Construct a statement requiring combining ideas or new approaches.
- **Evaluation (Evaluating)**: Generate a statement requiring judgment or assessment.
### **STRICT RULES:**
- Generate exactly **{questions}** True/False question.
- Return the response as a **structured JSON object** without any additional text.
- The question should reflect the complexity required for the given cognitive level.
- Ensure a structured rubric to evaluate student responses.
### **Input Parameters:**
- **Context**: {chunks} (Relevant learning material)
- **Bloom’s Taxonomy Distribution**:
- Understanding: {understand*100}%
- Analysis: {analyze*100}%
- Evaluation: {evaluate*100}%
- Synthesis: {create*100}%
- Application: {apply*100}%
- Knowledge: {remember*100}%
### **Expected JSON Output Format:**
```json
{{
"statement": "<Your True/False Statement>",
"correct_answer": "<True or False>",
"rubric": {{
"key_concept_assessed": "<Briefly explain what is being tested>",
"criteria_for_correct_answer": "<Explain why the correct answer is correct>",
"common_misconceptions": "<List potential incorrect assumptions>",
"cognitive_skill_tested": "<Describe how it aligns with Bloom’s Taxonomy>"
}}
}}
```
"""
    client = OpenAI(
        api_key=os.getenv("API_KEY"),
        base_url=os.getenv("GENERATOR_BASE_URL")
    )
    response = client.chat.completions.create(
        model=os.getenv("MODEL_NAME"),
        messages=[{"role": "user", "content": prompt}]
    )
    tf_question = response.choices[0].message.content

    if "```json" in tf_question:
        tf_question = tf_question.replace("```json", "").replace("```", "")
    tf_question = tf_question.replace("\n", "")
    tf_question = json.loads(tf_question)
    return tf_question
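
# The two generators above strip Markdown code fences with str.replace before
# json.loads, while giveopenquestion below extracts the fenced block with a
# regex. A minimal shared helper sketch (hypothetical, not wired into the
# existing functions) that handles both styles of model output:
def _extract_json_sketch(raw):
    match = re.search(r"```json(.*?)```", raw, re.DOTALL)
    payload = match.group(1) if match else raw.replace("```json", "").replace("```", "")
    return json.loads(payload.strip())
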
def giveopenquestion(chunks, create, evaluate, analyze, apply, understand, remember, level, questions):
    # Validate input parameters
    bloom_params = {
        'create': create,
        'evaluate': evaluate,
        'analyze': analyze,
        'apply': apply,
        'understand': understand,
        'remember': remember
    }
    if not all(0 <= val <= 1 for val in bloom_params.values()):
        raise ValueError("All Bloom's parameters must be between 0 and 1")
    if level not in [1, 2, 3]:
        raise ValueError("Invalid level. Choose 1 (easy), 2 (medium), or 3 (hard).")

    # Complexity description
    complexity_levels = {
        1: "simple recall-based questions",
        2: "moderate explanation questions",
        3: "complex analytical questions"
    }
    complexity = complexity_levels.get(level)

    prompt = f"""Generate {questions} open-ended question(s) based on the provided context, strictly following these requirements:
### CONTEXT:
{chunks}
### BLOOM'S TAXONOMY DISTRIBUTION:
- Creating: {create*100}%
- Evaluating: {evaluate*100}%
- Analyzing: {analyze*100}%
- Applying: {apply*100}%
- Understanding: {understand*100}%
- Remembering: {remember*100}%
### COGNITIVE LEVEL:
{complexity} (Level {level})
### OUTPUT REQUIREMENTS:
- Return ONLY valid JSON format
- Include detailed rubric with cognitive skill mapping
- For each question, specify which Bloom's level it primarily targets
### RESPONSE FORMAT:
```json
{{
"metadata": {{
"blooms_distribution": {{
"create": {create},
"evaluate": {evaluate},
"analyze": {analyze},
"apply": {apply},
"understand": {understand},
"remember": {remember}
}},
"complexity_level": {level}
}},
"questions": [
{{
"question": "Question text",
"primary_blooms_level": "create|evaluate|analyze|apply|understand|remember",
"rubric": {{
"key_concept": "...",
"criteria": "...",
"misconceptions": "...",
"cognitive_skills": {{
"primary": "...",
"secondary": ["...", "..."]
}}
}}
}}
]
}}
```
"""
    client = OpenAI(
        api_key=os.getenv("API_KEY"),
        base_url=os.getenv("GENERATOR_BASE_URL")
    )
    try:
        response = client.chat.completions.create(
            model=os.getenv("MODEL_NAME"),
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=1200
        )
        raw_response = response.choices[0].message.content

        # Extract JSON from response
        json_match = re.search(r'```json(.*?)```', raw_response, re.DOTALL)
        json_str = json_match.group(1).strip() if json_match else raw_response.strip()

        # Parse and validate JSON
        result = json.loads(json_str)

        # Validate structure
        if not all(key in result for key in ['metadata', 'questions']):
            raise ValueError("Response missing required fields")

        return result
    except json.JSONDecodeError as e:
        print(f"JSON Decode Error: {e}")
        print(f"Problematic response:\n{raw_response}")
        raise
    except Exception as e:
        print(f"API Error: {e}")
        raise
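
# Hypothetical consumption sketch: iterate over the structure giveopenquestion
# is asked to return (see the RESPONSE FORMAT in the prompt above) and print
# each question with its primary Bloom's level. The chunk text is invented.
def _example_print_open_questions(chunk="Supply and demand jointly determine market prices."):
    result = giveopenquestion(chunk, create=0.2, evaluate=0.2, analyze=0.2,
                              apply=0.2, understand=0.2, remember=0.2,
                              level=2, questions=2)
    for q in result["questions"]:
        print(q["primary_blooms_level"], "-", q["question"])
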
def generate_questions_from_file(filepath, mcq, tf, qna, create, evaluate, analyze, apply, understand, remember, level):
    """Main function to generate questions from file"""
    # Process file first
    chunks = process_file(filepath)

    # Generate questions using existing functionality
    MAX_RETRIES = 3
    RETRY_DELAY = 1

    def get_random_chunk():
        return chunks[np.random.randint(len(chunks))] if chunks else ""

    def generate_questions(q_type, count, generator):
        results = []
        for _ in range(count):
            for attempt in range(MAX_RETRIES):
                try:
                    chunk = get_random_chunk()
                    question = generator(chunk, create, evaluate, analyze, apply,
                                         understand, remember, level, questions=1)
                    results.append(question)
                    break
                except Exception as e:
                    print(f"Error generating {q_type} question (attempt {attempt+1}): {str(e)}")
                    if attempt == MAX_RETRIES - 1:
                        results.append({"error": f"Failed to generate {q_type} question"})
                    time.sleep(RETRY_DELAY)
        return results

    return {
        'mcq': generate_questions("MCQ", mcq, givemcqquestion),
        'tf': generate_questions("True/False", tf, givetruefalsequestion),
        'qna': generate_questions("Q&A", qna, giveopenquestion)
    }
if __name__ == "__main__":
    # Example usage
    filepath = "data/eco.pdf"
    mcq = 1
    tf = 1
    qna = 1
    level = 1
    create = 0.2
    evaluate = 0.2
    analyze = 0.2
    apply = 0.2
    understand = 0.2
    remember = 0.2

    questions = generate_questions_from_file(
        filepath, mcq, tf, qna, create, evaluate,
        analyze, apply, understand, remember, level
    )
    print(questions)