Spaces:

InternationalSchoolOfPanama
/

HL_ELT

Sleeping

App Files Files Community

HL_ELT / app.py

jeremierostan

Update app.py

2e35634 verified 11 months ago

raw

history blame contribute delete

5.97 kB

	import gradio as gr
	import PyPDF2
	from pptx import Presentation
	from PIL import Image
	import io
	import google.generativeai as genai
	from jinja2 import Template
	import fitz
	import os
	from hd import msg
	import logging
	import re

	# Route INFO-level (and higher) records through the root logger
	logging.basicConfig(level=logging.INFO)

	# Login credentials come from environment secrets (None when unset)
	username = os.environ.get('USERNAME')
	password = os.environ.get('PASSWORD')

	# Gemini API key, likewise read from the environment
	GOOGLE_API_KEY = os.environ.get('GEMINI_API_KEY')

	# Hand the key to the Gemini client library
	genai.configure(api_key=GOOGLE_API_KEY)

	# Module-wide model handle used for every generation request
	model = genai.GenerativeModel('gemini-1.5-flash')

	# Function to extract content from PDF
	def extract_content_from_pdf(file_path):
	    """Extract the text and embedded images from a PDF file.

	    Args:
	        file_path: Path of the PDF to open.

	    Returns:
	        A ``(text, images)`` tuple. ``text`` is the text of every page,
	        each followed by a newline; ``images`` is a list of PIL images
	        decoded from the images referenced by the pages. If the document
	        cannot be opened or read, ``("", [])`` is returned and the error
	        is logged.
	    """
	    try:
	        page_texts = []
	        images = []
	        # The context manager closes the document even if a page fails.
	        with fitz.open(file_path) as doc:
	            for page in doc:
	                page_texts.append(page.get_text() + "\n")
	                for img in page.get_images():
	                    xref = img[0]
	                    # Decode each image on its own so that one unreadable
	                    # image does not throw away the text already extracted.
	                    try:
	                        base_image = doc.extract_image(xref)
	                        image = Image.open(io.BytesIO(base_image["image"]))
	                    except Exception as e:
	                        logging.warning(f"Skipping unreadable PDF image (xref {xref}): {e}")
	                        continue
	                    images.append(image)
	        return "".join(page_texts), images
	    except Exception as e:
	        logging.error(f"Error extracting content from PDF: {e}")
	        return "", []

	# Function to extract content from PPTX
	def extract_content_from_pptx(file_path):
	    """Extract the text and pictures from a PowerPoint (.pptx) file.

	    Args:
	        file_path: Path of the presentation to open.

	    Returns:
	        A ``(text, images)`` tuple. ``text`` is the text of every shape
	        that exposes a ``text`` attribute, each followed by a newline;
	        ``images`` is a list of PIL images decoded from picture shapes.
	        If the presentation cannot be opened or read, ``("", [])`` is
	        returned and the error is logged.
	    """
	    picture_shape_type = 13  # numeric value of MSO_SHAPE_TYPE.PICTURE
	    try:
	        text_parts = []
	        images = []
	        prs = Presentation(file_path)
	        for slide in prs.slides:
	            for shape in slide.shapes:
	                if hasattr(shape, 'text'):
	                    text_parts.append(shape.text + "\n")
	                if shape.shape_type == picture_shape_type:
	                    # Decode each picture on its own: formats PIL cannot
	                    # open must not discard the slide text collected so far.
	                    try:
	                        image = Image.open(io.BytesIO(shape.image.blob))
	                    except Exception as e:
	                        logging.warning(f"Skipping unreadable PPTX picture: {e}")
	                        continue
	                    images.append(image)
	        return "".join(text_parts), images
	    except Exception as e:
	        logging.error(f"Error extracting content from PPTX: {e}")
	        return "", []

	# Function to process file
	def process_file(file_path):
	    """Dispatch an uploaded file to the extractor matching its extension.

	    Returns a ``(content, images)`` tuple. For a missing file, an
	    unsupported extension, or an unexpected failure, ``content`` is a
	    human-readable message and ``images`` is an empty list.
	    """
	    if file_path is None:
	        return "No file uploaded", []

	    try:
	        # Extension matching is case-insensitive.
	        lowered_name = file_path.lower()
	        if lowered_name.endswith('.pdf'):
	            return extract_content_from_pdf(file_path)
	        if lowered_name.endswith('.pptx'):
	            return extract_content_from_pptx(file_path)
	        return "Unsupported file format", []
	    except Exception as err:
	        logging.error(f"Error processing file: {err}")
	        return f"An error occurred while processing the file: {str(err)}", []

	# Function to clean response
	def clean_response(response_text):
	    """Strip Markdown code-fence markers and collapse runs of blank lines.

	    Args:
	        response_text: Raw text returned by the model.

	    Returns:
	        The text with code-fence markers removed, leading and trailing
	        whitespace stripped, and any run of blank or whitespace-only
	        lines reduced to a single blank line.
	    """
	    # The previous pattern escaped the pipe (``\|``), so it only matched the
	    # literal text "```python|```" and never removed a fence. Alternatives,
	    # tried in order:
	    #   1. a fence on a line of its own, with any language tag
	    #   2. an inline "```python" marker
	    #   3. any remaining bare "```"
	    fence_pattern = r'(?m)^[ \t]*```[\w+-]*[ \t]*$|```python|```'
	    cleaned = re.sub(fence_pattern, '', response_text).strip()
	    # Handle newlines and indentation
	    cleaned = re.sub(r'\n\s*\n', '\n\n', cleaned)
	    return cleaned

	# Function to understand text and images
	def understand_content(text, images):
	    """Ask the Gemini model to turn class materials into a structured plan.

	    The extracted text and at most the first ten images are sent along
	    with a fixed instruction prompt. The model's reply is logged, passed
	    through ``clean_response`` and returned. Any failure is logged and a
	    generic error message is returned instead.
	    """
	    try:
	        # Payload: the text first, followed by no more than ten images
	        content = [text, *images[:10]]

	        # Instruction prompt describing the expected document template
	        prompt = """
	You are a teaching assistant.
	The teacher has class materials for a lesson, but needs to create a document that students will use to complete the work in the teacher's absence and independently.
	Analyze the attached educational content and provide a structured response in line with this template:
	#1. 'Title' (including subject).
	#2. 'Learning Objectives':
	-'Know & Understand' (Information and concepts students will be acquiring: 1-3 points)
	-'Do' (Skills students will be practicing / developing: 1-3 points).
	#3. 'Instructions' (Brief overview):
	-'Lesson' (Expands on Know, Understand, Do, providing a bulleted list students can refer to in the future to study)
	-'Application' (Sequence of activities to apply/practice the learning)
	-'Deliverables' (Submissions required + due date if indicated)
	-'Extension' (Opportunities to learn more, go deeper, or fast forward. Create them if they are not present).
	Derive your response from the class materials and format your response in a clear, structured manner using markdown headings and bullet points.
	"""
	        response = model.generate_content([prompt, *content])

	        # Keep the untouched reply in the log for debugging
	        raw_text = response.text
	        logging.info(f"Raw response from Gemini: {raw_text}")

	        # Tidy the reply before handing it back
	        cleaned_response = clean_response(raw_text)
	        logging.info(f"Cleaned response: {cleaned_response}")
	        return cleaned_response
	    except Exception as err:
	        logging.error(f"Error in content understanding: {err}")
	        return "Error in processing the content. Please try again."

	# Function to generate ELT plan
	def generate_elt_plan(file):
	    """Build an ELT plan from an uploaded PDF or PPTX file.

	    Args:
	        file: The uploaded file, either an object exposing the path as
	            ``.name`` or a plain path string. ``None`` means that
	            nothing was uploaded.

	    Returns:
	        The generated plan as Markdown text, or a human-readable message
	        when there is no file, the format is unsupported, nothing could
	        be extracted, or an error occurred.
	    """
	    # Without this guard, clicking the button with no upload surfaced a
	    # raw AttributeError message to the user.
	    if file is None:
	        return "No file uploaded"
	    try:
	        # Accept both upload objects (path in ``.name``) and plain paths.
	        file_path = getattr(file, "name", file)
	        logging.info(f"Processing file: {file_path}")
	        content, images = process_file(file_path)
	        if isinstance(content, str):
	            if content.startswith("An error occurred"):
	                return content
	            # Status messages from process_file are not lesson material:
	            # show them to the user instead of sending them to the model.
	            if not images and content in ("No file uploaded", "Unsupported file format"):
	                return content
	            # Nothing extracted at all: do not call the model on empty input.
	            if not images and not content.strip():
	                return "No text or images could be extracted from the file."
	        logging.info(f"Extracted content length: {len(content)}, Number of images: {len(images)}")
	        elt_plan = understand_content(content, images)
	        return elt_plan
	    except Exception as e:
	        logging.error(f"Error in generate_elt_plan: {e}")
	        return f"An error occurred: {str(e)}"

	# Set up Gradio Blocks: the user interface of the ELT Planner Assistant.
	# NOTE(review): indentation was lost in this copy of the file, so the nesting
	# of the layout below (which components sit inside the Row / Column) cannot be
	# read from here — confirm against the original app.py before editing.
	with gr.Blocks() as demo:
	# Banner image loaded from 'img.png', followed by the page heading.
	gr.Image('img.png', width=320, height=180, label='Dash')
	gr.Markdown("# ELT Planner Assistant")

	with gr.Row():
	with gr.Column():
	# Upload widget and the button that triggers plan generation.
	file_input = gr.File(label="Upload PPTX or PDF")
	submit_btn = gr.Button("Generate ELT Plan")

	# Markdown area that receives the text returned by generate_elt_plan.
	output = gr.Markdown(label="Generated ELT Plan")

	# On click, pass the uploaded file to generate_elt_plan and render the result.
	submit_btn.click(generate_elt_plan, inputs=file_input, outputs=output)
	# Extra Markdown content imported from the project-local `hd` module.
	gr.Markdown(msg)

	# Launch the protected ELT app.
	# The login pair comes from the USERNAME / PASSWORD environment variables read
	# at the top of the file; both are None when those variables are not set.
	demo.launch(auth=(username,password))