# HL_ELT / app.py
# jeremierostan's picture
# Update app.py
# 2e35634 verified
import gradio as gr
import PyPDF2
from pptx import Presentation
from PIL import Image
import io
import google.generativeai as genai
from jinja2 import Template
import fitz
import os
from hd import msg
import logging
import re
# Route INFO-level (and higher) records through the root logger
logging.basicConfig(level=logging.INFO)
# Login credentials come from the USERNAME / PASSWORD environment variables
username = os.environ.get('USERNAME')
password = os.environ.get('PASSWORD')
# Gemini API key comes from the GEMINI_API_KEY environment variable
GOOGLE_API_KEY = os.environ.get('GEMINI_API_KEY')
# Hand the key to the google.generativeai client
genai.configure(api_key=GOOGLE_API_KEY)
# Module-wide Gemini model instance used for every request
model = genai.GenerativeModel('gemini-1.5-flash')
# Function to extract content from PDF
def extract_content_from_pdf(file_path):
    """Extract the text and the embedded images from a PDF file.

    Args:
        file_path: Path of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the text of every page, each
        page followed by a newline. ``images`` is a list of PIL images built
        from the pages' embedded images; an image referenced from several
        pages is returned once, and images that cannot be decoded are skipped
        with a warning. ``("", [])`` is returned when the document itself
        cannot be opened or read.
    """
    try:
        page_texts = []
        images = []
        seen_xrefs = set()
        doc = fitz.open(file_path)
        try:
            for page in doc:
                page_texts.append(page.get_text() + "\n")
                for img in page.get_images():
                    # The first item of each entry is the xref handed to extract_image
                    xref = img[0]
                    # A picture reused on several pages (e.g. a logo) shares one xref
                    if xref in seen_xrefs:
                        continue
                    seen_xrefs.add(xref)
                    try:
                        image_bytes = doc.extract_image(xref)["image"]
                        images.append(Image.open(io.BytesIO(image_bytes)))
                    except Exception as e:
                        # One undecodable image must not discard the whole document
                        logging.warning(f"Skipping unreadable PDF image (xref {xref}): {e}")
        finally:
            # Release the document handle whether or not extraction succeeded
            doc.close()
        return "".join(page_texts), images
    except Exception as e:
        logging.error(f"Error extracting content from PDF: {e}")
        return "", []
# Function to extract content from PPTX
def extract_content_from_pptx(file_path):
    """Extract the text and the pictures from a PowerPoint (.pptx) file.

    Args:
        file_path: Path of the presentation; passed unchanged to
            ``Presentation``.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the text of every shape that
        exposes a ``text`` attribute, each followed by a newline, in slide
        order. ``images`` is a list of PIL images built from the picture
        shapes; pictures that cannot be decoded are skipped with a warning.
        ``("", [])`` is returned when the presentation itself cannot be
        opened or read.
    """
    # Shape-type code this file treats as a picture
    # (MSO_SHAPE_TYPE.PICTURE in python-pptx)
    picture_shape_type = 13
    try:
        text_parts = []
        images = []
        prs = Presentation(file_path)
        for slide in prs.slides:
            for shape in slide.shapes:
                if hasattr(shape, 'text'):
                    text_parts.append(shape.text + "\n")
                if shape.shape_type == picture_shape_type:
                    try:
                        images.append(Image.open(io.BytesIO(shape.image.blob)))
                    except Exception as e:
                        # One undecodable picture must not discard the whole deck
                        logging.warning(f"Skipping unreadable PPTX picture: {e}")
        return "".join(text_parts), images
    except Exception as e:
        logging.error(f"Error extracting content from PPTX: {e}")
        return "", []
# Function to process file
def process_file(file_path):
    """Dispatch an uploaded file to the extractor matching its extension.

    Args:
        file_path: Path of the uploaded file as a ``str`` or ``os.PathLike``
            object, or ``None`` when nothing was uploaded.

    Returns:
        A ``(content, images)`` tuple. For ``.pdf`` and ``.pptx`` files
        (case-insensitive) this is the extractor's result. Otherwise
        ``content`` is a status message ("No file uploaded",
        "Unsupported file format", or "An error occurred while processing
        the file: ...") and ``images`` is an empty list.
    """
    if file_path is None:
        return "No file uploaded", []
    try:
        # os.fspath accepts str and os.PathLike, so pathlib.Path inputs work too
        path = os.fspath(file_path)
        lowered = path.lower()
        if lowered.endswith('.pdf'):
            return extract_content_from_pdf(path)
        if lowered.endswith('.pptx'):
            return extract_content_from_pptx(path)
        return "Unsupported file format", []
    except Exception as e:
        logging.error(f"Error processing file: {e}")
        return f"An error occurred while processing the file: {str(e)}", []
# Function to clean response
def clean_response(response_text):
    """Strip Markdown code-fence markers from a model response.

    A fence that sits alone on its line is removed together with its language
    tag (for example ```` ```markdown ````), so the tag does not leak into the
    rendered output. Any other ```` ```python ```` or bare ```` ``` ```` marker
    is removed as well. Runs of blank or whitespace-only lines are then
    collapsed to a single blank line.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    # First alternative: whole fence line including an optional language tag.
    # Remaining alternatives: inline "```python" and bare "```" markers.
    cleaned = re.sub(
        r'(?m)^[ \t]*```[\w+-]*[ \t\r]*$|```python|```',
        '',
        response_text,
    ).strip()
    # Collapse consecutive blank / whitespace-only lines into one blank line
    cleaned = re.sub(r'\n\s*\n', '\n\n', cleaned)
    return cleaned
# Function to understand text and images
def understand_content(text, images, max_images=10):
    """Ask the Gemini model to turn extracted class materials into an ELT plan.

    Args:
        text: Text extracted from the uploaded file.
        images: Images extracted from the uploaded file.
        max_images: Maximum number of images forwarded to the model; images
            beyond this count are ignored. Defaults to 10.

    Returns:
        The model's response after ``clean_response`` has been applied. When
        there is neither non-blank text nor any image, a message saying that
        no readable content was extracted is returned without calling the
        model. When the model call fails, a generic error message is returned.
    """
    has_text = bool(text and text.strip())
    # Nothing to analyse: do not ask the model to build a plan from no material
    if not has_text and not images:
        return "No readable content could be extracted from the file."
    try:
        # Prepare content for Gemini; a blank text part carries no information
        content = [text] if has_text else []
        content.extend(images[:max_images])
        # Generate response from Gemini
        prompt = """
You are a teaching assistant.
The teacher has class materials for a lesson, but needs to create a document that students will use to complete the work in the teacher's absence and independently.
Analyze the attached educational content and provide a structured response in line with this template:
#1. 'Title' (including subject).
#2. 'Learning Objectives':
-'Know & Understand' (Information and concepts students will be acquiring: 1-3 points)
-'Do' (Skills students will be practicing / developing: 1-3 points).
#3. 'Instructions' (Brief overview):
-'Lesson' (Expands on Know, Understand, Do, providing a bulleted list students can refer to in the future to study)
-'Application' (Sequence of activities to apply/practice the learning)
-'Deliverables' (Submissions required + due date if indicated)
-'Extension' (Opportunities to learn more, go deeper, or fast forward. Create them if they are not present).
Derive your response from the class materials and format your response in a clear, structured manner using markdown headings and bullet points.
"""
        response = model.generate_content([prompt] + content)
        raw_text = response.text
        # Log the raw response for debugging
        logging.info(f"Raw response from Gemini: {raw_text}")
        # Clean the response
        cleaned_response = clean_response(raw_text)
        logging.info(f"Cleaned response: {cleaned_response}")
        return cleaned_response
    except Exception as e:
        logging.error(f"Error in content understanding: {e}")
        return "Error in processing the content. Please try again."
# Function to generate ELT plan
def generate_elt_plan(file):
    """Build the ELT plan for an uploaded file (Gradio click handler).

    Args:
        file: The value of the upload component: an object with a ``name``
            attribute holding the file path, a plain path string, or ``None``
            when nothing was uploaded.

    Returns:
        The generated plan as Markdown text, or a human-readable message when
        no file was uploaded, the format is unsupported, nothing could be
        extracted, or an error occurred.
    """
    # Clicking the button without an upload passes None
    if file is None:
        return "No file uploaded"
    try:
        # Use .name when present, otherwise treat the value itself as the path
        file_path = getattr(file, "name", file)
        logging.info(f"Processing file: {file_path}")
        content, images = process_file(file_path)
        # process_file reports problems as a message in the content slot with
        # no images; show those to the user instead of sending them to the model
        if not images and isinstance(content, str) and (
            content in ("No file uploaded", "Unsupported file format")
            or content.startswith("An error occurred")
        ):
            return content
        logging.info(f"Extracted content length: {len(content)}, Number of images: {len(images)}")
        # A failed extraction yields empty text and no images
        if not images and not content.strip():
            return "No readable content could be extracted from the file."
        elt_plan = understand_content(content, images)
        return elt_plan
    except Exception as e:
        logging.error(f"Error in generate_elt_plan: {e}")
        return f"An error occurred: {str(e)}"
# Set up Gradio Blocks
# Build the web UI.
# NOTE(review): which of the statements below sit inside the Row / Column
# containers cannot be determined from this flattened text — confirm the
# intended nesting before re-indenting.
with gr.Blocks() as demo:
# Image component showing img.png at 320x180
gr.Image('img.png', width=320, height=180, label='Dash')
# Page title
gr.Markdown("# ELT Planner Assistant")
with gr.Row():
with gr.Column():
# Upload widget for the class materials
file_input = gr.File(label="Upload PPTX or PDF")
submit_btn = gr.Button("Generate ELT Plan")
# Area where the generated plan is rendered as Markdown
output = gr.Markdown(label="Generated ELT Plan")
# On click, pass the uploaded file to generate_elt_plan and show its return value in `output`
submit_btn.click(generate_elt_plan, inputs=file_input, outputs=output)
# Render the `msg` text imported from the hd module
gr.Markdown(msg)
# Launch the app behind a login that uses the username/password read from the environment.
# NOTE(review): both values are None when the environment variables are unset — confirm how gradio handles that.
demo.launch(auth=(username,password))