# Spaces: Sleeping (page-header residue from the Hugging Face Spaces listing, not part of the program)
import gradio as gr | |
import PyPDF2 | |
from pptx import Presentation | |
from PIL import Image | |
import io | |
import google.generativeai as genai | |
from jinja2 import Template | |
import fitz | |
import os | |
from hd import msg | |
import logging | |
import re | |
# Set up logging: the root logger is configured at INFO level, which is the
# level used by the logging.info(...) calls in this file.
logging.basicConfig(level=logging.INFO)
# Set up username and password as secrets.
# Both values come from environment variables (os.getenv returns None when a
# variable is unset) and are handed to demo.launch(auth=...) at the end of
# this file.
# NOTE(review): USERNAME may already be defined by the host OS/shell - confirm
# the deployment sets it explicitly.
username = os.getenv('USERNAME')
password = os.getenv('PASSWORD')
# Set up API key as secret: read from the GEMINI_API_KEY environment variable
# (None when unset).
GOOGLE_API_KEY = os.getenv('GEMINI_API_KEY')
# Configure Gemini API with that key
genai.configure(api_key=GOOGLE_API_KEY)
# Initialize Gemini model; this module-level object is the one
# understand_content() calls generate_content() on.
model = genai.GenerativeModel('gemini-1.5-flash')
# Function to extract content from PDF | |
def extract_content_from_pdf(file_path):
    """Extract the text and the embedded images of a PDF file.

    Args:
        file_path: Path of the PDF, passed straight to ``fitz.open``.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the text of every page, each
        page followed by a newline. ``images`` holds one ``PIL.Image`` per
        image entry reported by ``page.get_images()`` that could be decoded.
        If the document cannot be opened or read, the error is logged and
        ``("", [])`` is returned.
    """
    try:
        page_texts = []
        images = []
        # The context manager closes the document on every exit path; the
        # original left it open.
        with fitz.open(file_path) as doc:
            for page in doc:
                page_texts.append(page.get_text() + "\n")
                for img in page.get_images():
                    xref = img[0]
                    # One undecodable image must not throw away the text
                    # (and the other images) already extracted.
                    try:
                        image_bytes = doc.extract_image(xref)["image"]
                        images.append(Image.open(io.BytesIO(image_bytes)))
                    except Exception as image_error:
                        logging.warning(
                            f"Skipping unreadable PDF image (xref {xref}): {image_error}"
                        )
        return "".join(page_texts), images
    except Exception as e:
        logging.error(f"Error extracting content from PDF: {e}")
        return "", []
# Function to extract content from PPTX | |
def extract_content_from_pptx(file_path):
    """Extract the text and the pictures of a PowerPoint (.pptx) file.

    Args:
        file_path: Path of the presentation, passed to ``Presentation``.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the ``text`` of every shape
        that exposes one, each followed by a newline, in slide order.
        ``images`` holds one ``PIL.Image`` per picture shape whose data could
        be decoded. If the presentation cannot be opened or read, the error
        is logged and ``("", [])`` is returned.
    """
    try:
        # Local import: python-pptx is already a dependency of this file, and
        # the enum replaces the bare shape-type number 13.
        from pptx.enum.shapes import MSO_SHAPE_TYPE

        text_parts = []
        images = []
        prs = Presentation(file_path)
        for slide in prs.slides:
            for shape in slide.shapes:
                if hasattr(shape, 'text'):
                    text_parts.append(shape.text + "\n")
                if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
                    # One undecodable picture must not discard the slide text
                    # collected so far.
                    try:
                        images.append(Image.open(io.BytesIO(shape.image.blob)))
                    except Exception as image_error:
                        logging.warning(
                            f"Skipping unreadable PPTX picture: {image_error}"
                        )
        return "".join(text_parts), images
    except Exception as e:
        logging.error(f"Error extracting content from PPTX: {e}")
        return "", []
# Function to process file | |
def process_file(file_path):
    """Route an uploaded file to the extractor that matches its extension.

    Args:
        file_path: Path of the uploaded file, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(text, images)`` tuple. For ``.pdf`` and ``.pptx`` paths (matched
        case-insensitively) this is the extractor's result. Otherwise the
        first element is a status message and the second an empty list.
    """
    if file_path is None:
        return "No file uploaded", []

    try:
        lowered_path = file_path.lower()
        if lowered_path.endswith('.pdf'):
            return extract_content_from_pdf(file_path)
        if lowered_path.endswith('.pptx'):
            return extract_content_from_pptx(file_path)
        return "Unsupported file format", []
    except Exception as err:
        logging.error(f"Error processing file: {err}")
        return f"An error occurred while processing the file: {str(err)}", []
# Function to clean response | |
def clean_response(response_text):
    """Strip code-fence markers from a model response and tidy blank lines.

    A line that consists only of a fence (three backticks, optionally
    followed by a language tag such as ``markdown`` or ``python``) is removed
    together with its tag. The original only recognised the ``python`` tag,
    so a ``markdown`` fence left the word "markdown" in the output. Any other
    fence marker is removed as before.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The text without fence markers, stripped of leading and trailing
        whitespace, with every run of blank or whitespace-only lines reduced
        to a single blank line.
    """
    without_fences = re.sub(
        # 1st alternative: a whole fence line with any language tag.
        # 2nd/3rd alternatives: the original inline behaviour, unchanged.
        r'^[ \t]*```[\w+-]*[ \t\r]*$|```python|```',
        '',
        response_text,
        flags=re.MULTILINE,
    ).strip()
    # Collapse runs of blank lines into one blank line
    return re.sub(r'\n\s*\n', '\n\n', without_fences)
# Function to understand text and images | |
def understand_content(text, images):
    """Send the lesson materials to the Gemini model and return its plan.

    The request is the fixed teaching-assistant prompt, followed by ``text``
    and at most the first 10 entries of ``images``. The model's reply is
    logged, passed through ``clean_response`` and logged again.

    Args:
        text: Text extracted from the class materials.
        images: Sequence of images extracted from the class materials.

    Returns:
        The cleaned response text, or a fixed error message if anything in
        the request or the clean-up raises.
    """
    try:
        # Kept flush-left: any indentation here would become part of the
        # prompt text sent to the model.
        prompt = """
You are a teaching assistant.
The teacher has class materials for a lesson, but needs to create a document that students will use to complete the work in the teacher's absence and independently.
Analyze the attached educational content and provide a structured response in line with this template:
#1. 'Title' (including subject).
#2. 'Learning Objectives':
-'Know & Understand' (Information and concepts students will be acquiring: 1-3 points)
-'Do' (Skills students will be practicing / developing: 1-3 points).
#3. 'Instructions' (Brief overview):
-'Lesson' (Expands on Know, Understand, Do, providing a bulleted list students can refer to in the future to study)
-'Application' (Sequence of activities to apply/practice the learning)
-'Deliverables' (Submissions required + due date if indicated)
-'Extension' (Opportunities to learn more, go deeper, or fast forward. Create them if they are not present).
Derive your response from the class materials and format your response in a clear, structured manner using markdown headings and bullet points.
"""
        # Prompt first, then the text, then no more than 10 images
        request_parts = [prompt, text, *images[:10]]
        response = model.generate_content(request_parts)

        # Log the raw reply for debugging before it is cleaned
        raw_text = response.text
        logging.info(f"Raw response from Gemini: {raw_text}")

        cleaned_response = clean_response(raw_text)
        logging.info(f"Cleaned response: {cleaned_response}")
        return cleaned_response
    except Exception as err:
        logging.error(f"Error in content understanding: {err}")
        return "Error in processing the content. Please try again."
# Function to generate ELT plan | |
def generate_elt_plan(file):
    """Build the ELT plan for an uploaded PDF/PPTX file.

    Args:
        file: The value Gradio passes for the upload. This may be ``None``
            (nothing uploaded), a path string, or an object whose ``name``
            attribute is the path.

    Returns:
        The plan text produced by ``understand_content``, or a short status
        or error message when there is nothing to send to the model.
    """
    # Clicking the button without an upload used to surface an AttributeError
    # message; report the situation plainly instead.
    if file is None:
        return "No file uploaded"
    try:
        # Accept both a plain path string and a temp-file wrapper with .name
        file_path = file if isinstance(file, str) else file.name
        logging.info(f"Processing file: {file_path}")
        content, images = process_file(file_path)
        if isinstance(content, str) and content.startswith("An error occurred"):
            return content
        # These are process_file's status messages, not lesson content, so
        # they must not be forwarded to the model.
        if not images and content in ("No file uploaded", "Unsupported file format"):
            return content
        logging.info(f"Extracted content length: {len(content)}, Number of images: {len(images)}")
        # Nothing was extracted: skip the model call, which would otherwise
        # receive only the prompt.
        if not images and not content.strip():
            return "No readable text or images were found in the uploaded file."
        return understand_content(content, images)
    except Exception as e:
        logging.error(f"Error in generate_elt_plan: {e}")
        return f"An error occurred: {str(e)}"
# Set up Gradio Blocks | |
# NOTE(review): the original indentation of this section was lost; the nesting
# below (upload and button inside Row > Column, output directly under Blocks)
# is a reconstruction - confirm against the deployed layout.
with gr.Blocks() as demo:
    # Header image and page title
    gr.Image('img.png', width=320, height=180, label='Dash')
    gr.Markdown("# ELT Planner Assistant")
    with gr.Row():
        with gr.Column():
            # Upload control and the button that starts plan generation
            file_input = gr.File(label="Upload PPTX or PDF")
            submit_btn = gr.Button("Generate ELT Plan")
    # The generated plan is rendered as markdown
    output = gr.Markdown(label="Generated ELT Plan")
    # Clicking the button passes the uploaded file to generate_elt_plan and
    # writes its return value to the output component
    submit_btn.click(generate_elt_plan, inputs=file_input, outputs=output)
    # msg is imported from the hd module at the top of this file
    gr.Markdown(msg)
# Launch the protected ELT app: access requires the username/password pair
# read from the environment earlier in this file
demo.launch(auth=(username,password))