"""ELT Planner Assistant.

A password-protected Gradio app: the user uploads a PDF or PPTX of class
materials, the text and embedded images are extracted, and Gemini is asked to
turn them into a structured plan that students can follow independently.
"""

import io
import logging
import os
import re

import fitz
import google.generativeai as genai
import gradio as gr
import PyPDF2  # noqa: F401  (not used in this file's visible code; kept)
from jinja2 import Template  # noqa: F401  (not used in this file's visible code; kept)
from PIL import Image
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE

from hd import msg

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set up username and password as secrets
username = os.getenv('USERNAME')
password = os.getenv('PASSWORD')

# Set up API key as secret
GOOGLE_API_KEY = os.getenv('GEMINI_API_KEY')
if not GOOGLE_API_KEY:
    logger.warning("GEMINI_API_KEY is not set; Gemini requests may fail.")

# Configure Gemini API
genai.configure(api_key=GOOGLE_API_KEY)

# Initialize Gemini model
model = genai.GenerativeModel('gemini-1.5-flash')

# Maximum number of extracted images forwarded to Gemini per request.
MAX_IMAGES = 10

# Status strings returned by process_file() in place of document text.
NO_FILE_MESSAGE = "No file uploaded"
UNSUPPORTED_FORMAT_MESSAGE = "Unsupported file format"
PROCESSING_ERROR_PREFIX = "An error occurred"
EMPTY_CONTENT_MESSAGE = "No readable content could be extracted from the file."

# Matches Markdown code-fence markers: the original "```python" form, a fence
# with any language tag that ends its line (e.g. "```markdown"), or a bare fence.
_FENCE_RE = re.compile(r'```python|```[\w+-]*[ \t\r]*$|```', re.MULTILINE)
# Matches runs of blank / whitespace-only lines.
_BLANK_LINES_RE = re.compile(r'\n\s*\n')

# Instruction sent to Gemini ahead of the extracted text and images.
ELT_PROMPT = """
You are a teaching assistant. The teacher has class materials for a lesson, but needs to create a document that students will use to complete the work in the teacher's absence and independently.
Analyze the attached educational content and provide a structured response in line with this template:
#1. 'Title' (including subject).
#2. 'Learning Objectives':
-'Know & Understand' (Information and concepts students will be acquiring: 1-3 points)
-'Do' (Skills students will be practicing / developing: 1-3 points).
#3. 'Instructions' (Brief overview):
-'Lesson' (Expands on Know, Understand, Do, providing a bulleted list students can refer to in the future to study)
-'Application' (Sequence of activities to apply/practice the learning)
-'Deliverables' (Submissions required + due date if indicated)
-'Extension' (Opportunities to learn more, go deeper, or fast forward. Create them if they are not present).
Derive your response from the class materials and format your response in a clear, structured manner using markdown headings and bullet points.
"""


def _open_image(image_bytes):
    """Decode raw image bytes into a PIL image, or return None if undecodable."""
    if not image_bytes:
        return None
    try:
        return Image.open(io.BytesIO(image_bytes))
    except (OSError, ValueError) as e:
        # One unsupported image format must not discard the whole document.
        logger.warning("Skipping image that could not be decoded: %s", e)
        return None


# Function to extract content from PDF
def extract_content_from_pdf(file_path):
    """Extract text and embedded images from a PDF.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        A ``(text, images)`` tuple: the text of every page, each followed by
        a newline, and a list of PIL images. On failure the error is logged
        and ``("", [])`` is returned.
    """
    try:
        page_texts = []
        images = []
        # The context manager closes the document even if extraction fails.
        with fitz.open(file_path) as doc:
            for page in doc:
                page_texts.append(page.get_text() + "\n")
                for img in page.get_images():
                    xref = img[0]
                    base_image = doc.extract_image(xref)
                    image_bytes = base_image.get("image") if base_image else None
                    image = _open_image(image_bytes)
                    if image is not None:
                        images.append(image)
        return "".join(page_texts), images
    except Exception as e:
        logger.exception("Error extracting content from PDF: %s", e)
        return "", []


# Function to extract content from PPTX
def extract_content_from_pptx(file_path):
    """Extract text and pictures from a PowerPoint (.pptx) file.

    Args:
        file_path: Path of the PPTX file to read.

    Returns:
        A ``(text, images)`` tuple: the text of every shape that exposes a
        ``text`` attribute, each followed by a newline, and a list of PIL
        images for picture shapes. On failure the error is logged and
        ``("", [])`` is returned.
    """
    try:
        shape_texts = []
        images = []
        prs = Presentation(file_path)
        for slide in prs.slides:
            for shape in slide.shapes:
                if hasattr(shape, 'text'):
                    shape_texts.append(shape.text + "\n")
                if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
                    try:
                        image_bytes = shape.image.blob
                    except (AttributeError, ValueError) as e:
                        # e.g. a linked picture with no embedded image data.
                        logger.warning("Skipping picture without image data: %s", e)
                        continue
                    image = _open_image(image_bytes)
                    if image is not None:
                        images.append(image)
        return "".join(shape_texts), images
    except Exception as e:
        logger.exception("Error extracting content from PPTX: %s", e)
        return "", []


# Function to process file
def process_file(file_path):
    """Dispatch a file to the matching extractor based on its extension.

    Args:
        file_path: Path of the uploaded file, or None.

    Returns:
        A ``(text, images)`` tuple. When nothing can be extracted, ``text``
        is a status message ("No file uploaded", "Unsupported file format",
        or one starting with "An error occurred") and ``images`` is empty.
    """
    if file_path is None:
        return NO_FILE_MESSAGE, []
    try:
        lowered = file_path.lower()
        if lowered.endswith('.pdf'):
            return extract_content_from_pdf(file_path)
        if lowered.endswith('.pptx'):
            return extract_content_from_pptx(file_path)
        return UNSUPPORTED_FORMAT_MESSAGE, []
    except Exception as e:
        logger.exception("Error processing file: %s", e)
        return f"An error occurred while processing the file: {str(e)}", []


# Function to clean response
def clean_response(response_text):
    """Strip code-fence markers and collapse blank-line runs in model output.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The text without Markdown fence markers, with each run of blank
        lines reduced to a single blank line and outer whitespace removed.
    """
    # Remove code block markers if present
    cleaned = _FENCE_RE.sub('', response_text).strip()
    # Handle newlines and indentation
    return _BLANK_LINES_RE.sub('\n\n', cleaned)


# Function to understand text and images
def understand_content(text, images):
    """Ask Gemini to turn extracted lesson content into a structured plan.

    Args:
        text: Text extracted from the uploaded document.
        images: PIL images extracted from the document; only the first
            ``MAX_IMAGES`` are sent.

    Returns:
        The cleaned model response, or a generic error message if the
        request or response handling fails.
    """
    try:
        # Prepare content for Gemini; an empty text part is left out.
        parts = [ELT_PROMPT]
        if text and text.strip():
            parts.append(text)
        parts.extend(images[:MAX_IMAGES])

        # Generate response from Gemini
        response = model.generate_content(parts)
        raw_text = response.text

        # Log the raw response for debugging
        logger.info("Raw response from Gemini: %s", raw_text)

        # Clean the response
        cleaned_response = clean_response(raw_text)
        logger.info("Cleaned response: %s", cleaned_response)
        return cleaned_response
    except Exception as e:
        logger.exception("Error in content understanding: %s", e)
        return "Error in processing the content. Please try again."


def _is_status_message(content):
    """Return True if ``content`` is a process_file() status, not document text."""
    if not isinstance(content, str):
        return False
    return (
        content in (NO_FILE_MESSAGE, UNSUPPORTED_FORMAT_MESSAGE)
        or content.startswith(PROCESSING_ERROR_PREFIX)
    )


# Function to generate ELT plan
def generate_elt_plan(file):
    """Gradio callback: build an ELT plan from an uploaded PDF/PPTX.

    Args:
        file: The value supplied by the ``gr.File`` input: an object with a
            ``name`` attribute holding the path, a plain path string, or
            None when nothing was uploaded.

    Returns:
        Markdown text for the output component: the generated plan, or a
        message explaining why no plan could be produced.
    """
    if file is None:
        return NO_FILE_MESSAGE
    try:
        file_path = getattr(file, "name", file)
        logger.info("Processing file: %s", file_path)
        content, images = process_file(file_path)

        # Status messages come back with no images; show them to the user
        # instead of sending them to the model as if they were lesson text.
        if not images and _is_status_message(content):
            return content

        logger.info(
            "Extracted content length: %d, Number of images: %d",
            len(content),
            len(images),
        )
        if not images and not content.strip():
            return EMPTY_CONTENT_MESSAGE

        return understand_content(content, images)
    except Exception as e:
        logger.exception("Error in generate_elt_plan: %s", e)
        return f"An error occurred: {str(e)}"


# Set up Gradio Blocks
# NOTE(review): component nesting was reconstructed from a whitespace-collapsed
# source; confirm the output placement against the intended layout.
with gr.Blocks() as demo:
    gr.Image('img.png', width=320, height=180, label='Dash')
    gr.Markdown("# ELT Planner Assistant")
    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="Upload PPTX or PDF")
            submit_btn = gr.Button("Generate ELT Plan")
    output = gr.Markdown(label="Generated ELT Plan")
    submit_btn.click(generate_elt_plan, inputs=file_input, outputs=output)
    gr.Markdown(msg)

# Launch the protected ELT app
if __name__ == "__main__":
    if not username or not password:
        # Without both credentials no login could succeed; fail with a clear
        # message instead of serving an app nobody can sign in to.
        raise RuntimeError(
            "USERNAME and PASSWORD environment variables must be set "
            "to launch the protected app."
        )
    demo.launch(auth=(username, password))