# Spaces: Sleeping  (status banner captured from the hosting page; not part of the program)
import io
import json
import logging
import re
import tempfile
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import quote, quote_plus

import docx
import numpy as np
import pandas as pd
import PyPDF2
import requests
import streamlit as st
from bs4 import BeautifulSoup
from PIL import Image
from pptx import Presentation
from pptx import Presentation as PPTXPresentation
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN
from pptx.util import Inches, Pt
from transformers import pipeline
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Nutrition-specific knowledge base: maps each nutrition field offered in the
# sidebar to the specific topics that can be selected under it.
NUTRITION_FIELDS: Dict[str, List[str]] = {
    "Clinical Nutrition": [
        "Medical Nutrition Therapy",
        "Disease-Specific Diets",
        "Nutritional Assessment",
        "Hospital Dietetics",
        "Nutrition Care Process",
    ],
    "Sports Nutrition": [
        "Performance Nutrition",
        "Sports Supplements",
        "Hydration Strategies",
        "Pre/Post Workout Nutrition",
        "Recovery Nutrition",
    ],
    "Community Nutrition": [
        "Public Health Nutrition",
        "Food Security",
        "Population Health",
        "School Nutrition",
        "Cultural Food Practices",
    ],
    "Therapeutic Nutrition": [
        "Diet Therapy",
        "Disease Management",
        "Metabolic Disorders",
        "Diabetes Management",
        "Cardiovascular Nutrition",
    ],
}
class ContentEnricher:
    """Best-effort fetchers for supplementary images and reference text.

    The class holds no state. Every fetcher is a static method because the
    functions take only ``topic`` yet are invoked through an instance
    (``enricher.fetch_pubmed_content(topic)``); without ``@staticmethod`` the
    instance would be passed as an extra positional argument.

    Network and parsing failures are logged and reported as "no result"
    (``None`` for images, ``""`` for text) instead of being raised.
    """

    # Seconds to wait on a single HTTP request before giving up, so an
    # unresponsive host cannot hang the app indefinitely.
    REQUEST_TIMEOUT = 10

    @staticmethod
    def _image_search_url(topic: str) -> str:
        """Build the Unsplash URL for *topic* with comma-separated keywords."""
        # Escape each word so characters such as '/', '?' or '&' in a topic
        # cannot alter the structure of the URL.
        keywords = ",".join(quote(word, safe="") for word in topic.split())
        return f"https://source.unsplash.com/featured/?{keywords}"

    @staticmethod
    def _nutrition_gov_url(topic: str) -> str:
        """Build the nutrition.gov search URL for *topic*."""
        # quote_plus keeps the original "space becomes +" convention while
        # escaping '/' so a topic like "Pre/Post ..." stays one path segment.
        return f"https://www.nutrition.gov/search/{quote_plus(topic)}"

    @staticmethod
    def fetch_open_images(topic: str) -> Optional[str]:
        """Fetch free images from Unsplash's public website.

        Returns the image URL for *topic* when the request answers with
        HTTP 200, otherwise ``None``.
        """
        url = ContentEnricher._image_search_url(topic)
        try:
            # Only the status is needed; stream=True avoids downloading the
            # body and the context manager releases the connection.
            with requests.get(
                url, stream=True, timeout=ContentEnricher.REQUEST_TIMEOUT
            ) as response:
                if response.status_code == 200:
                    return url
        except Exception as e:
            logger.warning("Error fetching image: %s", e)
        return None

    @staticmethod
    def fetch_pubmed_content(topic: str) -> str:
        """Fetch content from PubMed's public RSS feed.

        Returns the title and description of up to the first three feed
        items, separated by blank lines, or ``""`` when nothing is available.
        """
        try:
            response = requests.get(
                "https://pubmed.ncbi.nlm.nih.gov/rss/search/1",
                params={"term": topic},  # requests URL-encodes the query
                timeout=ContentEnricher.REQUEST_TIMEOUT,
            )
            if response.status_code == 200:
                soup = BeautifulSoup(response.content, 'xml')
                content = []
                for item in soup.find_all('item')[:3]:  # Get first 3 articles
                    # A missing child tag is treated as empty text so one
                    # malformed item does not discard the whole result.
                    title = item.title.text if item.title is not None else ""
                    desc = item.description.text if item.description is not None else ""
                    content.append(f"{title}\n{desc}")
                return "\n\n".join(content)
        except Exception as e:
            logger.warning("Error fetching PubMed content: %s", e)
        return ""

    @staticmethod
    def fetch_nutrition_gov(topic: str) -> str:
        """Fetch content from nutrition.gov public pages.

        Returns the stripped text of the first five ``<p>`` elements of the
        search page joined by newlines, or ``""`` when nothing is available.
        """
        try:
            response = requests.get(
                ContentEnricher._nutrition_gov_url(topic),
                timeout=ContentEnricher.REQUEST_TIMEOUT,
            )
            if response.status_code == 200:
                soup = BeautifulSoup(response.content, 'html.parser')
                return "\n".join(p.text.strip() for p in soup.find_all('p')[:5])
        except Exception as e:
            logger.warning("Error fetching Nutrition.gov content: %s", e)
        return ""
class FileProcessor:
    """Extract plain text from uploaded files.

    Each method receives a binary file-like object and returns its text.
    The methods are static because they take only ``file`` yet are invoked
    through an instance (``processor.process_txt(f)``).
    """

    @staticmethod
    def process_txt(file) -> str:
        """Return the contents of a text file decoded as UTF-8.

        A leading byte-order mark is dropped and undecodable bytes become
        U+FFFD instead of raising ``UnicodeDecodeError``.
        """
        return file.read().decode('utf-8-sig', errors='replace')

    @staticmethod
    def process_docx(file) -> str:
        """Return the text of every paragraph of a .docx file, one per line."""
        doc = docx.Document(file)
        return "\n".join(paragraph.text for paragraph in doc.paragraphs)

    @staticmethod
    def process_pdf(file) -> str:
        """Return the extracted text of every PDF page, one page per line."""
        pdf_reader = PyPDF2.PdfReader(file)
        # Guard against pages for which no text is returned so the join
        # never receives a non-string value.
        return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

    @staticmethod
    def process_pptx(file) -> str:
        """Return the text of every shape that exposes ``.text``, one per line."""
        prs = PPTXPresentation(file)
        return "\n".join(
            shape.text
            for slide in prs.slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        )
class PresentationGenerator:
    """Generate slide content with a text model and render it to a .pptx.

    Holds the text-generation pipeline plus a ``ContentEnricher`` and a
    ``FileProcessor`` instance that callers reach via ``content_enricher``
    and ``file_processor``.
    """

    # Seconds to wait when downloading a slide image.
    REQUEST_TIMEOUT = 10
    # Bullet character, written as an escape so it survives re-encoding of
    # this file (the literal had been corrupted into mojibake).
    BULLET = "\u2022"

    def __init__(self):
        self.text_generator = self.load_text_generator()
        self.content_enricher = ContentEnricher()
        self.file_processor = FileProcessor()

    @staticmethod
    def load_text_generator():
        """Return a ``gpt2-medium`` text-generation pipeline.

        Static because it takes no ``self`` yet is called as
        ``self.load_text_generator()``.
        """
        # NOTE(review): the pipeline is rebuilt for every new instance;
        # consider caching it at the call site if construction is repeated.
        return pipeline("text-generation", model="gpt2-medium")

    def add_image_to_slide(self, slide, image_url: str):
        """Download *image_url* and place it on *slide*; log and skip on failure."""
        try:
            response = requests.get(image_url, timeout=self.REQUEST_TIMEOUT)
            if response.status_code == 200:
                # Hand the bytes over as an in-memory stream instead of
                # writing a temp file that was never deleted.
                slide.shapes.add_picture(
                    io.BytesIO(response.content),
                    Inches(1), Inches(1), Inches(4), Inches(3),
                )
        except Exception as e:
            logger.warning("Failed to add image to slide: %s", e)

    def create_slide(self, prs: "Presentation", title: str, points: List[str],
                     image_url: Optional[str], template_style: str) -> None:
        """Append one slide with *title*, bulleted *points* and an optional image.

        Only the "Professional" template style changes the title formatting;
        other values leave the layout defaults in place.
        """
        slide = prs.slides.add_slide(prs.slide_layouts[1])

        # Set title
        title_shape = slide.shapes.title
        title_shape.text = title

        # Style based on template
        if template_style == "Professional":
            title_font = title_shape.text_frame.paragraphs[0].font
            title_font.size = Pt(40)
            title_font.color.rgb = RGBColor(0, 51, 102)

        # Add content
        text_frame = slide.shapes.placeholders[1].text_frame
        for index, point in enumerate(points):
            # Reuse the paragraph the text frame already contains for the
            # first point; adding a new one would leave an empty line on top.
            if index == 0:
                paragraph = text_frame.paragraphs[0]
            else:
                paragraph = text_frame.add_paragraph()
            paragraph.text = f"{self.BULLET} {point}"
            paragraph.font.size = Pt(18)
            paragraph.font.color.rgb = RGBColor(51, 51, 51)

        # Add image if available
        if image_url:
            self.add_image_to_slide(slide, image_url)

    def generate_content(self, topic: str, base_content: str, category: str,
                         max_points: int = 5) -> Optional[Dict[str, Any]]:
        """Generate enhanced content with multiple sources.

        Combines *base_content* with PubMed and nutrition.gov text, feeds it
        to the text generator and splits the output into sections.

        Returns a dict with keys ``content`` (raw generated text),
        ``sections``, ``topic`` and ``category``; returns ``None`` after
        logging and showing a Streamlit error when anything fails.
        *max_points* caps the number of points kept per section.
        """
        try:
            # Combine content from multiple sources
            pubmed_content = self.content_enricher.fetch_pubmed_content(topic)
            nutrition_gov_content = self.content_enricher.fetch_nutrition_gov(topic)
            combined_content = f"{base_content}\n{pubmed_content}\n{nutrition_gov_content}"

            # Generate with context
            prompt = f"Create educational content about {topic} in {category}:\n{combined_content}"
            # NOTE(review): max_length presumably counts the prompt tokens as
            # well, so a long prompt may leave little or no room for new
            # text - confirm against the transformers generation docs.
            generated = self.text_generator(
                prompt,
                max_length=500,
                num_return_sequences=1,
                temperature=0.7,
            )[0]['generated_text']

            # Process into sections
            sections = self.process_content(generated, max_points)
            return {
                'content': generated,
                'sections': sections,
                'topic': topic,
                'category': category,
            }
        except Exception as e:
            logger.exception("Error generating content: %s", e)
            st.error(f"Error generating content: {e}")
            return None

    def process_content(self, content: str, max_points: int = 5) -> List[Dict]:
        """Process content into well-structured sections.

        Line rules, applied to each non-blank stripped line:
        * a line ending in ``:`` starts a new section titled with that line;
        * a line starting with a bullet, ``*`` or ``-`` becomes one point;
        * any other line is split on ``". "`` and each piece becomes a point.

        Each section keeps at most *max_points* points, and sections without
        points are dropped. Text before the first heading lands in a section
        whose title is ``''``.
        """
        bullet_markers = (self.BULLET, '*', '-')
        marker_chars = self.BULLET + '*- '

        sections = []
        current_section = {'title': '', 'points': []}
        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            points = current_section['points']
            if line.endswith(':'):
                if points:
                    sections.append(current_section)
                current_section = {'title': line.rstrip(':'), 'points': []}
            elif line.startswith(bullet_markers):
                text = line.lstrip(marker_chars)
                # Skip marker-only lines such as "---" so no empty bullet
                # ends up on a slide.
                if text and len(points) < max_points:
                    points.append(text)
            else:
                remaining = max_points - len(points)
                for sentence in line.split('. ')[:remaining]:
                    if sentence.strip():
                        points.append(sentence.strip())

        if current_section['points']:
            sections.append(current_section)
        return sections

    def create_presentation(self, title: str, content: Dict[str, Any],
                            template: str) -> Optional[io.BytesIO]:
        """Build the deck and return it as an in-memory .pptx stream.

        Creates a title slide followed by one slide per entry in
        ``content['sections']`` while updating a Streamlit progress bar.
        Returns ``None`` after logging and showing a Streamlit error when
        anything fails.
        """
        try:
            prs = Presentation()

            # Title slide
            title_slide = prs.slides.add_slide(prs.slide_layouts[0])
            title_slide.shapes.title.text = title

            # Process sections
            sections = content['sections']
            progress_bar = st.progress(0)
            for idx, section in enumerate(sections):
                image_url = self.content_enricher.fetch_open_images(section['title'])
                self.create_slide(prs, section['title'], section['points'], image_url, template)
                progress_bar.progress((idx + 1) / len(sections))

            # Save
            output = io.BytesIO()
            prs.save(output)
            output.seek(0)  # rewind so the caller reads from the start
            return output
        except Exception as e:
            logger.exception("Error creating presentation: %s", e)
            st.error(f"Error creating presentation: {e}")
            return None
def main():
    """Render the Streamlit UI and drive presentation generation.

    The sidebar selects the nutrition field, topic, template style and the
    number of points per slide. The left column collects typed and/or
    uploaded content; the right column generates the deck, offers it for
    download and shows a preview of the sections.
    """
    # NOTE(review): the emoji below are written as escapes because the
    # literals were corrupted in this copy of the file. U+1F957 (green salad)
    # and U+1F4E5 (inbox tray) are the most likely originals - confirm.
    st.set_page_config(page_title="DietitianSlide AI", layout="wide", page_icon="\U0001F957")
    # NOTE(review): this constructs the generator (and its model pipeline) on
    # every run of main(); consider caching it if reruns are frequent.
    generator = PresentationGenerator()
    st.title("\U0001F957 DietitianSlide AI - Professional Nutrition Presentations")

    # Sidebar settings
    with st.sidebar:
        category = st.selectbox("Nutrition Field", list(NUTRITION_FIELDS.keys()))
        topic = st.selectbox("Specific Topic", NUTRITION_FIELDS[category])
        template = st.selectbox("Template Style",
                                ["Professional", "Academic", "Modern", "Clinical"])
        max_points = st.slider("Points per Slide", 3, 8, 5)

    # Main content area
    col1, col2 = st.columns([2, 1])

    with col1:
        st.subheader("Content Input")
        content_method = st.radio("Input Method",
                                  ["Write Content", "Upload Files", "Both"])
        user_content = ""
        if content_method in ["Write Content", "Both"]:
            user_content = st.text_area("Enter your content", height=150)
        if content_method in ["Upload Files", "Both"]:
            uploaded_file = st.file_uploader(
                "Upload File",
                type=["txt", "docx", "pdf", "pptx"]
            )
            if uploaded_file:
                file_processor = generator.file_processor
                by_mime = {
                    "text/plain": file_processor.process_txt,
                    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": file_processor.process_docx,
                    "application/pdf": file_processor.process_pdf,
                    "application/vnd.openxmlformats-officedocument.presentationml.presentation": file_processor.process_pptx,
                }
                # Fall back to the file extension when the reported MIME type
                # is not one of the four expected values.
                by_suffix = {
                    ".txt": file_processor.process_txt,
                    ".docx": file_processor.process_docx,
                    ".pdf": file_processor.process_pdf,
                    ".pptx": file_processor.process_pptx,
                }
                extract = by_mime.get(uploaded_file.type) or by_suffix.get(
                    Path(uploaded_file.name).suffix.lower()
                )

                file_content = ""
                if extract is not None:
                    try:
                        file_content = extract(uploaded_file)
                    except Exception as e:
                        # A corrupt or unreadable upload should produce a
                        # message, not take down the whole page.
                        logger.exception("Error reading uploaded file: %s", e)
                        st.error(f"Could not read {uploaded_file.name}: {e}")

                if file_content:
                    st.text_area("Extracted Content", file_content, height=100)
                    user_content = f"{user_content}\n{file_content}" if user_content else file_content

    with col2:
        st.subheader("Preview & Generate")
        if st.button("Generate Presentation", type="primary"):
            if not user_content:
                st.error("Please provide some content!")
                return
            with st.spinner("Generating your presentation..."):
                content = generator.generate_content(topic, user_content, category)
                if content:
                    # Re-split the generated text so the "Points per Slide"
                    # setting actually limits each slide.
                    content['sections'] = generator.process_content(
                        content['content'], max_points
                    )
                    output = generator.create_presentation(
                        f"{category}: {topic}",
                        content,
                        template
                    )
                    if output:
                        st.success("Presentation generated successfully!")
                        st.download_button(
                            "\U0001F4E5 Download Presentation",
                            output,
                            f"nutrition_{topic.lower().replace(' ', '_')}.pptx",
                            "application/vnd.openxmlformats-officedocument.presentationml.presentation"
                        )

                    # Preview content
                    st.subheader("Content Preview")
                    for section in content['sections']:
                        st.write(f"**{section['title']}**")
                        for point in section['points']:
                            st.write(f"\u2022 {point}")
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()