import asyncio
import tempfile

import edge_tts
import streamlit as st
from transformers import pipeline

# Initialize session state for storing data
if 'scenario' not in st.session_state:
    st.session_state.scenario = None
if 'scenario_zh' not in st.session_state:
    st.session_state.scenario_zh = None
if 'story' not in st.session_state:
    st.session_state.story = None
if 'story_zh' not in st.session_state:
    st.session_state.story_zh = None
if 'audio_generated_zh' not in st.session_state:
    st.session_state.audio_generated_zh = False
if 'audio_path_zh' not in st.session_state:
    st.session_state.audio_path_zh = None
if 'audio_generated_en' not in st.session_state:
    st.session_state.audio_generated_en = False
if 'audio_path_en' not in st.session_state:
    st.session_state.audio_path_en = None


# function part

# img2text: caption the uploaded image with BLIP
def img2text(url):
    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    text = image_to_text_model(url)[0]["generated_text"]
    return text


# Translation function EN to ZH (Hong Kong Chinese)
def translate_to_chinese(text):
    translator = pipeline("translation", model="steve-tong/opus-mt-en-zh-hk")
    translation = translator(text)[0]["translation_text"]
    return translation


# text2story - using the mosaicml/mpt-7b-storywriter model for better stories
def text2story(text):
    try:
        # Initialize the improved story generation pipeline
        generator = pipeline("text-generation",
                             model="mosaicml/mpt-7b-storywriter",
                             trust_remote_code=True)

        # Create a prompt for the story
        prompt = f"Write a short children's story about this scene: {text}\n\nStory: "

        # Generate the story - keep max_length small because of the model size
        story = generator(prompt,
                          max_length=150,
                          num_return_sequences=1,
                          temperature=0.7,
                          repetition_penalty=1.2)[0]['generated_text']

        # Clean up the story by removing the prompt
        story = story.replace(prompt, "").strip()

        # Trim to a reasonable length if needed
        if len(story) > 500:
            sentences = story.split('.')
            trimmed_story = '.'.join(sentences[:5]) + '.'
            return trimmed_story
        return story
    except Exception as e:
        st.error(f"故事生成出問題: {str(e)}")  # "Something went wrong while generating the story"
        # Fall back to a simpler model if the advanced one fails
        fallback_generator = pipeline('text-generation', model='gpt2')
        fallback_prompt = f"Create a short story about this scene: {text}\n\nStory:"
        fallback_story = fallback_generator(fallback_prompt,
                                            max_length=100,
                                            num_return_sequences=1)[0]['generated_text']
        return fallback_story.replace(fallback_prompt, "").strip()
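
# Note: each helper above builds its Hugging Face pipeline on every call, so every
# Streamlit rerun reloads the model weights. A minimal optional sketch (the helper name
# get_caption_pipeline is illustrative, not part of the app) of loading a pipeline once
# per process with st.cache_resource, assuming the same BLIP model ID used in img2text:
@st.cache_resource
def get_caption_pipeline():
    # Built on first use, then reused across reruns and sessions
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")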

# Text to audio using edge_tts for Cantonese audio
async def text2audio_cantonese(text):
    try:
        # Use a Cantonese voice from edge-tts
        voice = "zh-HK-HiuMaanNeural"  # Female Cantonese voice
        # Alternative: "zh-HK-WanLungNeural" for a male voice

        # Create a temporary file for the MP3 output
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        temp_file.close()

        # Ask edge-tts to synthesise the text and save it to the file path
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(temp_file.name)

        # Return the path to the audio file
        return {'path': temp_file.name, 'success': True}
    except Exception as e:
        st.error(f"中文音頻製作出左問題: {str(e)}")  # "Something went wrong while generating the Chinese audio"
        return {'path': None, 'success': False}


# Text to audio using edge_tts for English audio
async def text2audio_english(text):
    try:
        # Use an English voice from edge-tts
        voice = "en-US-AriaNeural"  # Female English voice
        # Alternative: "en-US-GuyNeural" for a male voice

        # Create a temporary file for the MP3 output
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        temp_file.close()

        # Ask edge-tts to synthesise the text and save it to the file path
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(temp_file.name)

        # Return the path to the audio file
        return {'path': temp_file.name, 'success': True}
    except Exception as e:
        st.error(f"English audio generation error: {str(e)}")
        return {'path': None, 'success': False}
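
# The two text2audio_* helpers are coroutines, so the synchronous Streamlit script needs
# an event loop to run them. A minimal sketch of a wrapper (run_tts is an illustrative
# name, not part of the original code), assuming no other event loop is already running:
def run_tts(coro):
    # asyncio.run creates a fresh event loop, runs the coroutine to completion, and closes the loop
    return asyncio.run(coro)

# Usage sketch:
#   result = run_tts(text2audio_cantonese(st.session_state.story_zh))
#   if result['success']:
#       st.audio(result['path'], format="audio/mp3")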

st.set_page_config(page_title="故事魔法", page_icon="✨", layout="wide")

# Apply custom CSS for a modern, stylish, kid-friendly UI
st.markdown("""
""", unsafe_allow_html=True)

# App header in Cantonese ("Story Magic")
st.title("✨ 故事魔法")
st.markdown("上載一張圖片,睇下佢點變成一個神奇嘅故事!", unsafe_allow_html=True)  # "Upload a picture and watch it turn into a magical story!"

# Add a progress indicator for model loading
progress_placeholder = st.empty()

# File uploader with a Cantonese prompt ("Pick a nice photo!")
with st.container():
    st.subheader("揀一張靚相啦!")
    uploaded_file = st.file_uploader("", key="upload")

if uploaded_file is not None:
    # Save the uploaded file to disk
    bytes_data = uploaded_file.getvalue()
    temp_file_path = uploaded_file.name
    with open(temp_file_path, "wb") as file:
        file.write(bytes_data)

    # Display the image
    st.image(uploaded_file, use_column_width=True)

    # Only run the generation pipeline if no scenario has been produced yet this session
    if st.session_state.scenario is None:
        # Stage 1: Image to Text
        with st.container():
            st.markdown("