"""Story Magic: a Streamlit app that turns an uploaded picture into a story.

Pipeline: image -> English caption -> English story -> Chinese translation
-> spoken audio.  Results are kept in ``st.session_state`` so that Streamlit
reruns (e.g. pressing the play button) do not recompute them.
"""

import contextlib
import hashlib
import io
import os
import tempfile
from typing import Optional

import streamlit as st
import torch  # not referenced directly here; kept from the original file
from gtts import gTTS
from transformers import (  # only ``pipeline`` is used in this script
    AutoModel,
    AutoModelForSeq2SeqLM,
    AutoProcessor,
    AutoTokenizer,
    pipeline,
)

# Page configuration is issued before any other Streamlit call so that it is
# always the first Streamlit command of the script run.
st.set_page_config(page_title="故事魔法", page_icon="✨", layout="wide")

# Default value of every session-state slot used by the app.
# ``source_digest`` identifies the upload the cached results belong to.
_STATE_DEFAULTS = {
    'scenario': None,
    'scenario_zh': None,
    'story': None,
    'story_zh': None,
    'audio_generated': False,
    'audio_data': None,
    'source_digest': None,
}

# Welcome message (Cantonese) shown while no file is uploaded.
_WELCOME_MESSAGE = """

✨

歡迎嚟到故事魔法！

上載一張你鍾意嘅相片，我哋嘅魔法師會幫你變出一個好好玩嘅故事！

🚀 🦄 🔮 🌈

"""


def _reset_results() -> None:
    """Put every session-state slot back to its default value."""
    for key, default in _STATE_DEFAULTS.items():
        st.session_state[key] = default


# Initialize session state for storing data
for _key, _default in _STATE_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default


# function part
@st.cache_resource(show_spinner=False)
def _load_pipeline(task: str, model: str):
    """Build a transformers pipeline once and reuse it across reruns."""
    return pipeline(task, model=model)


# img2text
def img2text(url: str) -> str:
    """Return the generated caption for the image at path/URL *url*."""
    image_to_text_model = _load_pipeline(
        "image-to-text", "Salesforce/blip-image-captioning-base"
    )
    return image_to_text_model(url)[0]["generated_text"]


# Translation function EN to ZH
def translate_to_chinese(text: str) -> str:
    """Translate English *text* with the ``steve-tong/opus-mt-en-zh-hk`` model."""
    translator = _load_pipeline("translation", "steve-tong/opus-mt-en-zh-hk")
    return translator(text)[0]["translation_text"]


# text2story
def text2story(text: str) -> str:
    """Generate a short GPT-2 story from the scene description *text*.

    The prompt is stripped from the generated text before it is returned.
    """
    generator = _load_pipeline('text-generation', 'gpt2')

    # Create a prompt for the story
    prompt = f"Create a short story about this scene: {text}\n\nStory:"

    # Generate the story
    generated = generator(
        prompt,
        max_length=100,
        num_return_sequences=1,
        temperature=0.7,
    )[0]['generated_text']

    # Clean up the story by removing the prompt
    return generated.replace(prompt, "").strip()


def text2audio(text: str, lang: str = 'zh') -> Optional[dict]:
    """Synthesize *text* with gTTS.

    Returns a dict with the audio in an ``io.BytesIO`` under ``'audio'``
    (rewound to the start) and a ``'sampling_rate'`` entry, or ``None`` after
    reporting the error in the UI when synthesis fails.
    """
    try:
        tts = gTTS(text=text, lang=lang)

        # Save to BytesIO object
        audio_bytes = io.BytesIO()
        tts.write_to_fp(audio_bytes)
        audio_bytes.seek(0)  # Reset the pointer to the start
    except Exception as e:  # UI boundary: report the problem instead of crashing
        st.error(f"音頻製作出左問題: {str(e)}")
        return None

    return {
        'audio': audio_bytes,
        'sampling_rate': 24000,  # kept for callers that read this key
    }


def _caption_uploaded_image(image_bytes: bytes, original_name: str) -> str:
    """Caption *image_bytes* through a private temporary file.

    Only the extension of *original_name* is reused, so a user-chosen file
    name can never overwrite (or cause the deletion of) a file in the
    working directory.  The temporary file is always removed.
    """
    suffix = os.path.splitext(original_name)[1]
    fd, temp_path = tempfile.mkstemp(suffix=suffix)
    try:
        with os.fdopen(fd, "wb") as handle:
            handle.write(image_bytes)
        return img2text(temp_path)
    finally:
        with contextlib.suppress(FileNotFoundError):
            os.remove(temp_path)


# Custom CSS hook for the kid-friendly UI (the style payload is blank here).
st.markdown(""" """, unsafe_allow_html=True)

# App header with Cantonese
st.title("✨ 故事魔法")
st.markdown(
    "\n\n上載一張圖片，睇下佢點變成一個神奇嘅故事！\n\n",
    unsafe_allow_html=True,
)

# File uploader with Cantonese
with st.container():
    st.subheader("揀一張靚相啦！")
    uploaded_file = st.file_uploader("", key="upload")

if uploaded_file is not None:
    image_bytes = uploaded_file.getvalue()

    # A different picture invalidates everything derived from the old one,
    # even when the user swaps files without clearing the uploader first.
    digest = hashlib.sha256(image_bytes).hexdigest()
    if st.session_state.source_digest != digest:
        _reset_results()
        st.session_state.source_digest = digest

    # Display image
    # NOTE(review): newer Streamlit releases prefer ``use_container_width``;
    # confirm against the deployed version before switching.
    st.image(uploaded_file, use_column_width=True)

    # Each value is generated only if it is missing, so a failure halfway
    # through leaves the next rerun able to resume instead of crashing on a
    # ``None`` result.

    # Stage 1: Image to Text
    with st.container():
        st.markdown("\n\n🔍 圖片解讀中\n\n", unsafe_allow_html=True)

        if st.session_state.scenario is None:
            st.session_state.scenario = _caption_uploaded_image(
                image_bytes, uploaded_file.name
            )
        st.text("英文描述: " + st.session_state.scenario)

        if st.session_state.scenario_zh is None:
            st.session_state.scenario_zh = translate_to_chinese(
                st.session_state.scenario
            )
        st.text("中文描述: " + st.session_state.scenario_zh)

    # Stage 2: Text to Story
    with st.container():
        st.markdown("\n\n📝 故事創作中\n\n", unsafe_allow_html=True)

        if st.session_state.story is None:
            st.session_state.story = text2story(st.session_state.scenario)
        st.text("英文故事: " + st.session_state.story)

        if st.session_state.story_zh is None:
            st.session_state.story_zh = translate_to_chinese(
                st.session_state.story
            )
        st.text("中文故事: " + st.session_state.story_zh)

    # Stage 3: Story to Audio data
    with st.container():
        st.markdown("\n\n🔊 故事準備朗讀中\n\n", unsafe_allow_html=True)

        # Play button with Cantonese text
        if st.button("🔊 播放故事"):
            # Only generate audio if not already done
            if not st.session_state.audio_generated:
                audio = text2audio(st.session_state.story_zh, lang='zh')
                st.session_state.audio_data = audio
                # Mark as done only on success so a later click can retry.
                st.session_state.audio_generated = audio is not None

            # Play the audio
            audio = st.session_state.audio_data
            if audio:
                # gTTS emits MP3 data; pass raw bytes so playback does not
                # depend on the buffer's current read position.
                st.audio(
                    audio['audio'].getvalue(),
                    format="audio/mpeg",
                    start_time=0,
                )
            else:
                st.error("哎呀！再試多次啦！")
else:
    # Clear session state when no file is uploaded
    _reset_results()

    # Welcome message in Cantonese
    st.markdown(_WELCOME_MESSAGE, unsafe_allow_html=True)