import streamlit as st from transformers import pipeline import torch from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline from transformers import AutoProcessor, AutoModel from gtts import gTTS import os import io # Initialize session state for storing data if 'scenario' not in st.session_state: st.session_state.scenario = None if 'scenario_zh' not in st.session_state: st.session_state.scenario_zh = None if 'story' not in st.session_state: st.session_state.story = None if 'story_zh' not in st.session_state: st.session_state.story_zh = None if 'audio_generated' not in st.session_state: st.session_state.audio_generated = False if 'audio_data' not in st.session_state: st.session_state.audio_data = None # function part # img2text def img2text(url): image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base") text = image_to_text_model(url)[0]["generated_text"] return text # Translation function EN to ZH def translate_to_chinese(text): translator = pipeline("translation", model="steve-tong/opus-mt-en-zh-hk") translation = translator(text)[0]["translation_text"] return translation # text2story def text2story(text): # Initialize the text generation pipeline generator = pipeline('text-generation', model='gpt2') # Create a prompt for the story prompt = f"Create a short story about this scene: {text}\n\nStory:" # Generate the story story = generator(prompt, max_length=100, num_return_sequences=1, temperature=0.7)[0]['generated_text'] # Clean up the story by removing the prompt story = story.replace(prompt, "").strip() return story def text2audio(text, lang='zh'): try: # Create a gTTS object with Chinese language tts = gTTS(text=text, lang=lang) # Save to BytesIO object audio_bytes = io.BytesIO() tts.write_to_fp(audio_bytes) audio_bytes.seek(0) # Reset the pointer to the start return { 'audio': audio_bytes, 'sampling_rate': 24000 # gTTS default sampling rate } except Exception as e: st.error(f"音頻製作出左問題: {str(e)}") return None # Apply custom CSS for modern, stylish kid-friendly UI st.set_page_config(page_title="故事魔法", page_icon="✨", layout="wide") st.markdown(""" """, unsafe_allow_html=True) # App header with Cantonese st.title("✨ 故事魔法") st.markdown("
上載一張圖片,睇下佢點變成一個神奇嘅故事!
", unsafe_allow_html=True) # File uploader with Cantonese with st.container(): st.subheader("揀一張靚相啦!") uploaded_file = st.file_uploader("", key="upload") if uploaded_file is not None: # Save uploaded file bytes_data = uploaded_file.getvalue() temp_file_path = uploaded_file.name with open(temp_file_path, "wb") as file: file.write(bytes_data) # Display image st.image(uploaded_file, use_column_width=True) # Reset session state if a new file is uploaded (detect by checking if there's no scenario yet) if st.session_state.scenario is None: # Stage 1: Image to Text with st.container(): st.markdown("