# ======================================
# Package Import
# ======================================
import re
import time

import streamlit as st
from PIL import Image
from transformers import pipeline


# ======================================
# Basic Initialization
# ======================================
# Streamlit re-executes this script on every user interaction. The loaders
# below are wrapped in st.cache_resource so each model is built once per
# server process instead of on every rerun. show_spinner=False keeps the
# loaders from emitting any UI element before st.set_page_config() is called.

@st.cache_resource(show_spinner=False)
def _load_image_caption_pipeline():
    """Build the image-captioning pipeline (cached across reruns)."""
    return pipeline(
        task="image-to-text",
        model="cnmoro/tiny-image-captioning",
    )


@st.cache_resource(show_spinner=False)
def _load_text_generation_pipeline():
    """Build the story text-generation pipeline (cached across reruns)."""
    # NOTE(review): max_new_tokens=100 is likely fewer tokens than the
    # "100-word" story requested in main(); confirm whether truncated
    # stories are acceptable.
    return pipeline(
        "text-generation",
        model="Qwen/Qwen3-0.6B",
        max_new_tokens=100,
    )


@st.cache_resource(show_spinner=False)
def _load_speech_pipeline():
    """Build the text-to-speech pipeline (cached across reruns)."""
    return pipeline("text-to-speech", model="facebook/mms-tts-eng")


# Image captioning pipeline with pretrained model
_image_caption_pipeline = _load_image_caption_pipeline()

# Story text-generation pipeline
_text_generation_pipeline = _load_text_generation_pipeline()

# Text-to-speech pipeline
_SPEECH_PIPELINE = _load_speech_pipeline()

# Matches a complete "<think>...</think>" block emitted by the chat model.
_THINK_BLOCK_PATTERN = re.compile(r"<think>.*?</think>", re.DOTALL)


# ======================================
# Function settings
# ======================================
def generate_image_caption(input_image):
    """
    Generate a textual description for an input image using a pretrained model.

    Args:
        input_image (Union[PIL.Image.Image, str]): Image to process. Can be either:
            - A PIL Image object
            - A string containing a filesystem path to an image file

    Returns:
        str: Generated caption text in natural language

    Example:
        >>> from PIL import Image
        >>> img = Image.open("photo.jpg")
        >>> caption = generate_image_caption(img)
        >>> print(f"Caption: {caption}")
    """
    # Process image through the captioning pipeline
    inference_results = _image_caption_pipeline(input_image)

    # Extract text from the first result dictionary
    return inference_results[0]["generated_text"]


def _strip_thinking_block(text: str) -> str:
    """Remove any ``<think>...</think>`` block and surrounding whitespace."""
    return _THINK_BLOCK_PATTERN.sub("", text).strip()


def generate_story_content(system_prompt: str, user_prompt: str) -> str:
    """
    Generate a story from the provided system and user prompts.

    The two prompts are concatenated into a single user message, followed by
    the "/no_think" switch so the model skips its reasoning output.

    Args:
        system_prompt: Defines the assistant's role and writing constraints
        user_prompt: Describes the story scenario and specific elements to include

    Returns:
        Generated story text without any thinking-process block

    Raises:
        RuntimeError: If text generation fails at any stage

    Example:
        >>> story = generate_story_content(
        ...     "You are a helpful children's author...",
        ...     "Kids playing with dogs in a sunny meadow..."
        ... )
    """
    try:
        # Prepare chat message structure. The space before "/no_think" keeps
        # the switch from being glued onto the last word of the prompt.
        conversation_history = [
            {
                "role": "user",
                "content": f"{system_prompt}{user_prompt} /no_think",
            },
        ]

        # Generate the story
        generation = _text_generation_pipeline(conversation_history)

        # The pipeline returns the conversation with the reply appended;
        # the last message is the assistant's answer.
        assistant_reply = generation[0]["generated_text"][-1]["content"]

        # Remove the (possibly empty) think block by pattern rather than by
        # a fixed character offset, so a reply without the block is not
        # truncated.
        return _strip_thinking_block(assistant_reply)
    except Exception as error:
        raise RuntimeError(f"Story generation failed: {str(error)}") from error


def generate_audio_from_story(story_text: str) -> dict:
    """
    Convert story text to speech using the text-to-speech pipeline.

    Args:
        story_text: Input story text to synthesize

    Returns:
        The pipeline output; callers read its "audio" and "sampling_rate"
        entries.

    Raises:
        ValueError: For empty/invalid input text
        RuntimeError: If audio generation fails

    Example:
        >>> speech = generate_audio_from_story("Children playing in the park")
        >>> samples, rate = speech["audio"], speech["sampling_rate"]
    """
    # Validate input text
    if not isinstance(story_text, str) or not story_text.strip():
        raise ValueError("Input story text must be a non-empty string")

    try:
        # Generate speech
        return _SPEECH_PIPELINE(story_text)
    except Exception as error:
        raise RuntimeError(f"Audio synthesis failed: {str(error)}") from error


# ======================================
# Main Application Interface
# ======================================
def main():
    """Main application interface for Streamlit"""
    # Page configuration
    st.set_page_config(
        page_title="Fantasy Adventure Generator",
        layout="wide",
        initial_sidebar_state="collapsed"
    )

    # Title and description
    st.title("🧙‍♂️ Fantasy Adventure Story Generator")
    st.markdown("""
    Upload an image and get:
    - Automatic scene description
    - AI-generated adventure story
    - Audio version of the story
    """)

    # Help section
    st.markdown("---")
    st.subheader("🌟 How to Use:")
    st.info("""
    1. Upload any picture (animals, nature, or people work best!)
    2. Click the generating button
    3. Wait for image analysis to complete
    4. Enjoy your story and audio!
    """)

    # File uploader
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    if uploaded_file is None:
        return

    # Process image
    image = Image.open(uploaded_file).convert("RGB")

    # Layout columns
    col1, col2 = st.columns(2)

    with col1:
        st.image(image, caption="Uploaded Image", use_container_width=True)

    # Generation button
    if not st.button("✨ Generate Story & Audio"):
        return

    with st.spinner("Processing your request..."):
        # Generate outputs and display results
        with col2:
            st.subheader("🔍 Scene Description")
            with st.spinner("Preparing story caption..."):
                caption = generate_image_caption(image)
            st.write(caption)

            try:
                st.subheader("📖 Generated Story")
                with st.spinner("Preparing story..."):
                    sys_prompt = "You are a fantasy writer. Create a 100-word adventure story about "
                    story = generate_story_content(sys_prompt, caption)
                st.write(story)

                st.subheader("🔊 Audio Playback")
                with st.spinner("Preparing speech..."):
                    speech = generate_audio_from_story(story)
                st.audio(speech["audio"], sample_rate=speech["sampling_rate"], format='audio/wav')
            except (RuntimeError, ValueError) as error:
                # Generation helpers raise these on model failure or when
                # the story came back empty; report instead of crashing.
                st.error(str(error))


if __name__ == "__main__":
    main()