Spaces:
Sleeping
Sleeping
# ====================================== | |
# Package Import | |
# ====================================== | |
import streamlit as st | |
from PIL import Image | |
import time | |
from transformers import pipeline | |
# ====================================== | |
# Basic Initialization | |
# ====================================== | |
# Streamlit re-executes this script from top to bottom on every user
# interaction, so bare module-level ``pipeline(...)`` calls would rebuild all
# three models on each rerun.  ``st.cache_resource`` keeps one instance per
# server process and returns that same object on later reruns.
# ``show_spinner=False`` keeps the loaders from emitting a UI element before
# ``st.set_page_config`` is called in ``main``.


@st.cache_resource(show_spinner=False)
def _load_image_caption_pipeline():
    """Build the image-to-text pipeline used to caption uploaded images."""
    return pipeline(
        task="image-to-text",
        model="cnmoro/tiny-image-captioning",
    )


@st.cache_resource(show_spinner=False)
def _load_text_generation_pipeline():
    """Build the text-generation pipeline, capped at 100 new tokens."""
    return pipeline(
        "text-generation",
        model="Qwen/Qwen3-0.6B",
        max_new_tokens=100,
    )


@st.cache_resource(show_spinner=False)
def _load_speech_pipeline():
    """Build the text-to-speech pipeline used for audio playback."""
    return pipeline("text-to-speech", model="facebook/mms-tts-eng")


# Module-level handles used by the generate_* functions below.
_image_caption_pipeline = _load_image_caption_pipeline()
_text_generation_pipeline = _load_text_generation_pipeline()
_SPEECH_PIPELINE = _load_speech_pipeline()
# ====================================== | |
# Function settings | |
# ====================================== | |
def generate_image_caption(input_image):
    """
    Describe an image in natural language with the captioning pipeline.

    Args:
        input_image (Union[PIL.Image.Image, str]): Image to caption, given
            either as a PIL Image object or as a filesystem path to an
            image file.

    Returns:
        str: The ``generated_text`` entry of the first pipeline result.

    Example:
        >>> from PIL import Image
        >>> img = Image.open("photo.jpg")
        >>> caption = generate_image_caption(img)
        >>> print(f"Caption: {caption}")
    """
    # The pipeline returns a list of result dicts; only the first is used.
    first_result = _image_caption_pipeline(input_image)[0]
    return first_result["generated_text"]
def _strip_thinking_block(reply_text: str) -> str:
    """Return *reply_text* without a leading ``<think>...</think>`` section."""
    _, closing_tag, after_block = reply_text.partition("</think>")
    if closing_tag:
        # Everything up to and including the closing tag is reasoning output.
        return after_block.strip()
    # No closing tag: either there was no thinking block at all, or the
    # output was cut off inside one.  Drop a dangling opening tag if present.
    remainder = reply_text.strip()
    if remainder.startswith("<think>"):
        remainder = remainder[len("<think>"):]
    return remainder.strip()


def generate_story_content(system_prompt: str, user_prompt: str) -> str:
    """
    Generate a story from the given prompts with the text-generation pipeline.

    Both prompts are concatenated into a single user message, followed by the
    ``/no_think`` marker (Qwen3's switch for suppressing reasoning output).
    Any ``<think>...</think>`` block that still appears in the reply is
    removed by searching for the tags rather than by slicing off a fixed
    number of characters, so a reply without that block is returned intact.

    Args:
        system_prompt: Defines the assistant's role and writing constraints
        user_prompt: Describes the story scenario and specific elements to include

    Returns:
        Generated story text without any thinking-block markup and with
        surrounding whitespace stripped

    Raises:
        RuntimeError: If text generation fails at any stage

    Example:
        >>> story = generate_story_content(
        ...     "You are a helpful children's author...",
        ...     "Kids playing with dogs in a sunny meadow..."
        ... )
    """
    try:
        # Prepare chat message structure
        conversation_history = [
            {"role": "user", "content": system_prompt + user_prompt + "/no_think"},
        ]
        # Generate the story
        generation_output = _text_generation_pipeline(conversation_history)
        # The result holds the conversation with the reply appended, so the
        # assistant's message is the last entry.
        reply_text = generation_output[0]["generated_text"][-1]["content"]
        # Process and clean output
        return _strip_thinking_block(reply_text)
    except Exception as error:
        raise RuntimeError(f"Story generation failed: {error}") from error
def generate_audio_from_story(story_text: str) -> dict:
    """
    Synthesize speech for a story with the text-to-speech pipeline.

    Args:
        story_text: Input story text to synthesize; must be a non-empty,
            non-whitespace string

    Returns:
        The text-to-speech pipeline output, returned unchanged. ``main``
        reads its ``"audio"`` and ``"sampling_rate"`` entries. No audio file
        is written.

    Raises:
        ValueError: For empty/invalid input text
        RuntimeError: If audio generation fails

    Example:
        >>> speech = generate_audio_from_story("Children playing in the park")
        >>> st.audio(speech["audio"], sample_rate=speech["sampling_rate"])
    """
    # Validate input text before touching the model
    if not isinstance(story_text, str) or not story_text.strip():
        raise ValueError("Input story text must be a non-empty string")
    try:
        # Generate speech
        return _SPEECH_PIPELINE(story_text)
    except Exception as error:
        raise RuntimeError(f"Audio synthesis failed: {error}") from error
# ====================================== | |
# Main Application Interface | |
# ====================================== | |
def main():
    """
    Main application interface for Streamlit.

    Renders the page, accepts an image upload and, when the generate button
    is pressed, shows the image caption, the generated story and an audio
    player for the story in the right-hand column.  An unreadable upload or
    a ``ValueError``/``RuntimeError`` from the generation helpers is shown
    with ``st.error`` instead of surfacing as a traceback.
    """
    # Page configuration
    st.set_page_config(
        page_title="Fantasy Adventure Generator",
        layout="wide",
        initial_sidebar_state="collapsed"
    )
    # Title and description
    st.title("π§ββοΈ Fantasy Adventure Story Generator")
    st.markdown("""
    Upload an image and get:
    - Automatic scene description
    - AI-generated adventure story
    - Audio version of the story
    """)
    # Help section
    st.markdown("---")
    st.subheader("π How to Use:")
    st.info("""
    1. Upload any picture (animals, nature, or people work best!)
    2. Click the generating button
    3. Wait for image analysis to complete
    4. Enjoy your story and audio!
    """)
    # File uploader
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is None:
        return

    # Process image; PIL raises OSError subclasses for files it cannot decode
    try:
        image = Image.open(uploaded_file).convert("RGB")
    except OSError as error:
        st.error(f"Could not read the uploaded image: {error}")
        return

    # Layout columns
    col1, col2 = st.columns(2)
    with col1:
        st.image(image, caption="Uploaded Image", use_container_width=True)

    # Generation button
    if not st.button("β¨ Generate Story & Audio"):
        return

    with st.spinner("Processing your request..."):
        # Generate outputs and display results
        with col2:
            try:
                st.subheader("π Scene Description")
                with st.spinner("Preparing story caption..."):
                    caption = generate_image_caption(image)
                st.write(caption)

                st.subheader("π Generated Story")
                with st.spinner("Preparing story..."):
                    sys_prompt = "You are a fantasy writer. Create a 100-word adventure story about "
                    story = generate_story_content(sys_prompt, caption)
                st.write(story)

                st.subheader("π Audio Playback")
                with st.spinner("Preparing speech..."):
                    speech = generate_audio_from_story(story)
                st.audio(speech["audio"], sample_rate=speech["sampling_rate"], format='audio/wav')
            except (ValueError, RuntimeError) as error:
                # The helpers raise these for an empty story or a failed
                # model call; sections already rendered stay on the page.
                st.error(f"Generation failed: {error}")


if __name__ == "__main__":
    main()