Assignment_V1 / app.py
ccclllwww's picture
Update app.py
cf27fca verified
# ======================================
# Package Import
# ======================================
import streamlit as st
from PIL import Image
import time
from transformers import pipeline
# ======================================
# Basic Initialization
# ======================================
@st.cache_resource(show_spinner=False)
def _load_pipeline(task, model, **pipeline_kwargs):
    """
    Build a Hugging Face pipeline once per server process.

    Streamlit re-executes this script on every user interaction, so calling
    ``pipeline(...)`` directly at module level would reload the model each
    time. ``st.cache_resource`` keeps the constructed pipeline in memory and
    returns the same object on later reruns. The spinner is disabled so that
    no UI element is emitted before ``st.set_page_config`` runs in ``main``.

    Args:
        task: Pipeline task name, e.g. "image-to-text".
        model: Model identifier passed to ``pipeline``.
        **pipeline_kwargs: Extra keyword arguments forwarded to ``pipeline``.
    Returns:
        The constructed pipeline object.
    """
    return pipeline(task=task, model=model, **pipeline_kwargs)


# Image captioning pipeline (image -> text description)
_image_caption_pipeline = _load_pipeline(
    "image-to-text",
    "cnmoro/tiny-image-captioning",
)
# Story generation pipeline; each reply is capped at 100 new tokens
_text_generation_pipeline = _load_pipeline(
    "text-generation",
    "Qwen/Qwen3-0.6B",
    max_new_tokens=100,
)
# Text-to-speech pipeline used to narrate the generated story
_SPEECH_PIPELINE = _load_pipeline("text-to-speech", "facebook/mms-tts-eng")
# ======================================
# Function settings
# ======================================
def generate_image_caption(input_image):
    """
    Describe an image in natural language with the module-level captioning pipeline.

    Args:
        input_image (Union[PIL.Image.Image, str]): The image to caption, given
            either as a PIL Image object or as a filesystem path string.
    Returns:
        str: The caption text produced for the image.
    Example:
        >>> from PIL import Image
        >>> img = Image.open("photo.jpg")
        >>> caption = generate_image_caption(img)
        >>> print(f"Caption: {caption}")
    """
    predictions = _image_caption_pipeline(input_image)
    # The pipeline yields a list of result dicts; the caption is in the first one.
    return predictions[0]['generated_text']
def _strip_thinking_block(reply: str) -> str:
    """Return *reply* without a leading ``<think>...</think>`` section."""
    text = reply.lstrip()
    if not text.startswith("<think>"):
        # No reasoning preamble: the reply is already the story.
        return text
    _, closing_tag, remainder = text.partition("</think>")
    if closing_tag:
        return remainder.lstrip()
    # Unterminated block (for example the token limit was hit mid-thought):
    # drop only the opening tag so the markup itself is never shown or spoken.
    return text[len("<think>"):].lstrip()


def generate_story_content(system_prompt: str, user_prompt: str) -> str:
    """
    Generate a story from the given prompts with the module-level text pipeline.

    The two prompts are concatenated, suffixed with the "/no_think" switch and
    sent as a single user message. The assistant reply may still start with a
    "<think>...</think>" section; it is removed by locating the tags rather
    than by slicing a fixed number of characters, so a reply that does not
    start with that exact preamble keeps its full text.

    Args:
        system_prompt: Defines the assistant's role and writing constraints
        user_prompt: Describes the story scenario and specific elements to include
    Returns:
        Generated story text without any thinking process metadata
    Raises:
        RuntimeError: If text generation fails at any stage
    Example:
        >>> story = generate_story_content(
        ...     "You are a helpful children's author...",
        ...     "Kids playing with dogs in a sunny meadow..."
        ... )
    """
    try:
        # Prepare chat message structure
        conversation_history = [
            {"role": "user", "content": system_prompt + user_prompt + "/no_think"},
        ]
        # Generate the story
        generation = _text_generation_pipeline(conversation_history)
        # The pipeline returns the conversation with the assistant message
        # appended, so the reply is the last entry.
        assistant_reply = generation[0]["generated_text"][-1]["content"]
        return _strip_thinking_block(assistant_reply)
    except Exception as error:
        raise RuntimeError(f"Story generation failed: {error}") from error
def generate_audio_from_story(story_text: str) -> dict:
    """
    Synthesize speech for a story with the module-level text-to-speech pipeline.

    Args:
        story_text: Input story text to synthesize; must be a non-blank string
    Returns:
        The pipeline output as returned by ``_SPEECH_PIPELINE``. The caller in
        this file reads its "audio" and "sampling_rate" entries and hands them
        to ``st.audio``. No file is written.
    Raises:
        ValueError: For empty/invalid input text
        RuntimeError: If audio generation fails
    Example:
        >>> speech = generate_audio_from_story("Children playing in the park")
        >>> st.audio(speech["audio"], sample_rate=speech["sampling_rate"])
    """
    # Validate input text before touching the model
    if not isinstance(story_text, str) or not story_text.strip():
        raise ValueError("Input story text must be a non-empty string")
    try:
        # Generate speech
        return _SPEECH_PIPELINE(story_text)
    except Exception as error:
        raise RuntimeError(f"Audio synthesis failed: {error}") from error
# ======================================
# Main Application Interface
# ======================================
def main():
    """
    Render the Streamlit page and run the image -> caption -> story -> audio flow.

    The page shows a title, a short feature list and usage instructions, then
    an image uploader. Once an image is uploaded it is displayed in the left
    column; pressing the button runs captioning, story generation and speech
    synthesis in sequence and displays each result in the right column.
    ``RuntimeError`` and ``ValueError`` raised by the generation helpers are
    shown as an error message on the page.
    """
    # Page configuration
    st.set_page_config(
        page_title="Fantasy Adventure Generator",
        layout="wide",
        initial_sidebar_state="collapsed",
    )

    # Title and description
    st.title("🧙‍♂️ Fantasy Adventure Story Generator")
    # Built from explicit lines so code indentation never leaks into the
    # markdown (four leading spaces would render as a code block).
    st.markdown(
        "\n"
        "Upload an image and get:\n"
        "- Automatic scene description\n"
        "- AI-generated adventure story\n"
        "- Audio version of the story\n"
    )

    # Help section
    st.markdown("---")
    st.subheader("🌟 How to Use:")
    st.info(
        "\n"
        "1. Upload any picture (animals, nature, or people work best!)\n"
        "2. Click the generating button\n"
        "3. Wait for image analysis to complete\n"
        "4. Enjoy your story and audio!\n"
    )

    # File uploader
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is None:
        return

    # Process image
    image = Image.open(uploaded_file).convert("RGB")

    # Layout columns
    col1, col2 = st.columns(2)
    with col1:
        st.image(image, caption="Uploaded Image", use_container_width=True)

    # Generation button
    if not st.button("✨ Generate Story & Audio"):
        return

    with st.spinner("Processing your request..."):
        # Generate outputs and display results
        with col2:
            try:
                st.subheader("🔍 Scene Description")
                with st.spinner("Preparing story caption..."):
                    caption = generate_image_caption(image)
                st.write(caption)

                st.subheader("📖 Generated Story")
                with st.spinner("Preparing story..."):
                    sys_prompt = "You are a fantasy writer. Create a 100-word adventure story about "
                    story = generate_story_content(sys_prompt, caption)
                st.write(story)

                st.subheader("🔊 Audio Playback")
                with st.spinner("Preparing speech..."):
                    speech = generate_audio_from_story(story)
                st.audio(
                    speech["audio"],
                    sample_rate=speech["sampling_rate"],
                    format='audio/wav',
                )
            except (RuntimeError, ValueError) as error:
                # Show the failure on the page instead of a raw traceback.
                st.error(str(error))


if __name__ == "__main__":
    main()