Spaces:

ccclllwww
/

Assignment_V1

Build error

App Files Files Community

Assignment_V1 / app.py

ccclllwww

Update app.py

cf27fca verified 7 months ago

raw

history blame contribute delete

6.41 kB

	# ======================================
	# Package Import
	# ======================================

	import streamlit as st
	from PIL import Image
	import time
	from transformers import pipeline

	# ======================================
	# Basic Initialization
	# ======================================

	# Load every Hugging Face pipeline once per server process.
	# Streamlit re-executes this script from top to bottom on each widget
	# interaction, so bare module-level ``pipeline(...)`` calls would reload all
	# three models on every rerun. ``st.cache_resource`` keeps the loaded
	# objects alive across reruns.
	# ``show_spinner=False`` keeps the loader from drawing a UI element before
	# ``main()`` gets to call ``st.set_page_config()``.
	@st.cache_resource(show_spinner=False)
	def _load_pipelines():
	    """Build the captioning, text-generation and text-to-speech pipelines.

	    Returns:
	        tuple: ``(image_caption, text_generation, speech)`` pipelines, in
	        that order.
	    """
	    # Image -> caption text
	    image_caption = pipeline(
	        task="image-to-text",
	        model="cnmoro/tiny-image-captioning",
	    )
	    # Caption -> story text (generation capped at 100 new tokens)
	    text_generation = pipeline(
	        "text-generation",
	        model="Qwen/Qwen3-0.6B",
	        max_new_tokens=100,
	    )
	    # Story text -> speech waveform
	    speech = pipeline("text-to-speech", model="facebook/mms-tts-eng")
	    return image_caption, text_generation, speech


	# Module-level handles used by the generate_* functions in this file.
	(
	    _image_caption_pipeline,
	    _text_generation_pipeline,
	    _SPEECH_PIPELINE,
	) = _load_pipelines()

	# ======================================
	# Function Definitions
	# ======================================

	def generate_image_caption(input_image, *, captioner=None):
	    """
	    Generate a textual description for an input image.

	    Args:
	        input_image (Union[PIL.Image.Image, str]): Image to process. Can be either:
	            - A PIL Image object
	            - A string containing a filesystem path to an image file
	        captioner: Optional callable that takes the image and returns a
	            list of dicts carrying a ``'generated_text'`` entry. Defaults
	            to the module-level ``_image_caption_pipeline``; pass another
	            callable to swap the model or to run without loading it.

	    Returns:
	        str: The ``'generated_text'`` value of the first result.

	    Example:
	        >>> from PIL import Image
	        >>> img = Image.open("photo.jpg")
	        >>> caption = generate_image_caption(img)
	        >>> print(f"Caption: {caption}")
	    """
	    # Resolve the default lazily so an injected captioner never touches
	    # the module-level pipeline.
	    caption_model = _image_caption_pipeline if captioner is None else captioner

	    # The captioner returns a list of result dicts; only the first is used.
	    inference_results = caption_model(input_image)
	    return inference_results[0]['generated_text']

	def _strip_thinking_block(generated_text: str) -> str:
	    """Return *generated_text* without a leading ``<think>...</think>`` section."""
	    _, closing_tag, remainder = generated_text.partition("</think>")
	    # No closing tag means the reply carried no thinking section at all,
	    # so the whole reply is the story.
	    return (remainder if closing_tag else generated_text).strip()


	def generate_story_content(system_prompt: str, user_prompt: str, generator=None) -> str:
	    """
	    Generate a story from the provided system and user prompts.

	    The two prompts are concatenated into a single user message and the
	    ``/no_think`` switch is appended as a separate, space-delimited token.

	    Args:
	        system_prompt: Defines the assistant's role and writing constraints
	        user_prompt: Describes the story scenario and specific elements to include
	        generator: Optional callable that takes the chat message list and
	            returns ``[{"generated_text": [...messages...]}]``. Defaults to
	            the module-level ``_text_generation_pipeline``.

	    Returns:
	        The assistant's reply with any ``<think>...</think>`` section and
	        surrounding whitespace removed

	    Raises:
	        RuntimeError: If text generation fails at any stage

	    Example:
	        >>> story = generate_story_content(
	        ...     "You are a helpful children's author...",
	        ...     "Kids playing with dogs in a sunny meadow..."
	        ... )
	    """
	    try:
	        text_generator = _text_generation_pipeline if generator is None else generator

	        # Prepare chat message structure
	        conversation_history = [
	            {"role": "user", "content": f"{system_prompt}{user_prompt} /no_think"},
	        ]

	        # Generate the story
	        generation = text_generator(conversation_history)

	        # The generator returns the conversation with the assistant's reply
	        # appended, so the reply is always the last message.
	        assistant_reply = generation[0]["generated_text"][-1]["content"]

	        # Strip the thinking section by its closing tag instead of slicing
	        # off a fixed 19 characters (the length of an *empty*
	        # "<think>\n\n</think>\n\n" block), which cut into the story
	        # whenever that exact prefix was not present.
	        return _strip_thinking_block(assistant_reply)

	    except Exception as error:
	        raise RuntimeError(f"Story generation failed: {error}") from error

	def generate_audio_from_story(story_text: str, *, synthesizer=None) -> dict:
	    """
	    Convert story text to speech using text-to-speech synthesis.

	    Args:
	        story_text: Input story text to synthesize; must be a non-blank string
	        synthesizer: Optional callable that takes the text and returns the
	            speech output. Defaults to the module-level ``_SPEECH_PIPELINE``.

	    Returns:
	        The synthesizer's output, passed through unchanged. No file is
	        written. The Streamlit UI in this file reads its ``"audio"`` and
	        ``"sampling_rate"`` entries.

	    Raises:
	        ValueError: For empty/invalid input text
	        RuntimeError: If audio generation fails

	    Example:
	        >>> speech = generate_audio_from_story("Children playing in the park")
	        >>> waveform, rate = speech["audio"], speech["sampling_rate"]
	    """
	    # Validate input text before touching the (expensive) synthesizer
	    if not isinstance(story_text, str) or not story_text.strip():
	        raise ValueError("Input story text must be a non-empty string")

	    try:
	        speech_engine = _SPEECH_PIPELINE if synthesizer is None else synthesizer

	        # Generate speech
	        return speech_engine(story_text)

	    except Exception as error:
	        raise RuntimeError(f"Audio synthesis failed: {error}") from error

	# ======================================
	# Main Application Interface
	# ======================================
	def _render_intro():
	    """Draw the page title, the feature summary and the usage instructions."""
	    # Title and description
	    st.title("🧙‍♂️ Fantasy Adventure Story Generator")
	    st.markdown("""
	Upload an image and get:
	- Automatic scene description
	- AI-generated adventure story
	- Audio version of the story
	""")

	    # Help section
	    st.markdown("---")
	    st.subheader("🌟 How to Use:")
	    st.info("""
	1. Upload any picture (animals, nature, or people work best!)
	2. Click the generating button
	3. Wait for image analysis to complete
	4. Enjoy your story and audio!
	""")


	def _render_results(image):
	    """Run caption -> story -> speech for *image*, showing each result as it is ready."""
	    st.subheader("🔍 Scene Description")
	    with st.spinner("Preparing story caption..."):
	        caption = generate_image_caption(image)
	    st.write(caption)

	    st.subheader("📖 Generated Story")
	    with st.spinner("Preparing story..."):
	        # The prompt deliberately ends mid-sentence: the caption completes it.
	        sys_prompt = "You are a fantasy writer. Create a 100-word adventure story about "
	        story = generate_story_content(sys_prompt, caption)
	    st.write(story)

	    st.subheader("🔊 Audio Playback")
	    with st.spinner("Preparing speech..."):
	        speech = generate_audio_from_story(story)
	    st.audio(speech["audio"], sample_rate=speech["sampling_rate"], format='audio/wav')


	def main():
	    """Main application interface for Streamlit.

	    Configures the page, shows the instructions, accepts an image upload
	    and, once the generate button is pressed, displays the caption, the
	    story and the audio in the right-hand column. Failures while reading
	    the upload or while generating are reported with ``st.error`` instead
	    of surfacing as a raw traceback.
	    """
	    # Page configuration
	    st.set_page_config(
	        page_title="Fantasy Adventure Generator",
	        layout="wide",
	        initial_sidebar_state="collapsed"
	    )

	    _render_intro()

	    # File uploader
	    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
	    if uploaded_file is None:
	        return

	    # Process image; a corrupt or mislabelled upload makes PIL raise an
	    # OSError (UnidentifiedImageError is a subclass).
	    try:
	        image = Image.open(uploaded_file).convert("RGB")
	    except OSError as error:
	        st.error(f"Could not read the uploaded image: {error}")
	        return

	    # Layout columns: preview on the left, generated content on the right
	    col1, col2 = st.columns(2)

	    with col1:
	        st.image(image, caption="Uploaded Image", use_container_width=True)

	    # Generation button
	    if not st.button("✨ Generate Story & Audio"):
	        return

	    with st.spinner("Processing your request..."), col2:
	        try:
	            _render_results(image)
	        except (RuntimeError, ValueError) as error:
	            # These are the failure types documented by the story and
	            # audio generators.
	            st.error(f"Could not finish generating your story: {error}")

	# Start the Streamlit interface only when this file is run as the main script.
	if __name__ == "__main__":
	    main()