# Source: Hugging Face Space by hskwon7 — "Update app.py", commit 3c49d37 (verified)
import streamlit as st
from transformers import pipeline
from PIL import Image
import io
from gtts import gTTS
import tempfile
st.title("🖼️ → 📖 Image-to-Story Demo")
st.write("Upload an image and watch as it’s captioned, turned into a short story, and even read aloud!")
@st.cache_resource
def load_captioner():
return pipeline("image-to-text", model="unography/blip-large-long-cap")
@st.cache_resource
def load_story_gen():
return pipeline("text-generation", model="gpt2", tokenizer="gpt2")
captioner = load_captioner()
story_gen = load_story_gen()
uploaded = st.file_uploader("Upload an image", type=["png","jpg","jpeg"], key="image")
if uploaded:
img = Image.open(uploaded)
st.image(img, use_column_width=True)
# Caption
if "caption" not in st.session_state:
with st.spinner("Generating caption…"):
caps = captioner(img)
st.session_state.caption = caps[0] if isinstance(caps, list) else caps
st.write("**Caption:**", st.session_state.caption)
# Story
if "story" not in st.session_state:
with st.spinner("Spinning up a story…"):
out = story_gen(
st.session_state.caption,
max_length=200,
num_return_sequences=1,
do_sample=True,
top_p=0.9
)
st.session_state.story = out[0]["generated_text"]
st.write("**Story:**", st.session_state.story)
# Prepare audio bytes once
if "audio_bytes" not in st.session_state:
with st.spinner("Generating audio…"):
tts = gTTS(text=st.session_state.story, lang="en")
buf = io.BytesIO()
tts.write_to_fp(buf)
st.session_state.audio_bytes = buf.getvalue()
# Play button
if st.button("🔊 Play Story Audio"):
# Write to a temp file
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
tmp.write(st.session_state.audio_bytes)
tmp.flush()
tmp_path = tmp.name
tmp.close()
# Stream it
st.audio(tmp_path, format="audio/mp3")