# Image_Narration / app.py
import os
from PIL import Image
from gtts import gTTS
from io import BytesIO
import io
from openai import OpenAI
#from dotenv import load_dotenv
import streamlit as st
from transformers import pipeline
# Image captioning pipeline (BLIP) used to describe what is going on in the image
img_nar = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
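# The image-to-text pipeline returns a list of dicts with a 'generated_text' field,
# e.g. [{'generated_text': 'a dog running on the beach'}] (caption text here is illustrative)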
#load_dotenv()
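# OpenAI client; expects the API key in the OPENAI_API_KEY environment variable (e.g. a Space secret)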
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
st.header("Image Narrator")
# Temporary
uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
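# Keep the chat history in session state so it survives Streamlit reruns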
if 'history' not in st.session_state:
    st.session_state['history'] = []
personality = st.text_input("Enter a personality")
image_narration = "No narration given"
# Check if an image has been uploaded
if uploaded_image is not None:
    # Convert the uploaded file to a PIL image
    bytes_data = uploaded_image.getvalue()
    pil_image = Image.open(io.BytesIO(bytes_data))
    # Run the captioning pipeline on the PIL image
    image_narration = img_nar(pil_image)
    # Display the uploaded image
    st.image(pil_image, caption='Uploaded Image.', use_column_width=True)
    # Keep only the generated caption text
    image_narration = image_narration[0]["generated_text"]
    #st.write(image_narration)
def update_and_get_narration(personality, user_input):
    """Append the caption to the chat history and ask GPT to retell it in the given personality."""
    if personality and user_input:
        st.session_state['history'].append({"role": "user", "content": user_input})
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": f"You reiterate what is said to you but narrate it like a {personality}."}
            ] + st.session_state['history']
        )
        gpt_response = response.choices[0].message.content
        st.session_state['history'].append({"role": "assistant", "content": gpt_response})
        return gpt_response
    else:
        return "Please enter both a personality and some image caption text."
if st.button('Narrate'):
    narration = update_and_get_narration(personality, image_narration)
    st.write(narration)
    tts = gTTS(text=narration, lang='en')
    audio_buffer = BytesIO()
    tts.write_to_fp(audio_buffer)
    audio_buffer.seek(0)
    st.audio(audio_buffer, format='audio/mp3', start_time=0)
else:
    st.write(st.session_state['history'][-1]['content'] if st.session_state['history'] else "Narration will appear here.")
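# To try this locally (assuming streamlit, transformers, openai, gtts, and Pillow are installed):
#   streamlit run app.py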