# Image_Narration/app.py
import os
from io import BytesIO

from PIL import Image
from gtts import gTTS
from openai import OpenAI
#from dotenv import load_dotenv
import streamlit as st
from transformers import pipeline
# Image-captioning pipeline used to describe what is going on in the uploaded image
img_nar = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
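# Illustrative (assumed) shape of the pipeline output, relied on further below:
#   img_nar(pil_image) -> [{"generated_text": "a dog running on the beach"}]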
#load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
st.header("Image Narrator")
# Temporary
uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
if 'history' not in st.session_state:
    st.session_state['history'] = []
personality = st.text_input("Enter a personality")
image_narration = "No narration given"  # fallback caption when no image is uploaded
# Check if an image has been uploaded
if uploaded_image is not None:
    # Convert the uploaded file to a PIL image
    bytes_data = uploaded_image.getvalue()
    pil_image = Image.open(BytesIO(bytes_data))
    # Caption the image with the BLIP pipeline
    image_narration = img_nar(pil_image)
    # Display the uploaded image
    st.image(pil_image, caption='Uploaded Image.', use_column_width=True)
    # Keep only the caption string from the pipeline output
    image_narration = image_narration[0]["generated_text"]
    #st.write(image_narration)
def update_and_get_narration(personality, user_input):
    if personality and user_input:
        # Record the image caption as a user turn, then ask the model to restyle it
        st.session_state['history'].append({"role": "user", "content": user_input})
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": f"You reiterate what is said to you but narrate it like a {personality}."}
            ] + st.session_state['history']
        )
        gpt_response = response.choices[0].message.content
        st.session_state['history'].append({"role": "assistant", "content": gpt_response})
        return gpt_response
    else:
        return "Please enter both a personality and an image caption."
if st.button('Narrate'):
    narration = update_and_get_narration(personality, image_narration)
    st.write(narration)
    # Convert the narration to speech and play it back from an in-memory MP3 buffer
    tts = gTTS(text=narration, lang='en')
    audio_buffer = BytesIO()
    tts.write_to_fp(audio_buffer)
    audio_buffer.seek(0)
    st.audio(audio_buffer, format='audio/mp3', start_time=0)
else:
    st.write(st.session_state['history'][-1]['content'] if st.session_state['history'] else "Narration will appear here.")
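# Minimal local run (assumes streamlit, transformers, pillow, gtts and openai are installed):
#   OPENAI_API_KEY=... streamlit run app.py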