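# NOTE: the lines below are file-viewer page header text captured with the
# source; they are not part of the program.
# muhammadnasar's picture
# Update app.py
# 1edd2f3 verified
# raw
# history blame contribute delete
# No virus
# 2.3 kB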
import streamlit as st
from openai import OpenAI
from dotenv import load_dotenv
import os
import tempfile
# Run python-dotenv's loader before the API key is looked up below.
load_dotenv()

st.title("Image Description and Audio Generation")

# Module-level OpenAI client shared by the request helper; the key is read
# from the OPENAI_API_KEY environment variable (None when it is unset).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
def process_image_and_generate_audio(image_url):
    """Describe the image at ``image_url`` and synthesize the description as speech.

    Sends the URL together with a fixed text prompt to the ``gpt-4o`` chat
    completion endpoint (capped at 300 tokens), then passes the returned text
    to the ``tts-1`` speech endpoint using the ``alloy`` voice.

    Args:
        image_url: URL of the image to describe.

    Returns:
        A ``(content, audio_response)`` tuple: the description text and the
        object returned by ``client.audio.speech.create``. If anything raises,
        the error is shown with ``st.error`` and ``(None, None)`` is returned.
    """
    # One user message carrying both the instruction and the image reference.
    prompt_parts = [
        {"type": "text", "text": "Explain every single thing about this image"},
        {"type": "image_url", "image_url": {"url": image_url}},
    ]

    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt_parts}],
            max_tokens=300,
        )
        # Only the first choice is used.
        description = completion.choices[0].message.content

        speech = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=description,
        )
    except Exception as exc:
        # UI boundary: report the failure on the page instead of raising.
        st.error(f"An error occurred: {exc!s}")
        return None, None

    return description, speech
# Streamlit UI
def main():
    """Render the page: read an image URL, then show its description and audio.

    Nothing happens until the "Generate Description and Audio" button is
    pressed. An empty URL produces a warning. Otherwise the URL is handed to
    ``process_image_and_generate_audio``; when that returns ``None`` values
    (it has already reported the error itself) nothing further is rendered.

    The audio is staged through a temporary ``.mp3`` file because the response
    object is consumed via ``stream_to_file``. The file is read back into
    memory and removed before the widgets are drawn, so no temp files or open
    handles are left behind.
    """
    # Image URL input
    image_url = st.text_input("Enter Image URL")

    if not st.button("Generate Description and Audio"):
        return

    if not image_url:
        st.warning("Please enter an image URL.")
        return

    st.info("Processing image and generating audio...")

    # Generate content and audio
    content, audio_response = process_image_and_generate_audio(image_url)
    if content is None or audio_response is None:
        return

    # Reserve a unique path, then close the handle immediately: writing to a
    # NamedTemporaryFile by name while it is still open fails on Windows.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_path = tmp.name

    try:
        audio_response.stream_to_file(audio_path)
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
    finally:
        # delete=False means cleanup is our job; do it even if the write or
        # read above raised.
        os.remove(audio_path)

    # Display content
    st.markdown("**Description:**")
    st.write(content)

    # Display the audio
    st.audio(audio_bytes, format="audio/mp3")


if __name__ == "__main__":
    main()