"""Streamlit app: describe an image with GPT-4o and read the description aloud."""

import os
import tempfile

import streamlit as st
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()

st.title("Image Description and Audio Generation")

# Initialize OpenAI client (API key comes from .env or the environment).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


def process_image_and_generate_audio(image_url):
    """Describe the image at *image_url* with GPT-4o, then synthesize speech.

    Parameters
    ----------
    image_url : str
        Publicly reachable URL of the image to describe.

    Returns
    -------
    tuple
        ``(description, audio_response)`` on success, where ``audio_response``
        is the OpenAI TTS response object; ``(None, None)`` on failure after
        surfacing the error in the Streamlit UI.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Explain every single thing about this image"},
                        {
                            "type": "image_url",
                            "image_url": {"url": image_url},
                        },
                    ],
                }
            ],
            max_tokens=300,
        )
        # Get content from response
        content = response.choices[0].message.content

        # Generate audio from content
        audio_response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=content,
        )
        return content, audio_response
    except Exception as e:
        # Boundary handler: surface any API/network failure to the user
        # instead of crashing the app.
        st.error(f"An error occurred: {str(e)}")
        return None, None


def main():
    """Render the Streamlit UI and drive the describe-then-speak workflow."""
    # Image URL input
    image_url = st.text_input("Enter Image URL")

    if st.button("Generate Description and Audio"):
        if not image_url:
            st.warning("Please enter an image URL.")
        else:
            st.info("Processing image and generating audio...")
            # Generate content and audio
            content, audio_response = process_image_and_generate_audio(image_url)

            if content is not None and audio_response is not None:
                # delete=False so the file can be reopened by name (required on
                # Windows, where an open NamedTemporaryFile cannot be reopened).
                tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
                # BUG FIX: close our handle before the SDK writes to the path;
                # the original streamed into the file while it was still open.
                tmp.close()
                try:
                    audio_response.stream_to_file(tmp.name)

                    # Display content
                    st.markdown("**Description:**")
                    st.write(content)

                    # BUG FIX: the original leaked the file handle via
                    # open(...).read(); use a context manager instead.
                    with open(tmp.name, "rb") as audio_file:
                        st.audio(audio_file.read(), format="audio/mp3")
                finally:
                    # BUG FIX: the original never removed the delete=False temp
                    # file, leaking one .mp3 per button press.
                    os.remove(tmp.name)


if __name__ == "__main__":
    main()