# Streamlit app: upload an image, have GPT-4 Vision narrate it in the style of
# Sir David Attenborough, then voice the narration with ElevenLabs.
# Run with: streamlit run <this file>

import base64
import errno
import os
import time

import streamlit as st
from openai import OpenAI
from elevenlabs import generate, set_api_key, voices

# ElevenLabs setup: register the API key and pick the last available voice.
set_api_key("8f73d4bd2ab582e4950964e5ecb12aaa")
voice = voices()[-1]

# OpenAI client used for the GPT-4 Vision calls.
client = OpenAI(api_key='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')


def encode_image(image_path):
    """Read an image file and return it as a base64-encoded string."""
    while True:
        try:
            with open(image_path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        except IOError as e:
            if e.errno != errno.EACCES:
                # Not a "file in use" error, re-raise
                raise
            # File is being written to, wait a bit and retry
            time.sleep(0.1)


def generate_new_line(base64_image):
    """Build the user message asking the model to describe the uploaded image."""
    return [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image"},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                },
            ],
        },
    ]


def analyze_image(base64_image):
    """Ask GPT-4 Vision to narrate the image as a snarky nature documentary."""
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "system",
                "content": """
                You are Sir David Attenborough. Narrate the picture of the human as if it is a nature documentary.
                Make it snarky and funny. Don't repeat yourself. Make it short.
                If I do anything remotely interesting, make a big deal about it!
                """,
            },
        ]
        + generate_new_line(base64_image),
        max_tokens=500,
    )
    return response.choices[0].message.content


def save_uploaded_file(uploaded_file):
    """Persist the Streamlit upload to disk and return its path."""
    # Create a directory to save the file
    save_path = 'uploaded_images'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Save the file
    file_path = os.path.join(save_path, 'temp')
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path


def main():
    st.title("Image to Audio Converter")

    # Image upload
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])

    if uploaded_file:
        path = save_uploaded_file(uploaded_file)
        print(f'file saved to {path}')

        encoded_image = encode_image(path)
        print('image encoded')

        analyzed_image = analyze_image(encoded_image)
        print('image analyzed \n' + analyzed_image)

        # Turn the narration into speech; `generate` returns raw MP3 bytes.
        audio_file = generate(text=analyzed_image, voice=voice, model="eleven_turbo_v2")

        if audio_file is not None:
            st.audio(audio_file, format='audio/mp3')
            st.download_button('Download Audio', audio_file, file_name='narrated.mp3')


if __name__ == "__main__":
    main()
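
# Note: the API keys above are hardcoded in the original script. A safer pattern
# (a sketch, assuming OPENAI_API_KEY and ELEVENLABS_API_KEY are set in the
# environment) would be:
#
#     set_api_key(os.environ["ELEVENLABS_API_KEY"])
#     client = OpenAI()  # the OpenAI client reads OPENAI_API_KEY automatically
#
# This keeps secrets out of source control without changing any other logic.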