"""Streamlit app: narrate an uploaded image in the style of a nature documentary.

Pipeline (see ``main``): the uploaded image is saved to disk, base64-encoded,
described by an OpenAI chat-completion call, spoken via ElevenLabs, and
finally combined with the still image into an MP4 using moviepy.
"""

import base64
import errno
import os
import time
from io import BytesIO

import openai
import streamlit as st
from elevenlabs import generate, play, set_api_key, voices
from moviepy.editor import AudioFileClip, ImageClip
from openai import OpenAI

# Module-level setup: API credentials are read from the environment variables
# named 'eleven' and 'open'; a missing variable raises KeyError at import time.
set_api_key(os.environ['eleven'])
voice = voices()[-1]  # last voice in the list returned by ElevenLabs
client = OpenAI(api_key=os.environ['open'])

# System prompt sent with every narration request. The leading space and the
# trailing " \n" mirror the original literal so the request text is unchanged.
_SYSTEM_PROMPT = (
    " You are Sir David Attenborough. Narrate the picture of the human as if "
    "it is a nature documentary. Make it snarky and funny. Don't repeat "
    "yourself. Make it short. If I do anything remotely interesting, make a "
    "big deal about it! \n"
)


def encode_image(image_path):
    """Return the contents of *image_path* as a base64-encoded UTF-8 string.

    If opening or reading fails with ``EACCES`` the call sleeps 0.1 s and
    retries, without an upper bound on the number of attempts. Any other
    ``IOError`` is re-raised immediately.
    """
    while True:
        try:
            with open(image_path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        except IOError as e:
            if e.errno != errno.EACCES:
                # Not a "file in use" error, re-raise
                raise
            # Presumably the file is still being written; wait a bit and retry.
            time.sleep(0.1)


def generate_new_line(base64_image):
    """Build the single user message asking the model to describe the image.

    The image is embedded as a ``data:image/jpeg;base64,...`` URL regardless
    of the actual upload format.
    """
    # NOTE(review): the data URL is passed as a plain string under
    # "image_url"; confirm the API version in use accepts this shape.
    return [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image"},
                {
                    "type": "image_url",
                    "image_url": f"data:image/jpeg;base64,{base64_image}",
                },
            ],
        },
    ]


def analyze_image(base64_image):
    """Ask the vision model to narrate the image and return the reply text.

    Sends the system prompt followed by the user message produced by
    ``generate_new_line``; the reply is capped at 100 tokens.
    """
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "system",
                "content": _SYSTEM_PROMPT,
            },
        ]
        + generate_new_line(base64_image),
        max_tokens=100,
    )
    return response.choices[0].message.content


def save_uploaded_file(uploaded_file):
    """Write the upload to ``uploaded_images/temp`` and return that path.

    *uploaded_file* must expose ``getbuffer()``. The target directory is
    created when missing; an existing file is overwritten.
    """
    save_path = 'uploaded_images'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_path, exist_ok=True)

    file_path = os.path.join(save_path, 'temp')
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path


def save_audio_file(audio):
    """Write *audio* (bytes) to ``audio/temp.mp3`` and return that path.

    The target directory is created when missing; an existing file is
    overwritten.
    """
    save_path = 'audio'
    os.makedirs(save_path, exist_ok=True)

    file_path = os.path.join(save_path, 'temp.mp3')
    with open(file_path, "wb") as f:
        f.write(audio)
    # Return the path actually written (previously the '.mp3' suffix was
    # dropped, yielding a path to a file that does not exist).
    return file_path


def main():
    """Render the Streamlit page and run the image -> narration -> video flow.

    Nothing beyond the title and uploader is rendered until a file has been
    uploaded. Audio and video widgets are only shown when audio generation
    returned data.
    """
    st.title("David Attenborough Narrator")
    st.text("Upload an image and wait ...")

    # Image upload
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])
    if not uploaded_file:
        return

    path = save_uploaded_file(uploaded_file)
    print(f'file saved to {path}')

    encoded_image = encode_image(path)
    print('image encoded')

    analyzed_image = analyze_image(encoded_image)
    print('image analyzed \n' + analyzed_image)

    audio_file = generate(text=analyzed_image, voice=voice, model="eleven_turbo_v2")
    if audio_file is None:
        return

    st.audio(audio_file, format='audio/mp3')
    st.download_button('Download Audio', audio_file, file_name='narrated.mp3')

    # moviepy reads audio from disk, so persist the generated bytes first.
    audio_filename = 'narrated.mp3'
    with open(audio_filename, 'wb') as f:
        f.write(audio_file)

    print('creating video')
    video_filename = "narrated_video.mp4"

    # Open the audio once and reuse it for both the duration and the sound
    # track; close it afterwards so the underlying reader is released.
    audio_clip = AudioFileClip(audio_filename)
    try:
        # Still image shown for exactly as long as the narration lasts.
        video_clip = ImageClip(path).set_duration(audio_clip.duration)
        video_clip = video_clip.set_audio(audio_clip)
        print('video created')

        # Write the video file to disk (1 fps is enough for a static image).
        video_clip.write_videofile(video_filename, codec="libx264", audio_codec="aac", fps=1)
    finally:
        audio_clip.close()

    # Show the video and provide a download button for it.
    with open(video_filename, "rb") as file:
        st.video(video_filename)
        st.download_button('Download Video', file, file_name=video_filename)


if __name__ == "__main__":
    main()