Spaces:
Sleeping
Sleeping
import streamlit as st | |
import os | |
from openai import OpenAI | |
import openai | |
import base64 | |
import time | |
import errno | |
from elevenlabs import generate, play, voices | |
from elevenlabs import voices, set_api_key | |
# Credentials come from the environment so that no secret lives in source
# control. A missing variable raises KeyError at import time, which fails fast
# instead of sending unauthenticated requests later.
# NOTE(review): the keys that used to be hard-coded on these lines were
# published together with this file and should be revoked and rotated.
set_api_key(os.environ["ELEVEN_API_KEY"])

# Default narration voice: the last entry of the list returned by voices().
voice = voices()[-1]

# Shared OpenAI client used by analyze_image().
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
def encode_image(image_path, max_retries=50, retry_delay=0.1):
    """Return the contents of the file at *image_path* as a base64 string.

    The file is opened in binary mode, base64-encoded and decoded to a
    UTF-8 ``str``. If opening or reading fails with ``errno.EACCES`` (treated
    here as "file is still being written"), the read is retried after
    ``retry_delay`` seconds, at most ``max_retries`` times.

    Args:
        image_path: Path of the file to read.
        max_retries: Number of retries allowed after an EACCES failure.
            Bounding the retries keeps a permanently unreadable file from
            blocking the caller forever.
        retry_delay: Seconds to sleep between retries.

    Returns:
        The base64 text of the file contents.

    Raises:
        OSError: Immediately for any error other than EACCES (for example a
            missing file), or with the last EACCES error once the retries
            are exhausted.
    """
    retries_left = max_retries
    while True:
        try:
            with open(image_path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        except OSError as e:
            # Anything other than "permission denied" is not a transient
            # condition; a permission error is only retried while budget remains.
            if e.errno != errno.EACCES or retries_left <= 0:
                raise
            retries_left -= 1
            # File may still be being written to: wait a bit and retry.
            time.sleep(retry_delay)
def generate_new_line(base64_image):
    """Build the user turn that asks for a description of an image.

    Args:
        base64_image: Base64 text of the image; it is embedded in a
            ``data:image/jpeg;base64,`` URL.

    Returns:
        A one-element list holding a ``"user"`` message whose content is a
        text part ("Describe this image") followed by an image part.
    """
    prompt_part = {"type": "text", "text": "Describe this image"}
    picture_part = {
        "type": "image_url",
        "image_url": "data:image/jpeg;base64,{}".format(base64_image),
    }
    user_message = {
        "role": "user",
        "content": [prompt_part, picture_part],
    }
    return [user_message]
def analyze_image(base64_image):
    """Ask the chat model to narrate an image and return the reply text.

    A system message with the narrator persona is placed ahead of the user
    message produced by ``generate_new_line(base64_image)``; the combined list
    is sent through the module-level ``client`` to the
    ``gpt-4-vision-preview`` model with a 500-token cap.

    Args:
        base64_image: Base64 text of the image to narrate.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    persona_message = {
        "role": "system",
        "content": """
You are Sir David Attenborough. Narrate the picture of the human as if it is a nature documentary.
Make it snarky and funny. Don't repeat yourself. Make it short. If I do anything remotely interesting, make a big deal about it!
""",
    }
    conversation = [persona_message] + generate_new_line(base64_image)
    completion = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=conversation,
        max_tokens=500,
    )
    return completion.choices[0].message.content
from io import BytesIO | |
import os | |
def save_uploaded_file(uploaded_file):
    """Write an uploaded file to ``uploaded_images/temp`` and return that path.

    Args:
        uploaded_file: Object exposing ``getbuffer()``; the returned buffer is
            written to disk as-is.

    Returns:
        The relative path of the written file
        (``os.path.join('uploaded_images', 'temp')``).

    Note:
        The file name is fixed, so each call overwrites the previous upload.
    """
    # Create a directory to save the file. exist_ok=True replaces the
    # check-then-create sequence, which could fail if the directory appeared
    # between the existence check and the makedirs call.
    save_path = 'uploaded_images'
    os.makedirs(save_path, exist_ok=True)

    # Save the file; the destination is computed once and reused for the return.
    target_path = os.path.join(save_path, 'temp')
    with open(target_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return target_path
def process():
    """Placeholder that performs no work and returns ``None``.

    NOTE(review): the original comment assumes the real 'process' is defined
    in 'your_processing_module' - confirm before relying on this stub.
    """
    return None
def main():
    """Render the Streamlit page that turns an uploaded image into narration.

    Flow: show the title and an uploader for jpg/png/jpeg files; once a file
    is provided, save it, base64-encode it, obtain the narration text from
    ``analyze_image`` and synthesize audio with ``generate`` using the
    module-level ``voice``. When audio is produced, an audio player and a
    download button are shown. Progress is printed to stdout at each step.
    """
    st.title("Image to Audio Converter")

    # Image upload
    upload = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])
    if not upload:
        # Nothing uploaded yet: only the title and uploader are rendered.
        return

    saved_path = save_uploaded_file(upload)
    print(f'file saved to {saved_path}')

    image_b64 = encode_image(saved_path)
    print('image encoded')

    narration = analyze_image(image_b64)
    print('image analyzed \n' + narration)

    audio_bytes = generate(text=narration, voice=voice, model="eleven_turbo_v2")
    if audio_bytes is None:
        return

    st.audio(audio_bytes, format='audio/mp3')
    st.download_button('Download Audio', audio_bytes, file_name='narrated.mp3')
# Build the page only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()