File size: 4,281 Bytes
bb971e5
 
 
 
 
 
 
 
 
 
 
 
 
 
6d1765e
bb971e5
 
 
 
 
6d1765e
bb971e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a629947
bb971e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a629947
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb971e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a629947
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb971e5
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import streamlit as st


import os
from openai import OpenAI
import openai
import base64

import time
import errno
from elevenlabs import generate, play, voices


from elevenlabs import voices, set_api_key
# Configure the ElevenLabs client with the key stored in the `eleven`
# environment variable (a missing variable raises KeyError at import time).
set_api_key(os.environ['eleven'])



# Narration voice: the last entry of the list returned by voices().
voice = voices()[-1]

# OpenAI client used by analyze_image(); the key is read from the `open`
# environment variable (a missing variable raises KeyError at import time).
client = OpenAI(api_key=os.environ['open'])


def encode_image(image_path, max_retries=50, retry_delay=0.1):
    """Return the contents of *image_path* as a base64-encoded string.

    The file is read in binary mode. If opening or reading fails with
    ``EACCES`` the read is retried after a short pause, up to
    *max_retries* times.

    Args:
        image_path: Path of the file to encode.
        max_retries: Maximum number of retries after an ``EACCES`` failure
            before the error is propagated.
        retry_delay: Seconds to sleep between retries.

    Returns:
        The base64 encoding of the file's bytes, decoded as UTF-8 text.

    Raises:
        OSError: For any error other than ``EACCES``, or for ``EACCES``
            once the retries are exhausted.
    """
    retries_left = max_retries
    while True:
        try:
            with open(image_path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        except OSError as e:
            # Only EACCES is retried; everything else is re-raised at once.
            # The retry count is bounded because EACCES is also reported
            # for a permanent permission problem, which would otherwise
            # spin here forever.
            if e.errno != errno.EACCES or retries_left <= 0:
                raise
            retries_left -= 1
            time.sleep(retry_delay)


def generate_new_line(base64_image):
    """Build the user message that asks the model to describe an image.

    Args:
        base64_image: Base64-encoded image data, embedded into a
            ``data:image/jpeg;base64,...`` URL.

    Returns:
        A one-element list holding a ``user`` message whose content is a
        text part followed by an ``image_url`` part.
    """
    text_part = {"type": "text", "text": "Describe this image"}
    image_part = {
        "type": "image_url",
        "image_url": "data:image/jpeg;base64,{}".format(base64_image),
    }
    user_message = {"role": "user", "content": [text_part, image_part]}
    return [user_message]

def analyze_image(base64_image):
    """Request a short narration of the image from the chat completions API.

    A system prompt is sent first, followed by the user message produced by
    ``generate_new_line``; the reply is capped at 100 tokens.

    Args:
        base64_image: Base64-encoded image data.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # The prompt text (including its leading whitespace) is sent verbatim.
    system_prompt = """
                You are Sir David Attenborough. Narrate the picture of the human as if it is a nature documentary.
                Make it snarky and funny. Don't repeat yourself. Make it short. If I do anything remotely interesting, make a big deal about it!
                """
    conversation = [{"role": "system", "content": system_prompt}]
    conversation.extend(generate_new_line(base64_image))

    completion = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=conversation,
        max_tokens=100,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


from io import BytesIO
import os

def save_uploaded_file(uploaded_file):
    """Write an uploaded file to ``uploaded_images/temp`` and return the path.

    Args:
        uploaded_file: Object exposing ``getbuffer()``, whose result is
            written to disk as-is.

    Returns:
        The path of the written file (``uploaded_images/temp``, joined with
        the platform separator).
    """
    save_dir = 'uploaded_images'
    # exist_ok avoids the check-then-create race: a separate exists() test
    # can pass just before another caller creates the directory.
    os.makedirs(save_dir, exist_ok=True)

    # Always the same file name, so each upload overwrites the previous one.
    file_path = os.path.join(save_dir, 'temp')
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    return file_path


def save_audio_file(audio):
    """Write audio bytes to ``audio/temp.mp3`` and return the path.

    Args:
        audio: Bytes-like audio data to write.

    Returns:
        The path of the file that was written (``audio/temp.mp3``, joined
        with the platform separator).
    """
    save_dir = 'audio'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    # Compute the path once so the returned value is exactly the file that
    # was written (previously the '.mp3' suffix was missing from the result).
    file_path = os.path.join(save_dir, 'temp.mp3')
    with open(file_path, "wb") as f:
        f.write(audio)

    return file_path


from moviepy.editor import ImageClip, AudioFileClip


def main():
    """Streamlit page: narrate an uploaded image and build a narrated video.

    Steps, once an image has been uploaded:
      1. save the upload to disk and base64-encode it;
      2. ask the vision model for a narration (``analyze_image``);
      3. synthesise the narration to audio with ``generate``;
      4. offer the audio for playback/download and write it to
         ``narrated.mp3``;
      5. combine the still image and the audio into ``narrated_video.mp4``
         and offer it for playback/download.
    """
    st.title("Image to Audio Converter")

    # Image upload
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])
    if not uploaded_file:
        # Nothing uploaded: only the title and uploader are shown.
        return

    path = save_uploaded_file(uploaded_file)
    print(f'file saved to {path}')

    encoded_image = encode_image(path)
    print('image encoded')
    analyzed_image = analyze_image(encoded_image)
    if not analyzed_image:
        # The model reply can be empty/None; there is nothing to narrate and
        # the string concatenation below would fail.
        st.error("No narration was returned for this image.")
        return
    print('image analyzed \n' + analyzed_image)

    audio_file = generate(text=analyzed_image, voice=voice, model="eleven_turbo_v2")
    if audio_file is None:
        return

    st.audio(audio_file, format='audio/mp3')
    st.download_button('Download Audio', audio_file, file_name='narrated.mp3')
    audio_filename = 'narrated.mp3'
    with open(audio_filename, 'wb') as f:
        f.write(audio_file)

    # Specify the filename for the video
    video_filename = "narrated_video.mp4"

    print('creating video')
    # Open the audio once and reuse it for both the duration and the sound
    # track; close it afterwards so the underlying reader is released.
    audio_clip = AudioFileClip(audio_filename)
    try:
        # Create a video clip from the static image, as long as the audio
        video_clip = ImageClip(path).set_duration(audio_clip.duration)

        # Set the audio of the video clip as the generated audio file
        video_clip = video_clip.set_audio(audio_clip)

        print('video created')

        # Write the video file to disk
        video_clip.write_videofile(video_filename, codec="libx264", audio_codec="aac", fps=24)
    finally:
        audio_clip.close()

    # Show the video and provide a download button for it
    with open(video_filename, "rb") as file:
        st.video(video_filename)
        st.download_button('Download Video', file, file_name=video_filename)

# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()