matejmicek committed on
Commit
bb971e5
1 Parent(s): b58467b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -0
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+
4
+ import os
5
+ from openai import OpenAI
6
+ import openai
7
+ import base64
8
+
9
+ import time
10
+ import errno
11
+ from elevenlabs import generate, play, voices
12
+
13
+
14
+ from elevenlabs import voices, set_api_key
15
# Credentials are read from the environment so that no secret is ever stored
# in the source file. A missing variable raises KeyError at start-up, which is
# easier to diagnose than an authentication failure on the first request.
# NOTE(review): the keys that used to be hard-coded here are exposed in the
# commit history and should be revoked and reissued.
set_api_key(os.environ["ELEVEN_API_KEY"])

# Narration uses the last entry of the list returned by voices().
voice = voices()[-1]

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
22
+
23
+
24
def encode_image(image_path, retry_delay=0.1, max_retries=100):
    """Return the contents of *image_path* as a base64-encoded string.

    The file is opened in binary mode and read in full. If opening fails
    with ``EACCES`` (treated here as "file is still being written"), the
    read is retried after a short pause.

    Args:
        image_path: Path of the file to encode.
        retry_delay: Seconds to sleep between retries after ``EACCES``.
        max_retries: Maximum number of retries after ``EACCES`` before the
            error is re-raised. Pass ``None`` to retry without limit.

    Returns:
        The base64 encoding of the file's bytes, decoded as UTF-8 text.

    Raises:
        OSError: For any error other than ``EACCES``, or for ``EACCES``
            once *max_retries* retries have been used up.
    """
    attempts = 0
    while True:
        try:
            with open(image_path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        except OSError as e:
            if e.errno != errno.EACCES:
                # Not a "file in use" error, re-raise
                raise
            if max_retries is not None and attempts >= max_retries:
                # A file that stays unreadable must not hang the caller
                # forever; surface the permission error instead.
                raise
            attempts += 1
            # File is being written to, wait a bit and retry
            time.sleep(retry_delay)
35
+
36
+
37
def generate_new_line(base64_image):
    """Build the user message that asks for a description of an image.

    Args:
        base64_image: Base64 text of the image; it is embedded in a
            ``data:image/jpeg;base64,`` URI.

    Returns:
        A one-element list holding a ``"user"`` message whose content is a
        text part ("Describe this image") followed by an image part.
    """
    data_uri = f"data:image/jpeg;base64,{base64_image}"
    text_part = {"type": "text", "text": "Describe this image"}
    image_part = {"type": "image_url", "image_url": data_uri}
    return [{"role": "user", "content": [text_part, image_part]}]
50
+
51
def analyze_image(base64_image):
    """Ask the chat model to narrate an image and return the reply text.

    A system prompt is sent first, followed by the user message produced by
    ``generate_new_line`` for the given image.

    Args:
        base64_image: Base64 text of the image to narrate.

    Returns:
        The message content of the first choice in the completion response.
    """
    system_prompt = (
        "\n"
        "You are Sir David Attenborough. Narrate the picture of the human as if it is a nature documentary.\n"
        "Make it snarky and funny. Don't repeat yourself. Make it short. If I do anything remotely interesting, make a big deal about it!\n"
    )
    conversation = [{"role": "system", "content": system_prompt}]
    conversation.extend(generate_new_line(base64_image))

    completion = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=conversation,
        max_tokens=500,
    )
    return completion.choices[0].message.content
68
+
69
+
70
+ from io import BytesIO
71
+ import os
72
+
73
def save_uploaded_file(uploaded_file):
    """Write an uploaded file to ``uploaded_images/temp`` and return the path.

    Args:
        uploaded_file: Object exposing ``getbuffer()``, whose result is
            written to disk as-is.

    Returns:
        The relative path of the written file (``uploaded_images/temp``).
    """
    save_path = 'uploaded_images'
    # exist_ok avoids the check-then-create race of testing for the directory
    # first: another run may create it between the check and the makedirs.
    os.makedirs(save_path, exist_ok=True)

    # NOTE(review): every upload is written to the same 'temp' file, so
    # simultaneous uploads would overwrite each other.
    destination = os.path.join(save_path, 'temp')
    with open(destination, "wb") as f:
        f.write(uploaded_file.getbuffer())

    return destination
84
+
85
def process():
    """Placeholder that does nothing and returns ``None``.

    Per the original note, the real 'process' is assumed to be defined in
    'your_processing_module'.
    """
    return None
87
+
88
def main():
    """Render the Streamlit page that turns an uploaded image into audio.

    Once an image is uploaded it is saved to disk, base64-encoded, narrated
    by ``analyze_image`` and passed to ``generate`` for speech. The
    resulting audio is offered for playback and download. Progress
    messages are printed to standard output.
    """
    st.title("Image to Audio Converter")

    # Image upload
    upload = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])
    if not upload:
        # Nothing uploaded yet; only the title and the uploader are shown.
        return

    path = save_uploaded_file(upload)
    print(f'file saved to {path}')

    image_b64 = encode_image(path)
    print('image encoded')

    narration = analyze_image(image_b64)
    print('image analyzed \n' + narration)

    audio = generate(text=narration, voice=voice, model="eleven_turbo_v2")
    if audio is None:
        return

    st.audio(audio, format='audio/mp3')
    st.download_button('Download Audio', audio, file_name='narrated.mp3')


if __name__ == "__main__":
    main()