cudanexus54 committed on
Commit
b4bce9a
1 Parent(s): 61866ad
Files changed (6) hide show
  1. .gitignore +2 -0
  2. app.py +157 -10
  3. data.json +192 -0
  4. images/2.jpg +0 -0
  5. images/3.jpg +0 -0
  6. images/hero.jpg +0 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ images/2.png
2
+ *.mp4
app.py CHANGED
@@ -1,23 +1,170 @@
1
  import pathlib
2
  import uuid
3
  import os
4
- import gradio as gr
5
  from tqdm import tqdm
 
 
 
 
 
6
 
 
 
7
 
8
- def transcribe_video(d_id_key: str, elv_key: str, full_text: str,img):
9
- print("Test")
10
- segments="Work In Progress"
11
-
12
- return segments
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- examples = [["abcdefghijkadadasdasdasdasdasddad", "abcdefghijkadadasdasdasdasdasddad","Good morning, it's great to see you! I hope you're having a wonderful day. I just wanted to say thank you for taking the time to speak with me. Is there anything new or exciting happening in your life? I'd love to hear about it. Let's catch up soon!",
15
- "./hero.webp"]]
16
  demo = gr.Interface(fn=transcribe_video, inputs=[
17
  gr.Textbox(label="D-Id API Key",placeholder="Paste your D-Id",type='password'),
18
  gr.Textbox(label="Elevenlabs API Keys",placeholder="Paste Elevenlabs",type='password'),
19
  gr.Textbox(lines=4, label=" Please input the text you wish to generate in order to make the photo speak.", placeholder="English Text here"),
20
- gr.Image(label="Upload a photo of a face here, and generate a talking animation.", type="pil")
21
- ], outputs="text",title="Bring your images to life with the talking animation feature now!",examples=examples)
 
22
 
23
  demo.launch()
 
1
  import pathlib
2
  import uuid
3
  import os
4
+ import alphaui as gr
5
  from tqdm import tqdm
6
+ import requests
7
+ import urllib.request
8
+ import json
9
+ import time
10
+ output_mp3="output.mp3"
11
 
12
def upload_image(img: str, d_id_key: str):
    """Upload a local image to D-ID and return the URL assigned to it.

    Args:
        img: Path of the image file on disk.
        d_id_key: D-ID API key, sent as the HTTP Basic credential.

    Returns:
        The value of the ``url`` field of the JSON response.

    Raises:
        OSError: If the image file cannot be opened.
        requests.HTTPError: If D-ID answers with a 4xx/5xx status.
        KeyError: If the response JSON has no ``url`` field.
    """
    url = "https://api.d-id.com/images"
    headers = {
        "accept": "application/json",
        "authorization": "Basic " + d_id_key,
    }

    # The context manager closes the file handle even if the request raises.
    with open(img, "rb") as image_file:
        files = {"image": ("hero.jpg", image_file, "image/jpg")}
        response = requests.post(url, files=files, headers=headers)

    # Surface the HTTP status rather than a confusing KeyError on "url".
    response.raise_for_status()
    img_url = response.json()["url"]
    print(img_url)
    return img_url
28
+
29
def upload_audio(audio: str, d_id_key: str):
    """Upload a local audio file to D-ID and return the URL assigned to it.

    Args:
        audio: Path of the audio file on disk; also sent as the upload's
            file name.
        d_id_key: D-ID API key, sent as the HTTP Basic credential.

    Returns:
        The value of the ``url`` field of the JSON response.

    Raises:
        OSError: If the audio file cannot be opened.
        requests.HTTPError: If D-ID answers with a 4xx/5xx status.
        KeyError: If the response JSON has no ``url`` field.
    """
    url = "https://api.d-id.com/audios"
    headers = {
        "accept": "application/json",
        "authorization": "Basic " + d_id_key,
    }

    # The context manager closes the file handle even if the request raises.
    with open(audio, "rb") as audio_file:
        files = {"audio": (audio, audio_file, "audio/mpeg")}
        response = requests.post(url, files=files, headers=headers)

    # Surface the HTTP status rather than a confusing KeyError on "url".
    response.raise_for_status()
    audio_url = response.json()["url"]
    print(audio_url)
    return audio_url
43
+
44
def get_did_video(process_video_url, d_id_key, timeout=300.0, poll_interval=1.0):
    """Poll a D-ID talk until its rendered video URL is available.

    Args:
        process_video_url: Talk id, appended to
            ``https://api.d-id.com/talks/`` to form the polling URL.
        d_id_key: D-ID API key, sent as the HTTP Basic credential.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between two polls.

    Returns:
        The value of the ``result_url`` field once the response contains it.

    Raises:
        requests.HTTPError: If a poll answers with a 4xx/5xx status.
        RuntimeError: If the talk reports a terminal failure status.
        TimeoutError: If ``result_url`` does not appear within ``timeout``.
    """
    url = "https://api.d-id.com/talks/" + process_video_url
    headers = {
        "accept": "application/json",
        "authorization": "Basic " + d_id_key,
    }
    deadline = time.monotonic() + timeout

    while True:
        response = requests.get(url, headers=headers)
        print(response.text)
        # A bad key or unknown talk id will never produce a result_url, so
        # stop instead of polling forever.
        response.raise_for_status()
        response_dict = response.json()

        if "result_url" in response_dict:
            result_url = response_dict["result_url"]
            print("From did_video \n\n\n")
            print(result_url)
            return result_url

        # NOTE(review): "error" and "rejected" are assumed to be the terminal
        # failure values of the talk's "status" field - confirm against the
        # D-ID API reference.
        if response_dict.get("status") in ("error", "rejected"):
            raise RuntimeError("D-ID talk failed: " + response.text)

        if time.monotonic() >= deadline:
            raise TimeoutError(
                "Timed out waiting for D-ID talk " + str(process_video_url)
            )

        # Only wait when another poll is actually going to happen.
        time.sleep(poll_interval)
74
+
75
def text_to_speach_api(text: str, elv_key, voice_id: str, output_path: str = "output.mp3"):
    """Synthesize ``text`` with ElevenLabs and write the audio to a file.

    Args:
        text: Text to convert to speech.
        elv_key: ElevenLabs API key, sent in the ``xi-api-key`` header.
        voice_id: ElevenLabs voice id used to build the endpoint URL.
        output_path: Where the returned audio bytes are written.

    Raises:
        ValueError: If ``voice_id`` is empty or ``None``.
        RuntimeError: If ElevenLabs does not answer with a success status.
    """
    if not voice_id:
        # Without this guard the URL concatenation below fails with an
        # unhelpful TypeError when no voice was resolved.
        raise ValueError("voice_id is required for text-to-speech")

    url = "https://api.elevenlabs.io/v1/text-to-speech/" + voice_id + "/stream"
    headers = {
        "accept": "*/*",
        "xi-api-key": elv_key,
        "Content-Type": "application/json",
    }
    data = {
        "text": text,
        "voice_settings": {
            "stability": 0,
            "similarity_boost": 0,
        },
    }

    response = requests.post(url, headers=headers, json=data)

    if not response.ok:
        # Raising keeps the caller from continuing with a missing audio file
        # or one left over from an earlier request.
        raise RuntimeError("ElevenLabs text-to-speech failed: " + response.text)

    with open(output_path, "wb") as f:
        f.write(response.content)
98
+
99
def get_voice_names(path="data.json"):
    """Return the ``name`` of every entry under ``voices`` in a JSON file.

    Args:
        path: JSON file holding a top-level ``voices`` list of objects.

    Returns:
        The voice names, in file order.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If ``voices`` or an entry's ``name`` is missing.
    """
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    return [voice["name"] for voice in data["voices"]]
103
+
104
+
105
+
106
+ # define a function to get voice id by name
107
def get_voice_id(name, path="data.json"):
    """Look up the ``voice_id`` of the voice called ``name``.

    Args:
        name: Voice name to match exactly against each entry's ``name``.
        path: JSON file holding a top-level ``voices`` list of objects.

    Returns:
        The ``voice_id`` of the first matching entry, or ``None`` when no
        entry has that name.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    matches = (
        voice["voice_id"] for voice in data["voices"] if voice["name"] == name
    )
    return next(matches, None)
115
+
116
+ #D-id API
117
def d_id_api(image_url, d_id_key, audio_url):
    """Create a D-ID talk from an uploaded image and audio clip.

    Args:
        image_url: URL of the source image, sent as ``source_url``.
        d_id_key: D-ID API key, sent as the HTTP Basic credential.
        audio_url: URL of the audio, sent as the script's ``audio_url``.

    Returns:
        The value of the ``id`` field of the JSON response.

    Raises:
        requests.HTTPError: If D-ID answers with a 4xx/5xx status.
        KeyError: If the response JSON has no ``id`` field.
    """
    print("D-id API")
    url = "https://api.d-id.com/talks"
    payload = {
        "source_url": image_url,
        "script": {
            "type": "audio",
            "audio_url": audio_url,
        },
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": "Basic " + d_id_key,
    }

    response = requests.post(url, json=payload, headers=headers)
    print("From D-id API \n\n\n")
    print(response.text)

    # Surface the HTTP status rather than a confusing KeyError on "id".
    response.raise_for_status()
    process_video = response.json()["id"]
    print(process_video)
    return process_video
143
+
144
+
145
+
146
def transcribe_video(d_id_key: str, elv_key: str, full_text: str, voice_name: str, img):
    """Generate a talking-photo video and return the path of the saved file.

    Runs the steps in order: resolve the voice id, synthesize speech,
    upload the audio and the image, create the talk, wait for the rendered
    video, then download it.

    Args:
        d_id_key: D-ID API key.
        elv_key: ElevenLabs API key.
        full_text: Text the photo should speak.
        voice_name: Voice name to look up with ``get_voice_id``.
        img: Path of the image file to animate.

    Returns:
        Path of the downloaded ``.mp4`` file.

    Raises:
        ValueError: If the voice name is unknown or no image was supplied.
    """
    print(voice_name)
    voice_id = get_voice_id(voice_name)
    if voice_id is None:
        # get_voice_id returns None for an unknown or unselected voice.
        raise ValueError("Unknown voice: " + repr(voice_name))
    if not img:
        raise ValueError("An image is required to generate the video")

    text_to_speach_api(full_text, elv_key, voice_id)
    audio_url = upload_audio(output_mp3, d_id_key)
    image_url = upload_image(img, d_id_key)
    process_video_url = d_id_api(image_url, d_id_key, audio_url)
    video_url = get_did_video(process_video_url, d_id_key)

    # A unique name per request keeps one result from overwriting another.
    file_name = uuid.uuid4().hex + ".mp4"
    urllib.request.urlretrieve(video_url, file_name)
    return file_name
157
+
158
+
159
# Example rows, one value per input in order: D-Id key, Elevenlabs key,
# text to speak, voice name, image path. Both key fields are left empty.
examples = [
    [
        "",
        "",
        "Good morning, it's great to see you! I hope you're having a wonderful day. I just wanted to say thank you for taking the time to speak with me. Is there anything new or exciting happening in your life? I'd love to hear about it. Let's catch up soon!",
        "Arnold",
        "./images/hero.jpg",
    ],
    [
        "",
        "",
        "Hello there, I'm a talking photo! I can speak any text you type here. Try it out!",
        "Domi",
        "./images/3.jpg",
    ],
    [
        "",
        "",
        "Hello there, I'm a talking photo! I can speak any text you type here. Try it out!",
        "Domi",
        "./images/2.jpg",
    ],
]

# Inputs are listed in the same order as transcribe_video's parameters.
demo = gr.Interface(
    fn=transcribe_video,
    inputs=[
        gr.Textbox(
            label="D-Id API Key",
            placeholder="Paste your D-Id",
            type='password',
        ),
        gr.Textbox(
            label="Elevenlabs API Keys",
            placeholder="Paste Elevenlabs",
            type='password',
        ),
        gr.Textbox(
            lines=4,
            label=" Please input the text you wish to generate in order to make the photo speak.",
            placeholder="English Text here",
        ),
        gr.Dropdown(choices=get_voice_names(), label="Select a voice"),
        gr.Image(label="photo of a Person", type="filepath"),
    ],
    outputs="video",
    title="Bring your images to life with the talking animation feature now!",
    examples=examples,
)

demo.launch()
data.json ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "voices": [{
3
+ "voice_id": "21m00Tcm4TlvDq8ikWAM",
4
+ "name": "Rachel",
5
+ "samples": null,
6
+ "category": "premade",
7
+ "fine_tuning": {
8
+ "model_id": null,
9
+ "is_allowed_to_fine_tune": false,
10
+ "fine_tuning_requested": false,
11
+ "finetuning_state": "not_started",
12
+ "verification_attempts": null,
13
+ "verification_failures": [],
14
+ "verification_attempts_count": 0,
15
+ "slice_ids": null
16
+ },
17
+ "labels": {},
18
+ "description": null,
19
+ "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/21m00Tcm4TlvDq8ikWAM/6edb9076-c3e4-420c-b6ab-11d43fe341c8.mp3",
20
+ "available_for_tiers": [],
21
+ "settings": null
22
+ },
23
+ {
24
+ "voice_id": "AZnzlk1XvdvUeBnXmlld",
25
+ "name": "Domi",
26
+ "samples": null,
27
+ "category": "premade",
28
+ "fine_tuning": {
29
+ "model_id": null,
30
+ "is_allowed_to_fine_tune": false,
31
+ "fine_tuning_requested": false,
32
+ "finetuning_state": "not_started",
33
+ "verification_attempts": null,
34
+ "verification_failures": [],
35
+ "verification_attempts_count": 0,
36
+ "slice_ids": null
37
+ },
38
+ "labels": {},
39
+ "description": null,
40
+ "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/AZnzlk1XvdvUeBnXmlld/69c5373f-0dc2-4efd-9232-a0140182c0a9.mp3",
41
+ "available_for_tiers": [],
42
+ "settings": null
43
+ },
44
+ {
45
+ "voice_id": "EXAVITQu4vr4xnSDxMaL",
46
+ "name": "Bella",
47
+ "samples": null,
48
+ "category": "premade",
49
+ "fine_tuning": {
50
+ "model_id": null,
51
+ "is_allowed_to_fine_tune": false,
52
+ "fine_tuning_requested": false,
53
+ "finetuning_state": "not_started",
54
+ "verification_attempts": null,
55
+ "verification_failures": [],
56
+ "verification_attempts_count": 0,
57
+ "slice_ids": null
58
+ },
59
+ "labels": {},
60
+ "description": null,
61
+ "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/EXAVITQu4vr4xnSDxMaL/04365bce-98cc-4e99-9f10-56b60680cda9.mp3",
62
+ "available_for_tiers": [],
63
+ "settings": null
64
+ },
65
+ {
66
+ "voice_id": "ErXwobaYiN019PkySvjV",
67
+ "name": "Antoni",
68
+ "samples": null,
69
+ "category": "premade",
70
+ "fine_tuning": {
71
+ "model_id": null,
72
+ "is_allowed_to_fine_tune": false,
73
+ "fine_tuning_requested": false,
74
+ "finetuning_state": "not_started",
75
+ "verification_attempts": null,
76
+ "verification_failures": [],
77
+ "verification_attempts_count": 0,
78
+ "slice_ids": null
79
+ },
80
+ "labels": {},
81
+ "description": null,
82
+ "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/ErXwobaYiN019PkySvjV/38d8f8f0-1122-4333-b323-0b87478d506a.mp3",
83
+ "available_for_tiers": [],
84
+ "settings": null
85
+ },
86
+ {
87
+ "voice_id": "MF3mGyEYCl7XYWbV9V6O",
88
+ "name": "Elli",
89
+ "samples": null,
90
+ "category": "premade",
91
+ "fine_tuning": {
92
+ "model_id": null,
93
+ "is_allowed_to_fine_tune": false,
94
+ "fine_tuning_requested": false,
95
+ "finetuning_state": "not_started",
96
+ "verification_attempts": null,
97
+ "verification_failures": [],
98
+ "verification_attempts_count": 0,
99
+ "slice_ids": null
100
+ },
101
+ "labels": {},
102
+ "description": null,
103
+ "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/MF3mGyEYCl7XYWbV9V6O/f9fd64c3-5d62-45cd-b0dc-ad722ee3284e.mp3",
104
+ "available_for_tiers": [],
105
+ "settings": null
106
+ },
107
+ {
108
+ "voice_id": "TxGEqnHWrfWFTfGW9XjX",
109
+ "name": "Josh",
110
+ "samples": null,
111
+ "category": "premade",
112
+ "fine_tuning": {
113
+ "model_id": null,
114
+ "is_allowed_to_fine_tune": false,
115
+ "fine_tuning_requested": false,
116
+ "finetuning_state": "not_started",
117
+ "verification_attempts": null,
118
+ "verification_failures": [],
119
+ "verification_attempts_count": 0,
120
+ "slice_ids": null
121
+ },
122
+ "labels": {},
123
+ "description": null,
124
+ "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/TxGEqnHWrfWFTfGW9XjX/c6c80dcd-5fe5-4a4c-a74c-b3fec4c62c67.mp3",
125
+ "available_for_tiers": [],
126
+ "settings": null
127
+ },
128
+ {
129
+ "voice_id": "VR6AewLTigWG4xSOukaG",
130
+ "name": "Arnold",
131
+ "samples": null,
132
+ "category": "premade",
133
+ "fine_tuning": {
134
+ "model_id": null,
135
+ "is_allowed_to_fine_tune": false,
136
+ "fine_tuning_requested": false,
137
+ "finetuning_state": "not_started",
138
+ "verification_attempts": null,
139
+ "verification_failures": [],
140
+ "verification_attempts_count": 0,
141
+ "slice_ids": null
142
+ },
143
+ "labels": {},
144
+ "description": null,
145
+ "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/VR6AewLTigWG4xSOukaG/66e83dc2-6543-4897-9283-e028ac5ae4aa.mp3",
146
+ "available_for_tiers": [],
147
+ "settings": null
148
+ },
149
+ {
150
+ "voice_id": "pNInz6obpgDQGcFmaJgB",
151
+ "name": "Adam",
152
+ "samples": null,
153
+ "category": "premade",
154
+ "fine_tuning": {
155
+ "model_id": null,
156
+ "is_allowed_to_fine_tune": false,
157
+ "fine_tuning_requested": false,
158
+ "finetuning_state": "not_started",
159
+ "verification_attempts": null,
160
+ "verification_failures": [],
161
+ "verification_attempts_count": 0,
162
+ "slice_ids": null
163
+ },
164
+ "labels": {},
165
+ "description": null,
166
+ "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/pNInz6obpgDQGcFmaJgB/e0b45450-78db-49b9-aaa4-d5358a6871bd.mp3",
167
+ "available_for_tiers": [],
168
+ "settings": null
169
+ },
170
+ {
171
+ "voice_id": "yoZ06aMxZJJ28mfd3POQ",
172
+ "name": "Sam",
173
+ "samples": null,
174
+ "category": "premade",
175
+ "fine_tuning": {
176
+ "model_id": null,
177
+ "is_allowed_to_fine_tune": false,
178
+ "fine_tuning_requested": false,
179
+ "finetuning_state": "not_started",
180
+ "verification_attempts": null,
181
+ "verification_failures": [],
182
+ "verification_attempts_count": 0,
183
+ "slice_ids": null
184
+ },
185
+ "labels": {},
186
+ "description": null,
187
+ "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/yoZ06aMxZJJ28mfd3POQ/1c4d417c-ba80-4de8-874a-a1c57987ea63.mp3",
188
+ "available_for_tiers": [],
189
+ "settings": null
190
+ }
191
+ ]
192
+ }
images/2.jpg ADDED
images/3.jpg ADDED
images/hero.jpg ADDED