File size: 5,337 Bytes
4af4e17
 
 
48b8a2c
4af4e17
b4bce9a
 
 
 
 
4af4e17
b4bce9a
 
4af4e17
b4bce9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4af4e17
 
6c4f574
 
c68830c
b4bce9a
 
11da134
4af4e17
668ed0d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import pathlib
import uuid
import os
import gradio as gr
from tqdm import tqdm
import requests
import urllib.request
import json
import time
# File name of the speech audio that transcribe_video uploads to D-ID.
# text_to_speach_api writes its audio to a file with this same name.
output_mp3="output.mp3"

def upload_image(img: str, d_id_key: str):
    """Upload a local image to D-ID and return the URL D-ID reports for it.

    Args:
        img: Path of the image file on disk.
        d_id_key: D-ID key; it is sent after ``"Basic "`` in the
            ``authorization`` header.

    Returns:
        The ``url`` field of the JSON body returned by the D-ID images
        endpoint.

    Raises:
        OSError: If ``img`` cannot be opened for reading.
        RuntimeError: If D-ID answers with an HTTP error status.
        KeyError: If a successful response carries no ``url`` field.
    """
    url = "https://api.d-id.com/images"
    headers = {
        "accept": "application/json",
        "authorization": "Basic " + d_id_key,
    }

    # Hold the file open only while the request is in flight so the handle
    # is released even when the request raises.
    with open(img, "rb") as image_file:
        # The upload name and content type are fixed, whatever the real
        # file is called.
        files = {"image": ("hero.jpg", image_file, "image/jpg")}
        response = requests.post(url, files=files, headers=headers, timeout=60)

    if not response.ok:
        # Surface the API's own message instead of a bare KeyError on "url".
        raise RuntimeError(
            f"D-ID image upload failed ({response.status_code}): {response.text}"
        )

    img_url = response.json()["url"]
    print(img_url)
    return img_url

def upload_audio(audio: str, d_id_key: str):
    """Upload a local audio file to D-ID and return the URL D-ID reports for it.

    Args:
        audio: Path of the audio file on disk; the same string is sent as
            the upload's file name.
        d_id_key: D-ID key; it is sent after ``"Basic "`` in the
            ``authorization`` header.

    Returns:
        The ``url`` field of the JSON body returned by the D-ID audios
        endpoint.

    Raises:
        OSError: If ``audio`` cannot be opened for reading.
        RuntimeError: If D-ID answers with an HTTP error status.
        KeyError: If a successful response carries no ``url`` field.
    """
    url = "https://api.d-id.com/audios"
    headers = {
        "accept": "application/json",
        "authorization": "Basic " + d_id_key,
    }

    # Hold the file open only while the request is in flight so the handle
    # is released even when the request raises.
    with open(audio, "rb") as audio_file:
        files = {"audio": (audio, audio_file, "audio/mpeg")}
        response = requests.post(url, files=files, headers=headers, timeout=60)

    if not response.ok:
        # Surface the API's own message instead of a bare KeyError on "url".
        raise RuntimeError(
            f"D-ID audio upload failed ({response.status_code}): {response.text}"
        )

    audio_url = response.json()["url"]
    print(audio_url)
    return audio_url

def get_did_video(process_video_url, d_id_key, poll_interval=1, max_wait=None):
    """Poll a D-ID talk until its video is ready and return the video URL.

    Args:
        process_video_url: Identifier appended to the ``/talks/`` endpoint
            (the caller passes the ``id`` returned when the talk was created).
        d_id_key: D-ID key; it is sent after ``"Basic "`` in the
            ``authorization`` header.
        poll_interval: Seconds to sleep between two polls.
        max_wait: Optional upper bound, in seconds, on the total polling
            time. ``None`` (the default) polls until the talk finishes or
            fails.

    Returns:
        The ``result_url`` field of the first response that contains one.

    Raises:
        RuntimeError: If D-ID rejects the request (4xx other than 429) or
            reports that the talk failed.
        TimeoutError: If ``max_wait`` elapses before a result is available.
    """
    url = "https://api.d-id.com/talks/" + process_video_url
    headers = {
        "accept": "application/json",
        "authorization": "Basic " + d_id_key,
    }
    deadline = None if max_wait is None else time.monotonic() + max_wait

    while True:
        response = requests.get(url, headers=headers, timeout=30)
        print(response.text)

        if response.ok:
            response_dict = response.json()
            if "result_url" in response_dict:
                result_url = response_dict["result_url"]
                break
            # NOTE(review): "error" and "rejected" are taken to be D-ID's
            # terminal failure statuses -- confirm against the API reference.
            # Without this check a failed talk would be polled forever.
            if response_dict.get("status") in ("error", "rejected"):
                raise RuntimeError(f"D-ID talk failed: {response.text}")
        elif 400 <= response.status_code < 500 and response.status_code != 429:
            # A bad key or unknown talk id will never start succeeding, so
            # stop instead of hammering the API. 429 and 5xx are retried.
            raise RuntimeError(
                f"D-ID talk lookup failed ({response.status_code}): {response.text}"
            )

        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"D-ID talk {process_video_url} was not ready after {max_wait} seconds"
            )

        # Sleep only when another poll is actually needed.
        time.sleep(poll_interval)

    print("From did_video \n\n\n")
    print(result_url)
    return result_url

def text_to_speach_api(text: str, elv_key, voice_id: str, output_path: str = "output.mp3"):
    """Synthesize ``text`` with ElevenLabs and save the audio to a file.

    Args:
        text: Text to be spoken.
        elv_key: ElevenLabs key, sent in the ``xi-api-key`` header.
        voice_id: ElevenLabs voice identifier inserted into the endpoint URL.
        output_path: Where the returned audio bytes are written. Defaults to
            ``"output.mp3"``, the path used before this parameter existed.

    Returns:
        ``output_path``, once the audio has been written.

    Raises:
        ValueError: If ``voice_id`` is empty or ``None``.
        RuntimeError: If ElevenLabs answers with an HTTP error status.
    """
    if not voice_id:
        # Fail with a clear message instead of a TypeError while building the URL.
        raise ValueError("voice_id is required to call the text-to-speech API")

    url = "https://api.elevenlabs.io/v1/text-to-speech/" + voice_id + "/stream"
    headers = {
        "accept": "*/*",
        "xi-api-key": elv_key,
        "Content-Type": "application/json",
    }
    data = {
        "text": text,
        "voice_settings": {
            "stability": 0,
            "similarity_boost": 0,
        },
    }

    response = requests.post(url, headers=headers, json=data, timeout=120)

    if not response.ok:
        # Raising stops the caller from uploading a stale or missing audio
        # file left over from an earlier run.
        raise RuntimeError(
            f"ElevenLabs text-to-speech failed ({response.status_code}): {response.text}"
        )

    with open(output_path, "wb") as f:
        f.write(response.content)
    return output_path

def get_voice_names():
    """Return the ``name`` of every entry under ``voices`` in ``data.json``.

    The file is read relative to the current working directory; names are
    returned in file order.
    """
    catalog = json.loads(pathlib.Path("data.json").read_text())
    return [entry["name"] for entry in catalog["voices"]]



# Look up a voice id by its display name.
def get_voice_id(name):
    """Return the ``voice_id`` of the first voice in ``data.json`` called ``name``.

    Returns ``None`` when no entry under ``voices`` has that name.
    """
    with open("data.json") as handle:
        voices = json.load(handle)["voices"]
    matches = (entry["voice_id"] for entry in voices if entry["name"] == name)
    return next(matches, None)

# D-ID API
def d_id_api(image_url, d_id_key, audio_url):
    """Create a D-ID talk from an image URL and an audio URL.

    Args:
        image_url: URL sent as ``source_url`` in the request body.
        d_id_key: D-ID key; it is sent after ``"Basic "`` in the
            ``authorization`` header.
        audio_url: URL sent as the script's ``audio_url``.

    Returns:
        The ``id`` field of the JSON body returned by the talks endpoint.

    Raises:
        RuntimeError: If D-ID answers with an HTTP error status.
        KeyError: If a successful response carries no ``id`` field.
    """
    print("D-id API")
    url = "https://api.d-id.com/talks"
    payload = {
        "source_url": image_url,
        "script": {
            "type": "audio",
            "audio_url": audio_url,
        },
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": "Basic " + d_id_key,
    }

    response = requests.post(url, json=payload, headers=headers, timeout=60)
    print("From D-id API \n\n\n")
    print(response.text)

    if not response.ok:
        # Surface the API's own message instead of a bare KeyError on "id".
        raise RuntimeError(
            f"D-ID talk creation failed ({response.status_code}): {response.text}"
        )

    process_video = response.json()["id"]
    print(process_video)
    return process_video
    


def transcribe_video(d_id_key: str, elv_key: str, full_text: str,voice_name: str,img):
    """Turn a photo and a piece of text into a talking-photo video file.

    The steps are: synthesize the text to speech, upload the audio and the
    image to D-ID, create a talk, wait for it to render, then download the
    resulting video.

    Args:
        d_id_key: D-ID key forwarded to every D-ID call.
        elv_key: ElevenLabs key forwarded to the text-to-speech call.
        full_text: Text the photo should speak.
        voice_name: Voice name, resolved to an id with ``get_voice_id``.
        img: Path of the photo on disk.

    Returns:
        Path of the downloaded ``.mp4`` file, relative to the working
        directory.

    Raises:
        ValueError: If a key, the text, or the image is missing, or if
            ``voice_name`` is not a known voice.
    """
    print(voice_name)

    # Validate up front so the user gets a clear message instead of a
    # TypeError or KeyError from deep inside one of the API helpers.
    if not d_id_key or not elv_key:
        raise ValueError("Both the D-ID and the ElevenLabs API keys are required")
    if not full_text or not full_text.strip():
        raise ValueError("Please enter the text the photo should speak")
    if not img:
        raise ValueError("Please provide a photo")

    voice_id=get_voice_id(voice_name)
    if voice_id is None:
        raise ValueError(f"Unknown voice: {voice_name!r}")

    text_to_speach_api(full_text, elv_key,voice_id)
    # NOTE: the audio still goes through the shared file named by output_mp3,
    # so two simultaneous requests can overwrite each other's audio.
    audio_url=upload_audio(output_mp3,d_id_key)
    image_url=upload_image(img,d_id_key)
    process_video_url=d_id_api(image_url, d_id_key,audio_url)
    video_url=get_did_video(process_video_url,d_id_key)

    # A unique name per request keeps simultaneous requests from clobbering
    # each other's download.
    file_name = f"hero-{uuid.uuid4().hex}.mp4"
    urllib.request.urlretrieve(video_url, file_name)
    return file_name
    

# Pre-filled example rows. Each row follows the order of the Interface inputs:
# D-ID key, ElevenLabs key, text to speak, voice name, image path.
examples = [
    [
        "",
        "",
        "Good morning, it's great to see you! I hope you're having a wonderful day. I just wanted to say thank you for taking the time to speak with me. Is there anything new or exciting happening in your life? I'd love to hear about it. Let's catch up soon!",
        "Arnold",
        "./images/hero.jpg",
    ],
    [
        "",
        "",
        "Hello there, I'm a talking photo! I can speak any text you type here. Try it out!",
        "Domi",
        "./images/3.jpg",
    ],
    [
        "",
        "",
        "Hello there, I'm a talking photo! I can speak any text you type here. Try it out!",
        "Domi",
        "./images/2.jpg",
    ],
]

# Gradio form wired to transcribe_video; the voice dropdown is filled from
# get_voice_names() when the module is loaded.
demo = gr.Interface(
    fn=transcribe_video,
    inputs=[
        gr.Textbox(
            label="D-Id API Key",
            placeholder="Paste your D-Id",
            type='password',
        ),
        gr.Textbox(
            label="Elevenlabs API Keys",
            placeholder="Paste Elevenlabs",
            type='password',
        ),
        gr.Textbox(
            lines=4,
            label=" Please input the text you wish to generate in order to make the photo speak.",
            placeholder="English Text here",
        ),
        gr.Dropdown(choices=get_voice_names(), label="Select a voice"),
        gr.Image(label="photo of a Person", type="filepath"),
    ],
    outputs="video",
    title="Bring your images to life with the talking animation feature now!",
    examples=examples,
    cache_examples=False,
)

demo.launch()