File size: 6,808 Bytes
f2d8fa0
502eb97
346d904
701d40f
cf0d196
0b14976
 
f3e3da1
 
cf0d196
 
701d40f
3419e26
60ace2e
c786385
f2d8fa0
613f964
0b14976
 
 
f3e3da1
0b14976
 
 
991d60f
9166220
891bd19
0b14976
613f964
0d83231
0b14976
f3e3da1
41be210
0b14976
 
 
41be210
0b14976
 
 
41be210
0b14976
 
 
41be210
0b14976
 
 
41be210
0b14976
 
 
41be210
0b14976
 
 
3f07526
0b14976
 
 
41be210
0b14976
f3e3da1
0b14976
 
cf0d196
f3e3da1
 
0b14976
 
 
 
 
 
 
 
 
 
 
f2d8fa0
706b0e7
891bd19
706b0e7
283777a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14af929
 
 
 
 
 
 
 
 
 
 
 
 
 
b70251f
283777a
 
 
 
 
cf0d196
4cb9798
7ed097f
a104929
9be3301
7ed097f
 
 
 
4cb9798
61014ce
c8bccce
 
ccf45a5
891bd19
 
d975815
7ed097f
 
14af929
 
 
706b0e7
891bd19
1991ec8
cf0d196
283777a
 
f2d8fa0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import gradio as gr

from PIL import Image
import os

import openai

#api_key = os.environ.get('api_key')

from share_btn import community_icon_html, loading_icon_html, share_js

# Hugging Face token used to authenticate against the talking-face Space.
token = os.environ.get('HF_TOKEN')
# Remote Gradio Spaces loaded as locally callable interfaces:
# speech-to-text/translation (Whisper), text-to-speech (IMS-Toucan),
# and the one-shot talking-portrait video generator.
# NOTE(review): these perform network calls at import time — the app
# fails fast here if a Space is down or the token is missing/invalid.
whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")
tts = gr.Interface.load(name="spaces/Flux9665/IMS-Toucan")
talking_face = gr.Blocks.load(name="spaces/fffiloni/one-shot-talking-face", api_key=token)

def infer(audio, openai_api_key):
    """Run the full pipeline for one request.

    Transcribes/translates the recorded audio with Whisper, asks the OpenAI
    completion API for an answer, synthesizes speech from it, animates the
    portrait with that speech, and returns the Gradio component updates:
    (whisper textbox, portrait video, error HTML, share group, clean button).
    """
    # Speech -> English text via the Whisper Space.
    transcript = whisper(audio, None, "translate", fn_index=0)

    # Ask the completion API; yields (answer_text, html_error_status).
    answer, error_html = try_api(transcript, openai_api_key)

    # Text -> speech, then speech -> talking-portrait video link.
    spoken_answer = tts(answer, "English Text", "English Accent", "English Speaker's Voice", fn_index=0)
    video_link = talking_face("wise_woman_portrait.png", spoken_answer, fn_index=0)

    return (
        gr.Textbox.update(value=transcript, visible=True),
        video_link,
        gr.Textbox.update(value=error_html, visible=True),
        gr.update(visible=True),
        gr.Button.update(visible=True),
    )

def try_api(message, openai_api_key):
    """Call the OpenAI completion API and map failures to user-facing HTML.

    Returns a 2-tuple (text, html_status): on success the completion text and
    a 'no error' span; on an OpenAI error the placeholder "oups" and an HTML
    span describing the failure (rendered in the UI's error box).
    """
    try:
        response = call_api(message, openai_api_key)
        return response, "<span class='openai_clear'>no error</span>"
    except openai.error.Timeout as e:
        #Handle timeout error, e.g. retry or log
        print(f"OpenAI API request timed out: {e}")
        return "oups", f"<span class='openai_error'>OpenAI API request timed out: <br />{e}</span>"
    except openai.error.APIError as e:
        #Handle API error, e.g. retry or log
        print(f"OpenAI API returned an API Error: {e}")
        return "oups", f"<span class='openai_error'>OpenAI API returned an API Error: <br />{e}</span>"
    except openai.error.APIConnectionError as e:
        #Handle connection error, e.g. check network or log
        print(f"OpenAI API request failed to connect: {e}")
        return "oups", f"<span class='openai_error'>OpenAI API request failed to connect: <br />{e}</span>"
    except openai.error.InvalidRequestError as e:
        #Handle invalid request error, e.g. validate parameters or log
        print(f"OpenAI API request was invalid: {e}")
        return "oups", f"<span class='openai_error'>OpenAI API request was invalid: <br />{e}</span>"
    except openai.error.AuthenticationError as e:
        #Handle authentication error, e.g. check credentials or log
        print(f"OpenAI API request was not authorized: {e}")
        return "oups", f"<span class='openai_error'>OpenAI API request was not authorized: <br />{e}</span>"
    except openai.error.PermissionError as e:
        #Handle permission error, e.g. check scope or log
        print(f"OpenAI API request was not permitted: {e}")
        return "oups", f"<span class='openai_error'>OpenAI API request was not permitted: <br />{e}</span>"
    except openai.error.RateLimitError as e:
        #Handle rate limit error, e.g. wait or log
        print(f"OpenAI API request exceeded rate limit: {e}")
        return "oups", f"<span class='openai_error'>OpenAI API request exceeded rate limit: <br />{e}</span>"
    except openai.error.OpenAIError as e:
        # Catch-all for OpenAI error types not listed above (e.g.
        # ServiceUnavailableError); previously these crashed the handler
        # instead of being shown in the UI error box.
        print(f"OpenAI API request failed: {e}")
        return "oups", f"<span class='openai_error'>OpenAI API request failed: <br />{e}</span>"

def call_api(message, openai_api_key):
          
    print("starting open ai")
    
    openai.api_key = openai_api_key
    
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=message,
        temperature=0.5,
        max_tokens=2048,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0.6
    )

    return str(response.choices[0].text).split("\n",2)[2]

def clean_components():
    """Reset the UI after a run: clear the audio input and the video,
    and hide the error box, the whisper textbox and the Clean button."""
    cleared_audio = gr.Audio.update(value=None)
    hidden_error = gr.HTML.update(visible=False)
    hidden_transcript = gr.Textbox.update(visible=False)
    cleared_video = gr.Video.update(value=None)
    hidden_clean_btn = gr.Button.update(visible=False)
    return cleared_audio, hidden_error, hidden_transcript, cleared_video, hidden_clean_btn

# HTML header rendered at the top of the app (app title + tagline).
title = """
    <div style="text-align: center; max-width: 500px; margin: 0 auto;">
        <div
        style="
            display: inline-flex;
            align-items: center;
            gap: 0.8rem;
            font-size: 1.75rem;
            margin-bottom: 10px;
        "
        >
        <h1 style="font-weight: 600; margin-bottom: 7px;">
            GPT Talking Portrait
        </h1>
        </div>
        <p style="margin-bottom: 10px;font-size: 94%;font-weight: 100;line-height: 1.5em;">
        Use Whisper to ask, alive portrait responds  !
        </p>
    </div>
"""

# HTML footer with usage notes and credits. Fixes mojibake left by a bad
# encoding round-trip: "β€”" was the em dash "—" and "πŸ€—" the 🤗 emoji.
article = """
    <p style="font-size: 0.8em;line-height: 1.2em;">Note: this demo is not able to sustain a conversation from earlier responses. 
    For more detailed results and dialogue, you should use the official ChatGPT interface.
    <br />—
    <br/>Also, be aware that audio records from iOS devices will not be decoded as expected by Gradio. For the best experience, record your voice from a computer instead of your smartphone ;)</p>
    <div class="footer">
        <p>Whisper & 
        <a href="https://chat.openai.com/chat" target="_blank">chatGPT</a> 
        by <a href="https://openai.com/" style="text-decoration: underline;" target="_blank">OpenAI</a> - 
        Gradio Demo by 🤗 <a href="https://twitter.com/fffiloni" target="_blank">Sylvain Filoni</a>
        </p>
    </div>
"""

# UI layout and event wiring. Code kept structurally identical; the only
# behavioral change is fixing the mojibake API-key label ("πŸ”" -> "🔐").
with gr.Blocks(css="style.css") as demo:
    
    # Output column: portrait video, whisper transcript, share/clean controls.
    with gr.Column(elem_id="col-container"):
        
        gr.HTML(title)
        
        gpt_response = gr.Video(label="Talking Portrait response", elem_id="video_out")
        whisper_tr = gr.Textbox(label="whisper english translation", elem_id="text_inp", visible=False)

        with gr.Row(elem_id="secondary-buttons"):
            clean_btn = gr.Button(value="Clean", elem_id="clean-btn", visible=False)
            with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
                    community_icon = gr.HTML(community_icon_html)
                    loading_icon = gr.HTML(loading_icon_html)
                    share_button = gr.Button("Share to community", elem_id="share-btn")
        
        # Error box filled by infer() with the HTML status from try_api().
        error_handler = gr.HTML(visible=False, show_label=False, elem_id="error_handler")
             
    # Input column: microphone recording, API key, and the send button.
    with gr.Column(elem_id="col-container-2"):
        with gr.Column():
            with gr.Row():
                record_input = gr.Audio(source="microphone",type="filepath", label="Audio input", show_label=True, elem_id="record_btn")
                openai_api_key = gr.Textbox(max_lines=1, type="password", label="🔐 Your OpenAI API Key", placeholder="sk-123abc...")
            
                
        send_btn = gr.Button("Send my request !")

        gr.HTML(article)
    
    # Event wiring: reset the UI, run the full pipeline, share via JS.
    clean_btn.click(clean_components, inputs=[], outputs=[record_input, error_handler, whisper_tr, gpt_response, clean_btn])   
    send_btn.click(infer, inputs=[record_input, openai_api_key], outputs=[whisper_tr, gpt_response, error_handler, share_group, clean_btn])
    share_button.click(None, [], [], _js=share_js)

demo.queue(max_size=32, concurrency_count=20).launch(debug=True)