File size: 2,772 Bytes
f2d8fa0
b0c635c
475e087
346d904
701d40f
cf0d196
 
 
701d40f
f2d8fa0
60ace2e
c786385
f2d8fa0
 
cf0d196
346d904
cf0d196
f95538c
991d60f
9166220
b0c635c
 
cf0d196
 
f2d8fa0
283777a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b70251f
283777a
 
 
 
 
cf0d196
c8bccce
 
 
cf0d196
 
283777a
c8bccce
cf0d196
a48c1fb
 
 
 
cf0d196
 
 
283777a
 
f2d8fa0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import gradio as gr
#import requests

from PIL import Image
import os

from share_btn import community_icon_html, loading_icon_html, share_js

# Hugging Face access token read from the environment (None when HF_TOKEN is unset).
token = os.getenv("HF_TOKEN")

# Remote Spaces loaded as local callables; each is invoked from infer() below.
whisper_to_gpt = gr.Blocks.load(name="spaces/fffiloni/whisper-to-chatGPT")
tts = gr.Interface.load(name="spaces/Flux9665/IMS-Toucan")
# Only this Space is loaded with the token.
talking_face = gr.Blocks.load(
    name="spaces/fffiloni/one-shot-talking-face",
    api_key=token,
)

def infer(audio):
    """Turn a recorded spoken request into a talking-portrait video answer.

    The recording is sent through three remote Spaces in sequence:
    ``whisper_to_gpt`` -> ``tts`` -> ``talking_face``.

    Args:
        audio: File path of the microphone recording, as delivered by the
            ``gr.Audio(type="filepath")`` input. Empty/``None`` when the user
            sends without recording anything.

    Returns:
        A 5-tuple matching the outputs wired to the send button: the first
        element of the whisper-to-chatGPT response (shown in the translation
        textbox), the talking-face result (shown in the video player), and
        three ``gr.update(visible=True)`` objects that reveal the share widgets.

    Raises:
        gr.Error: If no audio was provided, so the user sees a clear message
            instead of an opaque failure from the remote Space.
    """
    # Guard the empty case up front rather than forwarding None to the Space.
    if not audio:
        raise gr.Error("Please record an audio request before sending.")

    whisper_to_gpt_response = whisper_to_gpt(audio, "translate", fn_index=0)

    # Element [1] is what gets spoken -- presumably the ChatGPT reply text
    # (TODO confirm against the whisper-to-chatGPT Space's outputs).
    audio_response = tts(
        whisper_to_gpt_response[1],
        "English Text",
        "English Accent",
        "English Speaker's Voice",
        fn_index=0,
    )

    # Animate the bundled portrait image with the synthesized speech.
    portrait_link = talking_face("wise_woman_portrait.png", audio_response, fn_index=0)

    return (
        whisper_to_gpt_response[0],
        portrait_link,
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(visible=True),
    )

# HTML header rendered at the top of the page via gr.HTML(title):
# a centered <h1> with the app name and a one-line tagline below it.
title = """
    <div style="text-align: center; max-width: 500px; margin: 0 auto;">
        <div
        style="
            display: inline-flex;
            align-items: center;
            gap: 0.8rem;
            font-size: 1.75rem;
            margin-bottom: 10px;
        "
        >
        <h1 style="font-weight: 600; margin-bottom: 7px;">
            GPT Talking Portrait
        </h1>
        </div>
        <p style="margin-bottom: 10px;font-size: 94%;font-weight: 100;line-height: 1.5em;">
        Use Whisper to ask, alive portrait responds  !
        </p>
    </div>
"""

# Page layout: a header + video output column, then an input column holding
# the recorder, the translation textbox, the send button and the share widgets.
with gr.Blocks(css="style.css") as demo:

    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        gpt_response = gr.Video(
            label="Talking Portrait response",
            elem_id="video_out",
        )

    with gr.Column(elem_id="col-container-2"):
        record_input = gr.Audio(
            source="microphone",
            type="filepath",
            label="Audio input",
            show_label=True,
            elem_id="record_btn",
        )
        whisper_tr = gr.Textbox(
            label="whisper english translation",
            elem_id="text_inp",
        )

        send_btn = gr.Button("Send my request !")

        # Share widgets start hidden; infer() returns updates that reveal them.
        with gr.Group(elem_id="share-btn-container"):
            community_icon = gr.HTML(community_icon_html, visible=False)
            loading_icon = gr.HTML(loading_icon_html, visible=False)
            share_button = gr.Button(
                "Share to community",
                elem_id="share-btn",
                visible=False,
            )

    # The output order must match the tuple returned by infer().
    send_btn.click(
        infer,
        inputs=[record_input],
        outputs=[
            whisper_tr,
            gpt_response,
            share_button,
            community_icon,
            loading_icon,
        ],
    )
    # No Python callback here: the share action is handled by the share_js script.
    share_button.click(None, [], [], _js=share_js)

# Enable request queuing, then start the app.
demo.queue(max_size=32, concurrency_count=20)
demo.launch(debug=True)