# (Removed non-code residue scraped from the file-viewer page: file size,
#  git blame hashes, and a line-number gutter. They are not part of the script.)
import gradio as gr
#import requests
from PIL import Image
import os
from share_btn import community_icon_html, loading_icon_html, share_js
# Hugging Face API token; needed to call the protected one-shot-talking-face Space.
token = os.environ.get('HF_TOKEN')
# Remote Gradio Spaces loaded as callable clients — each stage of the pipeline:
# speech -> (Whisper + chatGPT) text, text -> speech, speech + image -> video.
whisper_to_gpt = gr.Blocks.load(name="spaces/fffiloni/whisper-to-chatGPT")
tts = gr.Interface.load(name="spaces/Flux9665/IMS-Toucan")
talking_face = gr.Blocks.load(name="spaces/fffiloni/one-shot-talking-face", api_key=token)
def infer(audio):
    """Run the full pipeline for one spoken question.

    audio: filepath to the recorded microphone clip (gr.Audio type="filepath").
    Returns (english translation text, talking-portrait video link,
    gr.update making the share-button group visible).
    """
    # Stage 1: Whisper transcribes/translates the clip and chatGPT answers it.
    # Index 0 is the English translation, index 1 the chatGPT reply text.
    stage1 = whisper_to_gpt(audio, "translate", fn_index=0)
    english_text = stage1[0]
    gpt_answer = stage1[1]

    # Stage 2: synthesize the chatGPT reply as English speech.
    spoken_answer = tts(gpt_answer, "English Text", "English Accent", "English Speaker's Voice", fn_index=0)

    # Stage 3: animate the portrait image with the synthesized speech.
    video_link = talking_face("wise_woman_portrait.png", spoken_answer, fn_index=0)

    return english_text, video_link, gr.update(visible=True)
# Static HTML header injected at the top of the page via gr.HTML(title).
# NOTE: this is a runtime string rendered by the browser — keep markup intact.
title = """
<div style="text-align: center; max-width: 500px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
margin-bottom: 10px;
"
>
<h1 style="font-weight: 600; margin-bottom: 7px;">
GPT Talking Portrait
</h1>
</div>
<p style="margin-bottom: 10px;font-size: 94%;font-weight: 100;line-height: 1.5em;">
Use Whisper to ask, alive portrait responds !
</p>
</div>
"""
# --- UI layout and event wiring ---
# NOTE(review): the source arrived with indentation stripped; the nesting below
# is reconstructed from Gradio conventions — confirm against the deployed Space.
with gr.Blocks(css="style.css") as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        # Output slot: the animated talking-portrait video produced by infer().
        gpt_response = gr.Video(label="Talking Portrait response", elem_id="video_out")
    with gr.Column(elem_id="col-container-2"):
        # Microphone input recorded to a temp file path (Whisper consumes a filepath).
        record_input = gr.Audio(source="microphone",type="filepath", label="Audio input", show_label=True, elem_id="record_btn")
        whisper_tr = gr.Textbox(label="whisper english translation", elem_id="text_inp")
        send_btn = gr.Button("Send my request !")
        # Hidden until infer() returns gr.update(visible=True) for this group.
        with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
            community_icon = gr.HTML(community_icon_html)
            loading_icon = gr.HTML(loading_icon_html)
            share_button = gr.Button("Share to community", elem_id="share-btn")
    # Main action: audio in -> (translation text, video, share-group visibility).
    send_btn.click(infer, inputs=[record_input], outputs=[whisper_tr, gpt_response, share_group])
    # Share runs entirely client-side via the _js snippet; no Python callback.
    share_button.click(None, [], [], _js=share_js)

# Queue requests so concurrent users don't overlap remote-Space calls.
demo.queue(max_size=32, concurrency_count=20).launch(debug=True)