jujutech committed
Commit 59f0d90 · verified · 1 Parent(s): 80ee7cf

Update app.py

Files changed (1): app.py (+24 -8)
app.py CHANGED

@@ -4,8 +4,8 @@ import torch
 import librosa
 
 # Load the model and processor
-processor = Wav2Vec2Processor.from_pretrained("SpeechResearch/whisper-ft-normal")
-model = Wav2Vec2ForCTC.from_pretrained("SpeechResearch/whisper-ft-normal")
+processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
+model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
 
 def transcribe_speech(audio_path):
     speech, _ = librosa.load(audio_path, sr=16000)
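This hunk swaps the ASR checkpoint from SpeechResearch/whisper-ft-normal to facebook/wav2vec2-large-960h. The old repo name suggests a Whisper fine-tune, which the Wav2Vec2Processor / Wav2Vec2ForCTC classes cannot load, so the swap plausibly fixes a load-time failure. The hunk also skips the middle of transcribe_speech (app.py lines 12-15 are not shown); a standard Wav2Vec2 greedy-CTC body consistent with the visible lines would look roughly like this sketch, not necessarily the file's actual code:

    import torch
    import librosa
    from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
    model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")

    def transcribe_speech(audio_path):
        speech, _ = librosa.load(audio_path, sr=16000)  # model expects 16 kHz mono
        inputs = processor(speech, sampling_rate=16000, return_tensors="pt")
        with torch.no_grad():
            logits = model(inputs.input_values).logits  # (batch, frames, vocab)
        predicted_ids = torch.argmax(logits, dim=-1)    # greedy CTC decoding
        transcription = processor.batch_decode(predicted_ids)
        return transcription[0]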
@@ -16,12 +16,28 @@ def transcribe_speech(audio_path):
     transcription = processor.batch_decode(predicted_ids)
     return transcription[0]
 
+def get_dreamtalk(image_in, speech):
+    try:
+        client = Client("https://fffiloni-dreamtalk.hf.space/")
+        result = client.predict(
+            speech,  # filepath in 'Audio input' Audio component
+            image_in,  # filepath in 'Image' Image component
+            "M030_front_neutral_level1_001.mat",  # Literal in 'emotional style' Dropdown component
+            api_name="/infer"
+        )
+        return result['video']
+    except Exception as e:
+        print(f"Error in get_dreamtalk: {e}")
+        raise gr.Error(f"Error in get_dreamtalk: {str(e)}")
+
 def pipe(text, voice, image_in):
-    # Assuming voice is a file path to the audio file
-    transcription = transcribe_speech(voice)
-    # Now use this transcription with your get_dreamtalk function
-    video = get_dreamtalk(image_in, transcription)
-    return video
+    try:
+        speech = transcribe_speech(voice)
+        video = get_dreamtalk(image_in, speech)
+        return video
+    except Exception as e:
+        print(f"An error occurred while processing: {e}")
+        raise gr.Error(f"An error occurred while processing: {str(e)}")
 
 with gr.Blocks() as demo:
     with gr.Column():
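The added get_dreamtalk hands video synthesis off to the fffiloni/dreamtalk Space via gradio_client, and pipe now wraps both steps in try/except so failures surface in the UI as gr.Error rather than a silent stack trace. The Client import presumably lives in the unshown top of app.py (lines 1-3). A minimal standalone sketch of the same call, assuming the Space is reachable, gradio_client is installed, and using placeholder file paths:

    # Sketch of calling the DreamTalk Space through gradio_client;
    # argument order mirrors the call added in this commit.
    from gradio_client import Client

    client = Client("https://fffiloni-dreamtalk.hf.space/")
    result = client.predict(
        "sample.wav",                         # placeholder audio filepath
        "face.png",                           # placeholder image filepath
        "M030_front_neutral_level1_001.mat",  # emotional-style preset
        api_name="/infer"
    )
    print(result)  # the commit reads result['video'], so the Space
                   # presumably returns a dict containing the video path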
@@ -48,4 +64,4 @@ with gr.Blocks() as demo:
         outputs=[video_o],
         concurrency_limit=3
     )
-demo.queue(max_size=10).launch(show_error=True, show_api=False)
+demo.queue(max_size=10).launch(show_error=True, show_api=False)
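The final hunk removes and re-adds an identical launch line, which usually indicates a whitespace or end-of-file newline change. For context, queue(max_size=10) bounds how many jobs may wait while concurrency_limit=3 caps parallel runs of the event. A minimal Blocks skeleton with the same settings; except for video_o and pipe, the component names here are illustrative, since the file's actual ones are not visible in the diff:

    import gradio as gr

    def pipe(text, voice, image_in):
        # placeholder body; the real app transcribes `voice` and
        # forwards the result to the DreamTalk Space
        return None

    with gr.Blocks() as demo:
        with gr.Column():
            text_in = gr.Textbox(label="Text")    # illustrative names
            voice_in = gr.Audio(type="filepath", label="Voice")
            image_in = gr.Image(type="filepath", label="Image")
            video_o = gr.Video(label="Result")
            submit = gr.Button("Submit")
        submit.click(
            fn=pipe,
            inputs=[text_in, voice_in, image_in],
            outputs=[video_o],
            concurrency_limit=3  # at most 3 concurrent runs of this event
        )

    demo.queue(max_size=10).launch(show_error=True, show_api=False)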