Musa committed on
Commit
f6a686e
•
1 Parent(s): ba13912

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -2
app.py CHANGED
@@ -1,5 +1,26 @@
 
 
 
 
1
  from fastspeech2 import FastSpeech2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  voice_conversion_model = FastSpeech2.from_pretrained("path/to/pretrained/voice_conversion_model")
 
3
  def convert_voice(text):
4
  converted_voice = voice_conversion_model(text)
5
  return converted_voice
@@ -12,7 +33,7 @@ def transcribe(microphone, state, task="transcribe"):
12
  text = pipe(file)["text"]
13
  converted_voice = convert_voice(text)
14
 
15
- return state + "\n" + converted_voice, state + "\n" + converted_voice
16
 
17
  mf_transcribe = gr.Interface(
18
  fn=transcribe,
@@ -31,8 +52,10 @@ mf_transcribe = gr.Interface(
31
  live=True,
32
  description=(
33
  "Transcribe long-form microphone or audio inputs and convert the voice with the click of a button! Demo uses the"
34
- f" checkpoint ~[{MODEL_NAME}](https://huggingface.co/{MODEL_NAME})~ and 🤗 Transformers to transcribe audio files"
35
  " of arbitrary length and FastSpeech2 for voice conversion."
36
  ),
37
  allow_flagging="never",
38
  )
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ import pytube as pt
4
+ from transformers import pipeline
5
  from fastspeech2 import FastSpeech2
6
+
7
+ MODEL_NAME = "openai/whisper-large-v2"
8
+
9
+ device = 0 if torch.cuda.is_available() else "cpu"
10
+
11
+ pipe = pipeline(
12
+ task="automatic-speech-recognition",
13
+ model=MODEL_NAME,
14
+ chunk_length_s=30,
15
+ device=device,
16
+ )
17
+
18
+ all_special_ids = pipe.tokenizer.all_special_ids
19
+ transcribe_token_id = all_special_ids[-5]
20
+ translate_token_id = all_special_ids[-6]
21
+
22
  voice_conversion_model = FastSpeech2.from_pretrained("path/to/pretrained/voice_conversion_model")
23
+
24
  def convert_voice(text):
25
  converted_voice = voice_conversion_model(text)
26
  return converted_voice
 
33
  text = pipe(file)["text"]
34
  converted_voice = convert_voice(text)
35
 
36
+ return state + "\n" + converted_voice, state + "\n" + converted_voice, converted_voice
37
 
38
  mf_transcribe = gr.Interface(
39
  fn=transcribe,
 
52
  live=True,
53
  description=(
54
  "Transcribe long-form microphone or audio inputs and convert the voice with the click of a button! Demo uses the"
55
+ f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
56
  " of arbitrary length and FastSpeech2 for voice conversion."
57
  ),
58
  allow_flagging="never",
59
  )
60
+
61
+ mf_transcribe.launch(enable_queue=True)