versae committed on
Commit
a4bdb54
1 Parent(s): 1eb7da1

Update duplex.py

Browse files
Files changed (1) hide show
  1. duplex.py +7 -6
duplex.py CHANGED
@@ -19,6 +19,7 @@ from transformers import pipeline, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2P
19
  DEBUG = os.environ.get("DEBUG", "false")[0] in "ty1"
20
  MAX_LENGTH = int(os.environ.get("MAX_LENGTH", 1024))
21
  DEFAULT_LANG = os.environ.get("DEFAULT_LANG", "English")
 
22
 
23
  HEADER = """
24
  # Poor Man's Duplex
@@ -34,8 +35,8 @@ FOOTER = """
34
  """.strip()
35
 
36
  asr_model_name_es = "jonatasgrosman/wav2vec2-large-xlsr-53-spanish"
37
- model_instance_es = AutoModelForCTC.from_pretrained(asr_model_name_es)
38
- processor_es = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model_name_es)
39
  asr_es = pipeline(
40
  "automatic-speech-recognition",
41
  model=model_instance_es,
@@ -44,7 +45,7 @@ asr_es = pipeline(
44
  decoder=processor_es.decoder
45
  )
46
  tts_model_name = "facebook/tts_transformer-es-css10"
47
- speak_es = gr.Interface.load(f"huggingface/{tts_model_name}")
48
  transcribe_es = lambda input_file: asr_es(input_file, chunk_length_s=5, stride_length_s=1)["text"]
49
  def generate_es(text, **kwargs):
50
  # max_length=100, top_k=100, top_p=50, temperature=0.95, do_sample=True, do_clean=True
@@ -68,13 +69,13 @@ asr_en = pipeline(
68
  decoder=processor_en.decoder
69
  )
70
  tts_model_name = "facebook/fastspeech2-en-ljspeech"
71
- speak_en = gr.Interface.load(f"huggingface/{tts_model_name}")
72
  transcribe_en = lambda input_file: asr_en(input_file, chunk_length_s=5, stride_length_s=1)["text"]
73
- generate_iface = gr.Interface.load("huggingface/EleutherAI/gpt-j-6B")
74
 
75
  empty_audio = 'empty.flac'
76
  sf.write(empty_audio, [], 16000)
77
- deuncase = gr.Interface.load("huggingface/pere/DeUnCaser")
78
 
79
  def generate_en(text, **kwargs):
80
  response = generate_iface(text)
 
19
  DEBUG = os.environ.get("DEBUG", "false")[0] in "ty1"
20
  MAX_LENGTH = int(os.environ.get("MAX_LENGTH", 1024))
21
  DEFAULT_LANG = os.environ.get("DEFAULT_LANG", "English")
22
+ HF_AUTH_TOKEN = os.environ.get("HF_AUTH_TOKEN", None)
23
 
24
  HEADER = """
25
  # Poor Man's Duplex
 
35
  """.strip()
36
 
37
  asr_model_name_es = "jonatasgrosman/wav2vec2-large-xlsr-53-spanish"
38
+ model_instance_es = AutoModelForCTC.from_pretrained(asr_model_name_es, use_auth_token=HF_AUTH_TOKEN)
39
+ processor_es = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model_name_es, use_auth_token=HF_AUTH_TOKEN)
40
  asr_es = pipeline(
41
  "automatic-speech-recognition",
42
  model=model_instance_es,
 
45
  decoder=processor_es.decoder
46
  )
47
  tts_model_name = "facebook/tts_transformer-es-css10"
48
+ speak_es = gr.Interface.load(f"huggingface/{tts_model_name}", api_key=HF_AUTH_TOKEN)
49
  transcribe_es = lambda input_file: asr_es(input_file, chunk_length_s=5, stride_length_s=1)["text"]
50
  def generate_es(text, **kwargs):
51
  # max_length=100, top_k=100, top_p=50, temperature=0.95, do_sample=True, do_clean=True
 
69
  decoder=processor_en.decoder
70
  )
71
  tts_model_name = "facebook/fastspeech2-en-ljspeech"
72
+ speak_en = gr.Interface.load(f"huggingface/{tts_model_name}", api_key=HF_AUTH_TOKEN)
73
  transcribe_en = lambda input_file: asr_en(input_file, chunk_length_s=5, stride_length_s=1)["text"]
74
+ generate_iface = gr.Interface.load("huggingface/EleutherAI/gpt-j-6B", api_key=HF_AUTH_TOKEN)
75
 
76
  empty_audio = 'empty.flac'
77
  sf.write(empty_audio, [], 16000)
78
+ deuncase = gr.Interface.load("huggingface/pere/DeUnCaser", api_key=HF_AUTH_TOKEN)
79
 
80
  def generate_en(text, **kwargs):
81
  response = generate_iface(text)