NouFuS committed on
Commit
fc52adb
1 Parent(s): da7da4a

Update app.py

Files changed (1)
  1. app.py +25 -20
app.py CHANGED
@@ -13,25 +13,30 @@ print("Device:", device)
 model_id = "openai/whisper-large-v3"
 #model_id = "openai/whisper-medium"
 
-model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
-)
-model.to(device)
-
-processor = AutoProcessor.from_pretrained(model_id)
-
-pipe_transcription = pipeline(
-    "automatic-speech-recognition",
-    model=model,
-    tokenizer=processor.tokenizer,
-    feature_extractor=processor.feature_extractor,
-    max_new_tokens=128,
-    chunk_length_s=30,
-    batch_size=16,
-    return_timestamps=True,
-    torch_dtype=torch_dtype,
-    device=device,
-)
+# model_id = "openai/whisper-large-v3"
+# model_id = "openai/whisper-medium"
+
+# model = AutoModelForSpeechSeq2Seq.from_pretrained(
+#     model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+# )
+# model.to(device)
+
+# processor = AutoProcessor.from_pretrained(model_id)
+
+# pipe_transcription = pipeline(
+#     "automatic-speech-recognition",
+#     model=model,
+#     tokenizer=processor.tokenizer,
+#     feature_extractor=processor.feature_extractor,
+#     max_new_tokens=128,
+#     chunk_length_s=30,
+#     batch_size=16,
+#     return_timestamps=True,
+#     torch_dtype=torch_dtype,
+#     device=device,
+# )
+
+pipe_transcription = pipeline("automatic-speech-recognition", model="pierreguillou/whisper-medium-french")
 pipe_translate = pipeline("translation", model="Helsinki-NLP/opus-mt-fr-en", device=device)
 pipe_tts = pipeline("text-to-speech", model="facebook/mms-tts-eng", device=device) # Better quality, way faster than bark
 
@@ -40,7 +45,7 @@ def get_translation(text):
     return pipe_translate(text)[0]["translation_text"]
 
 def get_transcript(voice):
-    return pipe_transcription(voice, generate_kwargs={"task": "translate", "language": "french"})["text"]
+    return get_translation(pipe_transcription(voice)["text"])#, generate_kwargs={"task": "translate", "language": "french"})["text"]
 
 def get_audio(text):
     speech = pipe_tts(text)
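
A minimal sketch (not part of this commit) of how the updated pieces compose after the change: French speech is transcribed with pierreguillou/whisper-medium-french, the transcript is translated to English with Helsinki-NLP/opus-mt-fr-en, and the English text is synthesized with facebook/mms-tts-eng. Only the pipelines and functions visible in the diff are assumed; device/dtype handling, the rest of get_audio, and the surrounding app wiring are omitted here.

# Sketch only: reconstructs the post-commit flow from the diff above.
from transformers import pipeline

pipe_transcription = pipeline("automatic-speech-recognition", model="pierreguillou/whisper-medium-french")
pipe_translate = pipeline("translation", model="Helsinki-NLP/opus-mt-fr-en")
pipe_tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")

def get_translation(text):
    # The translation pipeline returns a list of dicts with a "translation_text" key.
    return pipe_translate(text)[0]["translation_text"]

def get_transcript(voice):
    # Transcribe the French audio, then translate the French transcript to English,
    # instead of asking Whisper itself to translate (the commented-out approach above).
    return get_translation(pipe_transcription(voice)["text"])

def get_audio(text):
    # The text-to-speech pipeline returns a dict with "audio" and "sampling_rate";
    # the real get_audio in app.py continues beyond what the diff shows.
    speech = pipe_tts(text)
    return speech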