NouFuS committed on
Commit
fc52adb
1 Parent(s): da7da4a

Update app.py

Files changed (1)
  1. app.py +25 -20
app.py CHANGED
@@ -13,25 +13,30 @@ print("Device:", device)
 model_id = "openai/whisper-large-v3"
 #model_id = "openai/whisper-medium"
 
-model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
-)
-model.to(device)
-
-processor = AutoProcessor.from_pretrained(model_id)
-
-pipe_transcription = pipeline(
-    "automatic-speech-recognition",
-    model=model,
-    tokenizer=processor.tokenizer,
-    feature_extractor=processor.feature_extractor,
-    max_new_tokens=128,
-    chunk_length_s=30,
-    batch_size=16,
-    return_timestamps=True,
-    torch_dtype=torch_dtype,
-    device=device,
-)
+# model_id = "openai/whisper-large-v3"
+# model_id = "openai/whisper-medium"
+
+# model = AutoModelForSpeechSeq2Seq.from_pretrained(
+#     model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+# )
+# model.to(device)
+
+# processor = AutoProcessor.from_pretrained(model_id)
+
+# pipe_transcription = pipeline(
+#     "automatic-speech-recognition",
+#     model=model,
+#     tokenizer=processor.tokenizer,
+#     feature_extractor=processor.feature_extractor,
+#     max_new_tokens=128,
+#     chunk_length_s=30,
+#     batch_size=16,
+#     return_timestamps=True,
+#     torch_dtype=torch_dtype,
+#     device=device,
+# )
+
+pipe_transcription = pipeline("automatic-speech-recognition", model="pierreguillou/whisper-medium-french")
 pipe_translate = pipeline("translation", model="Helsinki-NLP/opus-mt-fr-en", device=device)
 pipe_tts = pipeline("text-to-speech", model="facebook/mms-tts-eng", device=device) # Better quality, way faster than bark
 
@@ -40,7 +45,7 @@ def get_translation(text):
     return pipe_translate(text)[0]["translation_text"]
 
 def get_transcript(voice):
-    return pipe_transcription(voice, generate_kwargs={"task": "translate", "language": "french"})["text"]
+    return get_translation(pipe_transcription(voice)["text"])#, generate_kwargs={"task": "translate", "language": "french"})["text"]
 
 def get_audio(text):
     speech = pipe_tts(text)
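
A minimal sketch (not part of this commit) of how the updated pieces compose after the change: French speech is transcribed with pierreguillou/whisper-medium-french, the transcript is translated to English with Helsinki-NLP/opus-mt-fr-en, and the English text is synthesized with facebook/mms-tts-eng. Only the pipelines and functions visible in the diff are assumed; device/dtype handling, the rest of get_audio, and the surrounding app wiring are omitted here.

# Sketch only: reconstructs the post-commit flow from the diff above.
from transformers import pipeline

pipe_transcription = pipeline("automatic-speech-recognition", model="pierreguillou/whisper-medium-french")
pipe_translate = pipeline("translation", model="Helsinki-NLP/opus-mt-fr-en")
pipe_tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")

def get_translation(text):
    # The translation pipeline returns a list of dicts with a "translation_text" key.
    return pipe_translate(text)[0]["translation_text"]

def get_transcript(voice):
    # Transcribe the French audio, then translate the French transcript to English,
    # instead of asking Whisper itself to translate (the commented-out approach above).
    return get_translation(pipe_transcription(voice)["text"])

def get_audio(text):
    # The text-to-speech pipeline returns a dict with "audio" and "sampling_rate";
    # the real get_audio in app.py continues beyond what the diff shows.
    speech = pipe_tts(text)
    return speech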