Fralet committed on
Commit
0cd4364
·
verified ·
1 Parent(s): 1d15323

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -1,5 +1,6 @@
1
- from transformers import pipeline, DiffusionTextToSpeechPipeline
2
-
 
3
 
4
  # Initialize the translation pipeline for Russian to English
5
  translator = pipeline("translation_ru_to_en", model="Helsinki-NLP/opus-mt-ru-en")
@@ -19,11 +20,14 @@ summary = summarizer(translation, max_length=140, min_length=110, do_sample=Fals
19
 
20
  print("Summary: ", summary)
21
 
22
- pipeline = DiffusionTextToSpeechPipeline.from_pretrained("microsoft/tts-diffusion-xlarge-en")
 
 
 
23
 
24
  # Generate speech
25
- speech = pipeline(summary)
 
26
 
27
- # Save the output
28
- with open("output.wav", "wb") as f:
29
- f.write(speech["sampling_rate"].get_wav_data())
 
1
+ from transformers import pipeline, TFAutoModelForCausalLM, AutoTokenizer
2
+ import soundfile as sf
3
+ import torch
4
 
5
  # Initialize the translation pipeline for Russian to English
6
  translator = pipeline("translation_ru_to_en", model="Helsinki-NLP/opus-mt-ru-en")
 
20
 
21
  print("Summary: ", summary)
22
 
23
+ tokenizer = AutoTokenizer.from_pretrained("facebook/fastspeech2-en-ljspeech")
24
+ model = TFAutoModelForCausalLM.from_pretrained("facebook/fastspeech2-en-ljspeech")
25
+
26
+ inputs = tokenizer(summary, return_tensors="tf")
27
 
28
  # Generate speech
29
+ with torch.no_grad():
30
+ logits = model.generate(**inputs)
31
 
32
+ # Save the audio
33
+ sf.write('output_audio.wav', logits.numpy(), samplerate=16000)