arnavmehta7 committed on
Commit
0b5b973
1 Parent(s): 6ae1eee

added the code

Browse files
Files changed (2) hide show
  1. app.py +47 -4
  2. requirements.txt +36 -0
app.py CHANGED
@@ -1,7 +1,50 @@
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
 
 
 
 
 
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
1
+
2
  import gradio as gr
3
+ import torch
4
+ import librosa
5
+ import IPython.display as ipd
6
+ from pathlib import Path
7
+ import tempfile, torchaudio
8
+
9
+
10
# Fetch the English MARS5 entry point from the Camb-ai/mars5-tts hub repo.
# The call returns the model object together with its inference config class.
mars5, config_class = torch.hub.load(
    repo_or_dir="Camb-ai/mars5-tts",
    model="mars5_english",
    trust_repo=True,
)
12
+
13
+ # Default reference audio and transcript
14
+ # default_audio_path = "example.wav"
15
+ # default_transcript = "We actually haven't managed to meet demand."
16
+
17
def synthesize(text, audio_file, transcript):
    """Clone the voice in ``audio_file`` and speak ``text`` with it via MARS5.

    Args:
        text: Text to synthesize.
        audio_file: Filesystem path of the reference recording.
        transcript: Transcript of the reference recording; it is passed to
            ``mars5.tts`` together with the reference waveform.

    Returns:
        Path (as a string) of a temporary ``.wav`` file containing the
        synthesized audio, sampled at ``mars5.sr``.

    Raises:
        gr.Error: If no reference audio was supplied.
    """
    # Fail with a readable UI message instead of an opaque loader error
    # when the audio input was left empty.
    if not audio_file:
        raise gr.Error("Please upload a reference audio file to clone from.")

    # Load the reference as mono, resampled to the model's sample rate.
    ref_wav, _ = librosa.load(audio_file, sr=mars5.sr, mono=True)
    ref_wav = torch.from_numpy(ref_wav)

    # Inference settings for the TTS call.
    cfg = config_class(
        deep_clone=True,
        rep_penalty_window=100,
        top_k=100,
        temperature=0.7,
        freq_penalty=3,
    )

    # The first return value (the AR codes) is not used by this demo.
    _, wav_out = mars5.tts(text, ref_wav, transcript, cfg=cfg)

    # tempfile.mktemp() is deprecated and racy: it only returns a name, which
    # another process may claim before we write to it. NamedTemporaryFile
    # creates the file atomically; delete=False keeps it for the caller.
    # The handle is closed before saving so the path can be reopened on
    # every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    torchaudio.save(output_path, wav_out.unsqueeze(0), mars5.sr)

    return output_path
35
 
36
# Assemble the demo UI: two text boxes plus a reference-audio upload feed
# synthesize(), whose returned file path is rendered by an audio player.
interface = gr.Interface(
    title="MARS5 TTS Demo",
    description=(
        "Enter text and upload an audio file to clone the voice "
        "and generate synthesized speech using MARS5 TTS."
    ),
    fn=synthesize,
    inputs=[
        gr.Textbox(label="Text to synthesize"),
        gr.Audio(label="Audio file to clone from", type="filepath"),
        gr.Textbox(label="Uploaded audio file transcript"),
    ],
    outputs=gr.Audio(label="Synthesized Audio"),
)

# Start serving the app.
interface.launch()
requirements.txt ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ffmpeg-python
2
+ python-dotenv
3
+ pysrt
4
+ pydub
5
+ torchserve
6
+ torch-model-archiver
7
+ torch-workflow-archiver
8
+ portalocker
9
+ tenacity
10
+ httpx
11
+ python-Levenshtein
12
+ nvgpu
13
+ torch
14
+ torchvision
15
+ torchtext
16
+ torchaudio
17
+ speechtokenizer
18
+ matplotlib
19
+ pandas
20
+ numpy
21
+ ToJyutping
22
+ pypinyin
23
+ phonemizer
24
+ gruut-ipa
25
+ dateparser~=1.1.8
26
+ langcodes
27
+ language-data
28
+ vocos
29
+ einops
30
+ scipy
31
+ onnxruntime
32
+ unidecode
33
+ encodec
34
+ faster-whisper
35
+ tiktoken
36
+ librosa