arnavmehta7 committed on
Commit
f1a1bc0
1 Parent(s): b5a18cb

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.json +1 -0
  2. handler.py +43 -0
config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
handler.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict
2
+ from pathlib import Path
3
+ import tempfile
4
+ import torch
5
+ import torchaudio
6
+ import librosa
7
+
8
+ SAMPLE_RATE = 16000
9
+
10
class EndpointHandler():
    """Inference endpoint handler that runs MARS5 text-to-speech voice cloning.

    The model is loaded once at construction time; each call synthesizes
    speech for the requested text using a reference recording and its
    transcript, and writes the result to a WAV file on local disk.
    """

    # Request fields that ``__call__`` reads from the payload.
    _REQUIRED_KEYS = ("text", "audio_file", "transcript")

    def __init__(self, path=""):
        """Load the MARS5 model and its inference-config class via torch.hub.

        Args:
            path: Accepted for interface compatibility; not used here, since
                the model is fetched from the ``Camb-ai/mars5-tts`` hub repo.
        """
        # NOTE: trust_repo=True executes code from the remote hub repository.
        self.mars5, self.config_class = torch.hub.load(
            'Camb-ai/mars5-tts', 'mars5_english', trust_repo=True
        )

    def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
        """Synthesize speech for ``data["text"]`` in the reference voice.

        Args:
            data: Request payload with the keys:
                ``"text"``: the text to synthesize;
                ``"audio_file"``: the reference audio, passed straight to
                ``librosa.load`` (presumably a file path - confirm against
                the caller);
                ``"transcript"``: the transcript of the reference audio.

        Returns:
            A dict whose ``"synthesized_audio"`` entry is the path of the
            generated WAV file. The file is not deleted by this handler.

        Raises:
            KeyError: If any of the required keys is missing from ``data``.
        """
        # Fail early, and name every missing field, before doing any
        # expensive audio loading or model inference.
        missing = [key for key in self._REQUIRED_KEYS if key not in data]
        if missing:
            raise KeyError(
                "missing required field(s): " + ", ".join(missing)
            )

        text = data["text"]
        audio_file = data["audio_file"]
        transcript = data["transcript"]

        # Load the reference audio as mono, resampled to the model's rate.
        ref_wav, _ = librosa.load(audio_file, sr=self.mars5.sr, mono=True)
        ref_wav = torch.from_numpy(ref_wav)

        # Sampling configuration for the TTS model.
        cfg = self.config_class(
            deep_clone=True,
            rep_penalty_window=100,
            top_k=100,
            temperature=0.7,
            freq_penalty=3,
        )

        # The first return value (the codes) is not needed here.
        _, wav_out = self.mars5.tts(text, ref_wav, transcript, cfg=cfg)

        # tempfile.mktemp() is deprecated and race-prone: the returned name
        # can be claimed by another process before the file is created.
        # NamedTemporaryFile creates the file atomically; delete=False keeps
        # it on disk so its path can be handed back to the caller.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = Path(tmp.name)

        # unsqueeze(0) adds a leading dimension before saving; this assumes
        # wav_out is a 1-D tensor of samples - TODO confirm.
        torchaudio.save(str(output_path), wav_out.unsqueeze(0), self.mars5.sr)

        return {"synthesized_audio": str(output_path)}