arham061 committed on
Commit
f4c0261
1 Parent(s): 3157df7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -33
app.py CHANGED
@@ -14,36 +14,6 @@ model = SpeechT5ForTextToSpeech.from_pretrained(checkpoint)
14
  tokenizer = processor.tokenizer
15
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
16
 
17
- def prepare_dataset(example):
18
- audio = example["audio"]
19
-
20
- example = processor(
21
- text=transString(example["sentence"]),
22
- audio_target=audio["array"],
23
- sampling_rate=audio["sampling_rate"],
24
- return_attention_mask=False,
25
- )
26
-
27
- # strip off the batch dimension
28
- example["labels"] = example["labels"][0]
29
-
30
- # use SpeechBrain to obtain x-vector
31
- example["speaker_embeddings"] = create_speaker_embedding(audio["array"])
32
-
33
- return example
34
-
35
- # Set the authentication token
36
- config.HF_DATASETS_CUSTOM_HEADERS = {
37
- "Authorization": "Bearer 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"
38
- }
39
- from huggingface_hub import notebook_login
40
-
41
- notebook_login()
42
-
43
- test_dataset = load_dataset("mozilla-foundation/common_voice_13_0", "ur", split="test")
44
- test_dataset = test_dataset.cast_column("audio", Audio(sampling_rate=16000))
45
- test_dataset = test_dataset.map(prepare_dataset, remove_columns=test_dataset.column_names)
46
-
47
 
48
  # Buckwalter to Unicode mapping
49
  buck2uni = {
@@ -122,9 +92,7 @@ def generate_audio(text):
122
  inputs = processor(text=roman_urdu, return_tensors="pt")
123
 
124
  # Generate audio from the SpeechT5 model
125
- example = test_dataset[22]
126
- speaker_embeddings = torch.tensor(example["speaker_embeddings"]).unsqueeze(0)
127
-
128
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
129
 
130
  return speech
 
14
  tokenizer = processor.tokenizer
15
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  # Buckwalter to Unicode mapping
19
  buck2uni = {
 
92
  inputs = processor(text=roman_urdu, return_tensors="pt")
93
 
94
  # Generate audio from the SpeechT5 model
95
+ speaker_embeddings = torch.tensor(np.load("speaker_embeddings.npy"))
 
 
96
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
97
 
98
  return speech