divakaivan committed
Commit 78baa31
1 Parent(s): 397622f

Update app.py

Files changed (1)
  1. app.py +1 -19
app.py CHANGED
@@ -35,24 +35,6 @@ vocabs = dataset.map(
  dataset_vocab = set(vocabs["vocab"][0])
  tokenizer_vocab = {k for k,_ in tokenizer.get_vocab().items()}

- replacements = [
-     ('à', 'a'),
-     ('ç', 'c'),
-     ('è', 'e'),
-     ('ë', 'e'),
-     ('í', 'i'),
-     ('ï', 'i'),
-     ('ö', 'o'),
-     ('ü', 'u'),
- ]
-
- def cleanup_text(inputs):
-     for src, dst in replacements:
-         inputs["transcription"] = inputs["transcription"].replace(src, dst)
-     return inputs
-
- dataset = dataset.map(cleanup_text)
-
  import os
  import torch
  from speechbrain.inference.speaker import EncoderClassifier
@@ -119,7 +101,7 @@ def predict(text, speaker):
  # input_ids = input_ids[..., :model.config.max_text_positions]

  ### ### ###
- example = dataset['test'][11]
+ example = dataset['train'][888]
  speaker_embeddings = torch.tensor(example["speaker_embeddings"]).unsqueeze(0)

  spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
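Note on the first hunk: the removed block folded accented characters in the `transcription` column to plain ASCII via `Dataset.map` before tokenization. A minimal, self-contained sketch of that pattern is below; the toy data is hypothetical, only the `replacements` pairs and `cleanup_text` come from the removed code.

```python
from datasets import Dataset

# Hypothetical toy data standing in for the real dataset loaded in app.py.
dataset = Dataset.from_dict({"transcription": ["garçon naïf", "voilà Noël"]})

# Accent-folding pairs, as in the removed block.
replacements = [
    ("à", "a"), ("ç", "c"), ("è", "e"), ("ë", "e"),
    ("í", "i"), ("ï", "i"), ("ö", "o"), ("ü", "u"),
]

def cleanup_text(inputs):
    # Replace each accented character in the transcription with its ASCII counterpart.
    for src, dst in replacements:
        inputs["transcription"] = inputs["transcription"].replace(src, dst)
    return inputs

dataset = dataset.map(cleanup_text)
print(dataset["transcription"])  # ['garcon naif', 'voila Noel']
```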
 
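Note on the second hunk: it only changes which stored example supplies the speaker embedding, `dataset['train'][888]` instead of `dataset['test'][11]`. For context, a `speaker_embeddings` column like the one this line reads is commonly produced with the SpeechBrain encoder that app.py imports; a hedged sketch follows, where the checkpoint name and the 16 kHz waveform format are assumptions, not taken from this commit.

```python
import torch
from speechbrain.inference.speaker import EncoderClassifier

# Assumed x-vector checkpoint; app.py only shows the EncoderClassifier import.
speaker_model = EncoderClassifier.from_hparams(
    source="speechbrain/spkrec-xvect-voxceleb",
    savedir="/tmp/spkrec-xvect-voxceleb",
)

def create_speaker_embedding(waveform):
    """Return a 512-dim x-vector for a 1-D 16 kHz waveform (assumed format)."""
    with torch.no_grad():
        embeddings = speaker_model.encode_batch(
            torch.as_tensor(waveform, dtype=torch.float32)
        )
        embeddings = torch.nn.functional.normalize(embeddings, dim=2)
    return embeddings.squeeze().cpu().numpy()
```

At inference time the stored vector is reloaded exactly as the diff shows: `torch.tensor(example["speaker_embeddings"]).unsqueeze(0)` yields the `(1, 512)` tensor that `model.generate_speech` expects alongside the tokenized `input_ids`.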