Hobis committed on
Commit
3e91b4e
1 Parent(s): 001ec5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -18,7 +18,9 @@ tokenizer = CustomTokenizer.load_from_checkpoint('polish-HuBERT-quantizer_8_epoc
18
 
19
 
20
  def process_audio(in_file):
21
- wav, sr = torchaudio.load(in_file.name)
 
 
22
  if wav.shape[0] == 2:
23
  wav = wav.mean(0, keepdim=True)
24
  semantic_vectors = hubert_model.forward(wav, input_sample_hz=sr)
@@ -30,8 +32,11 @@ def process_audio(in_file):
30
  codes = torch.cat([encoded[0] for encoded in encoded_frames], dim=-1).squeeze()
31
  fine_prompt = codes
32
  coarse_prompt = fine_prompt[:2, :]
33
- np.savez('output.npz', semantic_prompt=semantic_tokens, fine_prompt=fine_prompt, coarse_prompt=coarse_prompt)
34
- return 'output.npz'
 
 
 
35
 
36
  iface = gr.Interface(fn=process_audio, inputs=gr.inputs.File(label="Input Audio"), outputs=gr.outputs.File(label="Output File"))
37
  iface.launch()
@@ -41,3 +46,4 @@ iface.launch()
41
 
42
 
43
 
 
 
18
 
19
 
20
  def process_audio(in_file):
21
+ input_filename = in_file.name
22
+
23
+ wav, sr = torchaudio.load(input_filename)
24
  if wav.shape[0] == 2:
25
  wav = wav.mean(0, keepdim=True)
26
  semantic_vectors = hubert_model.forward(wav, input_sample_hz=sr)
 
32
  codes = torch.cat([encoded[0] for encoded in encoded_frames], dim=-1).squeeze()
33
  fine_prompt = codes
34
  coarse_prompt = fine_prompt[:2, :]
35
+
36
+ output_filename = os.path.splitext(input_filename)[0] + '.npz'
37
+
38
+ np.savez(output_filename, semantic_prompt=semantic_tokens, fine_prompt=fine_prompt, coarse_prompt=coarse_prompt)
39
+ return output_filename
40
 
41
  iface = gr.Interface(fn=process_audio, inputs=gr.inputs.File(label="Input Audio"), outputs=gr.outputs.File(label="Output File"))
42
  iface.launch()
 
46
 
47
 
48
 
49
+