kollis committed
Commit 606152f
Parent: bc6475a

Update app.py

Files changed (1): app.py (+4 -4)
app.py CHANGED
@@ -5,7 +5,7 @@ import torchaudio
  from transformers import AutoProcessor, SeamlessM4TModel
  processor = AutoProcessor.from_pretrained("facebook/hf-seamless-m4t-medium")
  model = SeamlessM4TModel.from_pretrained("facebook/hf-seamless-m4t-medium")
- model.to('cuda')
+ # model.to('cuda')

  language_dict = {
      "Modern Standard Arabic" : "arb",
@@ -57,14 +57,14 @@ def png(source_lang,target_lang,audio,text):
          processed_inputs = processor(text, src_lang=source_lang_code, return_tensors="pt")
      else:
          sample_rate, audio_data = audio
-         audio_tokens = torch.from_numpy(audio_data).to(torch.device("cuda"))
+         audio_tokens = torch.from_numpy(audio_data) #.to(torch.device("cuda"))
          audio_tokens = audio_tokens.to(torch.float32)
          audio_tokens = torchaudio.functional.resample(audio_tokens, orig_freq=sample_rate, new_freq=16_000)
-         audio_tokens = audio_tokens.cpu()
+         # audio_tokens = audio_tokens.cpu()
          processed_inputs = processor(audios=audio_tokens, sampling_rate=16000, return_tensors="pt")


-     processed_inputs = processed_inputs.to("cuda")
+     # processed_inputs = processed_inputs.to("cuda")
      generated_audio = model.generate(**processed_inputs, tgt_lang=target_lang_code)[0].cpu().numpy().squeeze()
      output_tokens = model.generate(**processed_inputs, tgt_lang=target_lang_code, generate_speech=False)
      generated_text = processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
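
The commit simply comments out every CUDA call so the app runs on CPU-only hardware. A minimal device-agnostic sketch of the same flow, assuming the model and processor from the diff; the `translate` helper and its signature are illustrative, not part of the commit:

import torch
import torchaudio
from transformers import AutoProcessor, SeamlessM4TModel

# Pick CUDA when a GPU is present, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

processor = AutoProcessor.from_pretrained("facebook/hf-seamless-m4t-medium")
model = SeamlessM4TModel.from_pretrained("facebook/hf-seamless-m4t-medium").to(device)

def translate(source_lang_code, target_lang_code, audio=None, text=None):
    if text:
        processed_inputs = processor(text, src_lang=source_lang_code, return_tensors="pt")
    else:
        sample_rate, audio_data = audio
        # Keep preprocessing on CPU; resample to the 16 kHz the model expects.
        audio_tokens = torch.from_numpy(audio_data).to(torch.float32)
        audio_tokens = torchaudio.functional.resample(
            audio_tokens, orig_freq=sample_rate, new_freq=16_000
        )
        processed_inputs = processor(
            audios=audio_tokens, sampling_rate=16000, return_tensors="pt"
        )

    # Move the whole batch to wherever the model lives.
    processed_inputs = processed_inputs.to(device)
    generated_audio = (
        model.generate(**processed_inputs, tgt_lang=target_lang_code)[0]
        .cpu().numpy().squeeze()
    )
    output_tokens = model.generate(
        **processed_inputs, tgt_lang=target_lang_code, generate_speech=False
    )
    generated_text = processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
    return generated_audio, generated_text

With this pattern the same file works on both a GPU Space and the free CPU tier, instead of keeping a second copy of the script with the CUDA calls commented out.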