humeur committed on
Commit
3882a86
1 Parent(s): edc4957

Final changes

Browse files
Files changed (2) hide show
  1. app.py +6 -19
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,33 +1,20 @@
1
  import gradio as gr
2
  from pytube import YouTube
3
  from transformers import pipeline
4
- from transformers import WhisperProcessor, WhisperForConditionalGeneration
5
- import soundfile
6
- import os
7
- import subprocess
8
 
9
  class GradioInference():
10
  def __init__(self):
11
- self.processor = WhisperProcessor.from_pretrained("humeur/whisper-small-sv-en")
12
-
13
- self.model = WhisperForConditionalGeneration.from_pretrained("humeur/whisper-small-sv-en")
14
  self.yt = None
15
-
16
  def __call__(self, link):
17
  if self.yt is None:
18
  self.yt = YouTube(link)
19
  path = self.yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
20
- subprocess.run([
21
- 'ffmpeg', '-i', 'tmp.mp4','-vn', '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '44100', '-f', 'wav','tmp.wav'
22
- ])
23
- sound_data = soundfile.read('tmp.wav')
24
- input_features = self.processor(sound_data, return_tensors="pt").input_features
25
- forced_decoder_ids = self.processor.get_decoder_prompt_ids(language = "sv", task = "translate")
26
- predicted_ids = self.model.generate(input_features, forced_decoder_ids = forced_decoder_ids)
27
- results = self.processor.batch_decode(predicted_ids, skip_special_tokens = True)
28
- # results = self.model(path)
29
- # return results["text"]
30
- return results
31
 
32
  def populate_metadata(self, link):
33
  self.yt = YouTube(link)
 
1
  import gradio as gr
2
  from pytube import YouTube
3
  from transformers import pipeline
 
 
 
 
4
 
5
  class GradioInference():
6
  def __init__(self):
7
+ self.transcribe_model = pipeline(model='humeur/lab2_id2223')
8
+ self.translate_model = pipeline("translation_SV_to_EN", model="Helsinki-NLP/opus-mt-sv-en")
 
9
  self.yt = None
10
+
11
  def __call__(self, link):
12
  if self.yt is None:
13
  self.yt = YouTube(link)
14
  path = self.yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
15
+ results = self.transcribe_model(path)
16
+ results = self.translate_model(results["text"])
17
+ return results['translation_text']
 
 
 
 
 
 
 
 
18
 
19
  def populate_metadata(self, link):
20
  self.yt = YouTube(link)
requirements.txt CHANGED
@@ -1,6 +1,5 @@
1
  transformers
 
2
  pytube
3
  torch
4
  torchaudio
5
- sentencepiece
6
- soundfile
 
1
  transformers
2
+ transformers[sentencepiece]
3
  pytube
4
  torch
5
  torchaudio