Ahsen Khaliq committed on
Commit
c72151b
1 Parent(s): 5d41f4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -14,8 +14,8 @@ def get_optimal_font_scale(text, width):
14
  return scale/10
15
  return 1
16
 
17
- processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-large-ls960-ft")
18
- model = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")
19
  def map_to_array(file):
20
  speech, _ = sf.read(file)
21
  return speech
@@ -26,6 +26,8 @@ def inference(audio, image):
26
  transcription = processor.decode(predicted_ids[0])
27
  audio_clip = AudioFileClip(audio.name)
28
  image_clip = ImageClip(image.name).set_duration(audio_clip.duration)
 
 
29
  image_clip.write_videofile("my_video.mp4", fps=len(transcription.split())/audio_clip.duration)
30
  videoclip = VideoFileClip("my_video.mp4")
31
 
@@ -63,8 +65,6 @@ def inference(audio, image):
63
  output_clip = ImageSequenceClip(frame_list, fps=len(transcription.split())/audio_clip.duration)
64
  output_clip.audio = new_audioclip
65
  output_clip.write_videofile("output6.mp4")
66
- cap.release()
67
- cv2.destroyAllWindows()
68
  return transcription, 'output6.mp4'
69
 
70
  title = "Hubert"
14
  return scale/10
15
  return 1
16
 
17
+ processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-xlarge-ls960-ft")
18
+ model = HubertForCTC.from_pretrained("facebook/hubert-xlarge-ls960-ft")
19
  def map_to_array(file):
20
  speech, _ = sf.read(file)
21
  return speech
26
  transcription = processor.decode(predicted_ids[0])
27
  audio_clip = AudioFileClip(audio.name)
28
  image_clip = ImageClip(image.name).set_duration(audio_clip.duration)
29
+ image_clip = image_clip.resize(height=360) # Make the height 360px (according to the MoviePy documentation, the width is then computed so that the width/height ratio is conserved).
30
+
31
  image_clip.write_videofile("my_video.mp4", fps=len(transcription.split())/audio_clip.duration)
32
  videoclip = VideoFileClip("my_video.mp4")
33
 
65
  output_clip = ImageSequenceClip(frame_list, fps=len(transcription.split())/audio_clip.duration)
66
  output_clip.audio = new_audioclip
67
  output_clip.write_videofile("output6.mp4")
 
 
68
  return transcription, 'output6.mp4'
69
 
70
  title = "Hubert"