leeboykt committed on
Commit
cda531f
·
1 Parent(s): 3014af9

change to old pipeline

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +41 -20
  3. requirements.txt +2 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ venv
app.py CHANGED
@@ -1,31 +1,52 @@
1
  import gradio as gr
 
2
  from moviepy.editor import VideoFileClip
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  def transcribe_video(video_path):
5
- """Transcribes the audio from a video file using Whisper.
6
 
7
- Args:
8
- video_path: Path to the video file.
9
 
10
- Returns:
11
- The transcribed text.
12
- """
13
- try:
14
- # Extract audio from video
15
- video = VideoFileClip(video_path)
16
- audio_path = video_path.replace(".mp4", ".mp3") # Assuming input is MP4
17
- video.audio.write_audiofile(audio_path)
18
 
19
- # Load the Whisper model
20
- whisper = gr.load("models/openai/whisper-large-v3")
21
 
22
- # Pass the audio file path to Whisper
23
- with open(audio_path, "rb") as audio_file:
24
- transcription = whisper(audio_file)
25
 
26
- return transcription
27
- except Exception as e:
28
- return f"An error occurred: {e}"
29
 
30
  # Create the Gradio interface
31
  iface = gr.Interface(
@@ -36,4 +57,4 @@ iface = gr.Interface(
36
  description="Upload a video to transcribe its audio content.",
37
  )
38
 
39
- iface.launch()
 
1
  import gradio as gr
2
+ import torch
3
  from moviepy.editor import VideoFileClip
4
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
5
+
6
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
7
+
8
+ model_id = "openai/whisper-large-v3"
9
+ processor = AutoProcessor.from_pretrained(model_id)
10
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(
11
+ model_id, torch_dtype=torch_dtype, use_safetensors=True
12
+ )
13
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
14
+ pipe = pipeline(
15
+ "automatic-speech-recognition",
16
+ model=model,
17
+ tokenizer=processor.tokenizer,
18
+ feature_extractor=processor.feature_extractor,
19
+ max_new_tokens=400, # Adjusted to a lower value
20
+ chunk_length_s=30,
21
+ batch_size=16,
22
+ return_timestamps=True,
23
+ torch_dtype=torch_dtype,
24
+ device=device,
25
+ )
26
+
27
 
28
  def transcribe_video(video_path):
29
+ """Transcribes the audio from a video file using Whisper.
30
 
31
+ Args:
32
+ video_path: Path to the video file.
33
 
34
+ Returns:
35
+ The transcribed text.
36
+ """
37
+ try:
38
+ # Extract audio from video
39
+ video = VideoFileClip(video_path)
40
+ audio_path = video_path.replace(".mp4", ".mp3") # Assuming input is MP4
41
+ video.audio.write_audiofile(audio_path)
42
 
43
+ # Load the Whisper model
44
+ result = pipe(audio_path)
45
 
46
+ return result
47
+ except Exception as e:
48
+ return f"An error occurred: {e}"
49
 
 
 
 
50
 
51
  # Create the Gradio interface
52
  iface = gr.Interface(
 
57
  description="Upload a video to transcribe its audio content.",
58
  )
59
 
60
+ iface.launch()
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  gradio
2
  moviepy
 
 
 
1
  gradio
2
  moviepy
3
+ transformers
4
+ torch