Spaces:

leeboykt
/

video-extractor

Sleeping

App Files Files Community

leeboykt committed on Jul 23, 2024

Commit

cda531f

1 Parent(s): 3014af9

change to old pipeline

Browse files

Files changed (3) hide show

.gitignore +1 -0
app.py +41 -20
requirements.txt +2 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ venv

app.py CHANGED Viewed

@@ -1,31 +1,52 @@
 import gradio as gr
 from moviepy.editor import VideoFileClip
 def transcribe_video(video_path):
-  """Transcribes the audio from a video file using Whisper.
-  Args:
-    video_path: Path to the video file.
-  Returns:
-    The transcribed text.
-  """
-  try:
-    # Extract audio from video
-    video = VideoFileClip(video_path)
-    audio_path = video_path.replace(".mp4", ".mp3")  # Assuming input is MP4
-    video.audio.write_audiofile(audio_path)
-    # Load the Whisper model
-    whisper = gr.load("models/openai/whisper-large-v3")
-    # Pass the audio file path to Whisper
-    with open(audio_path, "rb") as audio_file:
-        transcription = whisper(audio_file)
-        return transcription
-  except Exception as e:
-    return f"An error occurred: {e}"
 # Create the Gradio interface
 iface = gr.Interface(
@@ -36,4 +57,4 @@ iface = gr.Interface(
     description="Upload a video to transcribe its audio content.",
 )
-iface.launch()

 import gradio as gr
+import torch
 from moviepy.editor import VideoFileClip
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+model_id = "openai/whisper-large-v3"
+processor = AutoProcessor.from_pretrained(model_id)
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    model_id, torch_dtype=torch_dtype, use_safetensors=True
+)
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    max_new_tokens=400,  # Adjusted to a lower value
+    chunk_length_s=30,
+    batch_size=16,
+    return_timestamps=True,
+    torch_dtype=torch_dtype,
+    device=device,
+)
 def transcribe_video(video_path):
+    """Transcribes the audio from a video file using Whisper.
+    Args:
+      video_path: Path to the video file.
+    Returns:
+      The transcribed text.
+    """
+    try:
+        # Extract audio from video
+        video = VideoFileClip(video_path)
+        audio_path = video_path.replace(".mp4", ".mp3")  # Assuming input is MP4
+        video.audio.write_audiofile(audio_path)
+        # Run the preloaded Whisper pipeline on the extracted audio file
+        result = pipe(audio_path)
+        return result
+    except Exception as e:
+        return f"An error occurred: {e}"
 # Create the Gradio interface
 iface = gr.Interface(
     description="Upload a video to transcribe its audio content.",
 )
+iface.launch()

requirements.txt CHANGED Viewed

@@ -1,2 +1,4 @@
 gradio
 moviepy

 gradio
 moviepy
+transformers
+torch