Spaces:

michakomo
/

youtube-sum

Sleeping

App Files Files Community

michakomo committed on Jun 9, 2023

Commit

6fd27e6

•

1 Parent(s): b9cf626

Update app.py

Browse files

Files changed (2) hide show

app.py +90 -4
requirements.txt +3 -0

app.py CHANGED Viewed

@@ -1,7 +1,93 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()

 import gradio as gr
+import whisper
+from pytube import YouTube
+from typing import List
+from transformers import pipeline
+def transcribe(
+        url: str,
+        model_size: str
+) -> str:
+    # Get audio from the video.
+    yt_client = YouTube(url=url)
+    audio_file = yt_client.streams.filter(only_audio=True)[0].download(filename="file.mp4")
+    # Load the model
+    model = whisper.load_model(model_size)
+    # Load the audio into the model
+    audio = whisper.load_audio(audio_file)
+    # Get results
+    result = model.transcribe(audio)
+    return format_result(result), summarize(result["text"])
+def summarize(text: str) -> str:
+    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+    out = summarizer(text, max_length=150, min_length=30, do_sample=False)[0]["summary_text"]
+    return out
+def format_result(result: whisper.DecodingResult) -> str:
+    out = []
+    for item in result["segments"]:
+        out.append(f"from {item['start']:6.2f} to {item['end']:6.2f} {item['text']}")
+    return "\n".join(out)
+def get_model_sizes() -> List[str]:
+    """
+    :rtype: list
+    :return: List of possible sizes of the Whisper model.
+    """
+    return list(
+        whisper._MODELS.keys()
+    )
+title = "YouTube transcribe + summarization"
+desc = "Transcribe YouTube videos using OpenAI Whisper."
+with gr.Blocks() as demo:
+    gr.HTML(title)
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown(
+                f"""
+                {desc}
+                """
+            )
+    with gr.Row():
+        model_size = gr.Dropdown(
+            label="Model size",
+            choices=get_model_sizes(),
+            value="tiny"
+        )
+        url = gr.Textbox(label="YouTube URL")
+    with gr.Row():
+        text = gr.Textbox(
+            label="Transcription",
+            lines=10
+        )
+    with gr.Row():
+        summarization = gr.Textbox(
+            label="Summarization",
+            lines=5
+        )
+    with gr.Row().style(equal_height=True):
+        submit_button = gr.Button("Submit")
+    submit_button.click(
+        transcribe,
+        inputs=[
+            url,
+            model_size
+        ],
+        outputs=[
+            text,
+            summarization
+        ]
+    )
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+pytube
+openai-whisper
+transformers