AlexMo committed on
Commit
589d4a0
1 Parent(s): dea167e

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +58 -0
  2. requirements.txt +12 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ from pytube import YouTube
3
+ from transformers import pipeline
4
+ import gradio as gr
5
+ import os
6
+ import re
7
+
8
# Speech-to-text model; loaded once at import time and reused by every request.
model = whisper.load_model("base")
# NOTE: a checkpoint hosted on the Hub could be loaded through transformers
# instead, e.g. pipeline(model="AlexMo/FIFA_WC22_WINNER_LANGUAGE_MODEL").

# Summarization pipeline.  The checkpoint is spelled out (it is the library's
# documented default for this task) so the app does not silently change
# behavior if that default is ever updated.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
11
+
12
+
13
def getAudio(url):
    """Download the audio track of a YouTube video and return its local path.

    The first audio-only stream reported by pytube is saved into the current
    working directory and then renamed so that it carries an ``.mp3``
    extension.  Only the file name changes; the audio data is not transcoded.

    Args:
        url: Address of the YouTube video.

    Returns:
        Path of the downloaded file, ending in ``.mp3``.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    stream = YouTube(url).streams.filter(only_audio=True).first()
    if stream is None:
        # first() yields None for an empty query; fail with a clear message
        # instead of an AttributeError on the download call below.
        raise ValueError(f"No audio-only stream available for {url!r}")

    downloaded = stream.download(output_path=".")
    base, _ = os.path.splitext(downloaded)
    mp3_path = base + '.mp3'
    # os.replace overwrites a leftover file from an earlier run on every
    # platform, whereas os.rename raises FileExistsError on Windows.
    os.replace(downloaded, mp3_path)
    return mp3_path
21
+
22
+
23
def getText(url):
    """Transcribe the audio of a YouTube video with the Whisper model.

    Args:
        url: Address of the YouTube video.  Surrounding whitespace is
            ignored; ``None`` is treated like an empty string.

    Returns:
        The transcript with leading/trailing whitespace removed, or an empty
        string when no URL was supplied.
    """
    url = (url or '').strip()
    if not url:
        # Always hand the UI a string rather than an implicit None.
        return ''

    audio_path = getAudio(url)
    try:
        result = model.transcribe(audio_path)
    finally:
        # The download is only needed for transcription; remove it so that
        # repeated requests do not fill up the working directory.
        if os.path.exists(audio_path):
            os.remove(audio_path)
    return result['text'].strip()
28
+
29
+
30
def getSummary(article):
    """Summarize the opening of a transcript.

    The text is split after each ``.``, ``:`` or ``;`` that is followed by
    whitespace, and only the first five resulting pieces are passed to the
    summarization pipeline.

    Args:
        article: Transcript text to summarize.

    Returns:
        The generated summary with `` .`` collapsed to ``.`` and outer
        whitespace removed, or an empty string when there is nothing to
        summarize.
    """
    if not article or not article.strip():
        # Nothing was transcribed yet; skip the model call entirely.
        return ''

    sentences = re.split(r'(?<=[.:;])\s', article)
    header = ' '.join(sentences[:5])
    result = summarizer(header, min_length=15, max_length=120, do_sample=False)
    return result[0]['summary_text'].replace(' .', '.').strip()
36
+
37
+
38
# Two-step UI: the first button fills the transcript box from the URL, the
# second button summarizes whatever text is currently in the transcript box.
with gr.Blocks() as demo:
    gr.Markdown(
        "<h1><center>Free Fast YouTube URL Video to Text using <a href=https://openai.com/blog/whisper/ target=_blank>OpenAI's Whisper</a> Model</center></h1>")
    gr.Markdown(
        "<center>Enter the link of any YouTube video to generate a text transcript of the video and then create a summary of the video transcript.</center>")
    gr.Markdown(
        "<center><b>'Whisper is a neural net that approaches human level robustness and accuracy on English speech recognition.'</b></center>")
    gr.Markdown(
        "<center>Generating the transcript takes 5-10 seconds per minute of the video</center>")

    input_text_url = gr.Textbox(placeholder='Youtube video URL', label='URL')
    result_button_transcribe = gr.Button('1. Transcribe')
    output_text_transcribe = gr.Textbox(placeholder='Transcript of the YouTube video.', label='Transcript')

    result_button_summary = gr.Button('2. Create Summary')
    output_text_summary = gr.Textbox(placeholder='Summary of the YouTube video transcript.', label='Summary')

    # Event listeners have to be registered while the Blocks context is open.
    result_button_transcribe.click(getText, inputs=input_text_url, outputs=output_text_transcribe)
    result_button_summary.click(getSummary, inputs=output_text_transcribe, outputs=output_text_summary)

# Start the server only when the file is executed as a script, so importing
# the module (e.g. to reuse getText/getSummary) does not launch the app.
if __name__ == "__main__":
    demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hopsworks
2
+ joblib
3
+ scikit-learn
4
+ seaborn
5
+ dataframe-image
6
+ modal-client
7
+ gradio
8
+ pytube
9
+ openai-whisper
10
+ transformers
11
+ re
12
+ os