os1187 SteveDigital committed on
Commit
37dd56c
0 Parent(s):

Duplicate from SteveDigital/free-fast-youtube-url-video-to-text-using-openai-whisper

Browse files
Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +14 -0
  3. app.py +49 -0
  4. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Free Youtube Url Video To Text Using Openai Whisper
3
+ emoji: 📉
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 3.11.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: gpl-3.0
11
+ duplicated_from: SteveDigital/free-fast-youtube-url-video-to-text-using-openai-whisper
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ from pytube import YouTube
3
+ from transformers import pipeline
4
+ import gradio as gr
5
+ import os
6
+ import re
7
+
8
# Speech-to-text model, loaded once at import time so every request reuses it.
model = whisper.load_model("base")

# Summarization pipeline. The checkpoint is named explicitly instead of relying
# on the library's implicit default, so the app keeps using the same model even
# if the default changes in a later transformers release.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
10
+
11
def get_audio(url):
    """Download the audio track of a YouTube video and return its local path.

    The first audio-only stream is downloaded into the current directory and
    the resulting file is renamed to carry a ``.mp3`` extension. Only the
    file name changes; the audio data itself is not re-encoded.

    Args:
        url: URL of the YouTube video.

    Returns:
        Path of the downloaded file, ending in ``.mp3``.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        # .first() yields None on an empty query; fail with a clear message
        # instead of an AttributeError on the download call below.
        raise ValueError(f"No audio-only stream available for {url!r}")

    downloaded = stream.download(output_path=".")
    base, _ext = os.path.splitext(downloaded)
    mp3_path = base + '.mp3'
    # os.replace overwrites an existing target on every platform, so
    # transcribing the same video twice does not fail on Windows.
    os.replace(downloaded, mp3_path)
    return mp3_path
20
+
21
def get_text(url):
    """Transcribe the audio of a YouTube video.

    Args:
        url: URL of the YouTube video. Empty, whitespace-only or ``None``
            input yields an empty transcript without any download attempt.

    Returns:
        The transcript text with surrounding whitespace removed, or ``''``
        when no URL was supplied.
    """
    # Guard against an empty text box: nothing to download or transcribe.
    if not url or not url.strip():
        return ''

    result = model.transcribe(get_audio(url.strip()))
    return result['text'].strip()
25
+
26
def get_summary(article):
    """Summarize the opening of a transcript.

    Only the first five segments of ``article`` are summarized, where a
    segment ends at ``.``, ``:`` or ``;`` followed by whitespace.

    Args:
        article: Transcript text. Empty, whitespace-only or ``None`` input
            yields an empty summary without invoking the summarizer.

    Returns:
        The summary text, with `` .`` collapsed to ``.`` and surrounding
        whitespace removed, or ``''`` when there is nothing to summarize.
    """
    # Nothing to summarize (e.g. the button was pressed before transcribing).
    if not article or not article.strip():
        return ''

    # Split after sentence-ending punctuation and keep the first five pieces.
    sentences = re.split(r'(?<=[.:;])\s', article)
    first_sentences = ' '.join(sentences[:5])

    summary = summarizer(
        first_sentences, min_length=20, max_length=120, do_sample=False
    )
    # Remove the stray space some outputs place before a period.
    return summary[0]['summary_text'].replace(' .', '.').strip()
32
+
33
# Build the two-step UI: URL -> transcript -> summary.
with gr.Blocks() as demo:
    # Header and usage notes.
    gr.Markdown("<h1><center>Free Fast YouTube URL Video to Text using <a href=https://openai.com/blog/whisper/ target=_blank>OpenAI's Whisper</a> Model</center></h1>")
    gr.Markdown("<center>Enter the link of any YouTube video to generate a text transcript of the video and then create a summary of the video transcript.</center>")
    gr.Markdown("<center><b>'Whisper is a neural net that approaches human level robustness and accuracy on English speech recognition.'</b></center>")
    gr.Markdown("<center>Generating the transcript takes 5-10 seconds per minute of the video (when I am using this space I boost performance for everyone). #patience</center>")

    # Step 1: URL in, transcript out.
    input_text_url = gr.Textbox(
        placeholder='Youtube video URL',
        label='URL',
    )
    result_button_transcribe = gr.Button('1. Transcribe')
    output_text_transcribe = gr.Textbox(
        placeholder='Transcript of the YouTube video.',
        label='Transcript',
    )

    # Step 2: transcript in, summary out.
    result_button_summary = gr.Button('2. Create Summary')
    output_text_summary = gr.Textbox(
        placeholder='Summary of the YouTube video transcript.',
        label='Summary',
    )

    # Wire each button to its handler; the transcript box feeds the summary.
    result_button_transcribe.click(
        fn=get_text,
        inputs=input_text_url,
        outputs=output_text_transcribe,
    )
    result_button_summary.click(
        fn=get_summary,
        inputs=output_text_transcribe,
        outputs=output_text_summary,
    )

# Start the app with debug output enabled.
demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ git+https://github.com/openai/whisper.git
2
+ git+https://github.com/huggingface/transformers
3
+ pytube