syedusama5556 committed on
Commit
cd2dfd4
1 Parent(s): 4a71c2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -56
app.py CHANGED
@@ -1,61 +1,81 @@
1
- import whisper
2
- from pytube import YouTube
3
  import gradio as gr
 
 
4
  import os
5
- import re
6
  import logging
7
 
8
- logging.basicConfig(level=logging.INFO)
9
- model = whisper.load_model("base")
10
-
11
- def get_text(url):
12
- #try:
13
- if url != '':
14
- output_text_transcribe = ''
15
-
16
- yt = YouTube(url)
17
- #video_length = yt.length --- doesn't work anymore - using byte file size of the audio file instead now
18
- #if video_length < 5400:
19
- video = yt.streams.filter(only_audio=True).first()
20
- out_file=video.download(output_path=".")
21
-
22
- file_stats = os.stat(out_file)
23
- logging.info(f'Size of audio file in Bytes: {file_stats.st_size}')
24
-
25
- if file_stats.st_size <= 30000000:
26
- base, ext = os.path.splitext(out_file)
27
- new_file = base+'.mp3'
28
- os.rename(out_file, new_file)
29
- a = new_file
30
-
31
- result = model.transcribe(a)
32
- return result['text'].strip()
 
 
 
 
 
 
 
 
 
 
33
  else:
34
- logging.error('Videos for transcription on this space are limited to about 1.5 hours. Sorry about this limit but some joker thought they could stop this tool from working by transcribing many extremely long videos. Please visit https://steve.digital to contact me about this space.')
35
- #finally:
36
- # raise gr.Error("Exception: There was a problem transcribing the audio.")
37
-
38
- def get_summary(article):
39
- first_sentences = ' '.join(re.split(r'(?<=[.:;])\s', article)[:5])
40
- b = summarizer(first_sentences, min_length = 20, max_length = 120, do_sample = False)
41
- b = b[0]['summary_text'].replace(' .', '.').strip()
42
- return b
43
-
44
- with gr.Blocks() as demo:
45
- gr.Markdown("<h1><center>Free Fast YouTube URL Video-to-Text using <a href=https://openai.com/blog/whisper/ target=_blank>OpenAI's Whisper</a> Model</center></h1>")
46
- #gr.Markdown("<center>Enter the link of any YouTube video to generate a text transcript of the video and then create a summary of the video transcript.</center>")
47
- gr.Markdown("<center>Enter the link of any YouTube video to generate a text transcript of the video.</center>")
48
- gr.Markdown("<center><b>'Whisper is a neural net that approaches human level robustness and accuracy on English speech recognition.'</b></center>")
49
- gr.Markdown("<center>Transcription takes 5-10 seconds per minute of the video (bad audio/hard accents slow it down a bit). #patience<br />If you have time while waiting, drop a ♥️ and check out my <a href=https://www.artificial-intelligence.blog target=_blank>AI blog</a> (opens in new tab).</center>")
50
-
51
- input_text_url = gr.Textbox(placeholder='Youtube video URL', label='YouTube URL')
52
- result_button_transcribe = gr.Button('Transcribe')
53
- output_text_transcribe = gr.Textbox(placeholder='Transcript of the YouTube video.', label='Transcript')
54
-
55
- #result_button_summary = gr.Button('2. Create Summary')
56
- #output_text_summary = gr.Textbox(placeholder='Summary of the YouTube video transcript.', label='Summary')
57
-
58
- result_button_transcribe.click(get_text, inputs = input_text_url, outputs = output_text_transcribe)
59
- #result_button_summary.click(get_summary, inputs = output_text_transcribe, outputs = output_text_summary)
60
-
61
- demo.queue(default_enabled = True).launch(debug = True)
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import yt_dlp as yt
3
+ import whisper
4
  import os
5
+ import torch
6
  import logging
7
 
8
# Send all log records (INFO and above) to a log file in the working
# directory, with a timestamp and level on each line.
logging.basicConfig(
    filename='transcription_logs.txt',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Downloads are staged in a "temp_download" folder under the current working
# directory; create it up front so later code can assume it exists.
temp_download_dir = os.path.join(os.getcwd(), "temp_download")
os.makedirs(temp_download_dir, exist_ok=True)
14
+
15
# Function to download audio from the given URL
def download_audio(url):
    """Download the audio track for ``url`` and return the local file path.

    The file is fetched with yt-dlp into ``temp_download_dir`` using the
    video title as the file name, then renamed so the name contains no
    spaces. If the target name is already taken, a numeric suffix
    (``_1``, ``_2``, ...) is appended until a free name is found.

    Args:
        url: Address of the video to download.

    Returns:
        Path of the renamed audio file.

    Raises:
        Exception: If the renamed file is not present after the rename.
        Errors raised by yt-dlp or ``os.rename`` propagate unchanged.
    """
    options = {
        'format': 'bestaudio/best',
        'outtmpl': os.path.join(temp_download_dir, '%(title)s.%(ext)s'),
    }
    with yt.YoutubeDL(options) as downloader:
        info = downloader.extract_info(url, download=True)
        source_path = downloader.prepare_filename(info)

    # Same file name, but with spaces swapped for underscores.
    safe_name = os.path.basename(source_path).replace(" ", "_")
    target_path = os.path.join(temp_download_dir, safe_name)

    # Probe for a free name; the suffix is always built from the original
    # stem so names read "title_1.ext", "title_2.ext", ...
    stem, suffix = os.path.splitext(target_path)
    attempt = 0
    while os.path.exists(target_path):
        attempt += 1
        target_path = f"{stem}_{attempt}{suffix}"

    os.rename(source_path, target_path)

    if not os.path.exists(target_path):
        raise Exception("Failed to download and rename audio file.")
    return target_path
38
+
39
# Helper: SRT requires timestamps of the form HH:MM:SS,mmm
def _format_srt_timestamp(seconds):
    """Render a time offset given in seconds as an SRT timestamp.

    Args:
        seconds: Offset from the start of the audio, in seconds.

    Returns:
        A string such as ``"00:01:02,345"``. Negative inputs are clamped
        to zero.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, total_ms = divmod(total_ms, 3_600_000)
    minutes, total_ms = divmod(total_ms, 60_000)
    secs, millis = divmod(total_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


# Function to transcribe audio to SRT format
def transcribe_to_srt(file_path):
    """Transcribe an audio file with Whisper and return SRT subtitle text.

    The "medium" Whisper model is loaded on the GPU when CUDA is available
    and on the CPU otherwise. Each entry of ``result["segments"]`` becomes
    one numbered SRT cue.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        The subtitles as one string; every cue is followed by a blank line.
        The string is empty when no segments were produced.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = whisper.load_model("medium", device=device)

    result = model.transcribe(file_path)

    cues = []
    for index, segment in enumerate(result["segments"], start=1):
        start = _format_srt_timestamp(segment["start"])
        end = _format_srt_timestamp(segment["end"])
        # Drop surrounding whitespace so cue text starts at column zero.
        text = segment["text"].strip()
        cues.append(f"{index}\n{start} --> {end}\n{text}\n\n")

    return "".join(cues)
58
+
59
def transcribe_video(url):
    """Download the audio behind ``url`` and return its SRT transcription.

    Progress is written to the log. Any exception raised along the way is
    logged and turned into an error string, so this function always
    returns text that can be shown to the user.

    Args:
        url: Address of the video to transcribe.

    Returns:
        The SRT text on success, otherwise ``"An error occurred: ..."``.
    """
    try:
        logging.info(f"Transcribing video from URL: {url}")
        audio_path = download_audio(url)
        logging.info(f"Downloaded audio file: {audio_path}")
        transcript = transcribe_to_srt(audio_path)
        logging.info("Transcription completed successfully!")
        # The downloaded audio file is intentionally left on disk; call
        # os.remove(audio_path) here if it should be cleaned up.
        return transcript
    except Exception as exc:
        message = f"An error occurred: {exc}"
        logging.error(message)
        return message
72
+
73
# Single text input (the video URL) mapped to a single text output (the SRT).
# NOTE(review): live=True re-runs transcribe_video whenever the input
# changes, which may start a download for a partially typed URL - confirm
# this is intended.
iface = gr.Interface(fn=transcribe_video, inputs="text", outputs="text", live=True, title="YouTube/TikTok Video to SRT Transcription")

# Placeholder log panel. Textbox takes `value`, `interactive` and `lines`
# (not `text`, `readonly`, `height`). It is created outside the Interface, so
# it is not rendered on the page as written.
log_viewer = gr.Textbox(value="Logs will appear here...", interactive=False, lines=8)

# logging.StreamHandler needs a file-like object with write()/flush(); a
# Gradio component is not one, so passing the Textbox made every log record
# fail inside the handler. Mirror the records to stderr instead.
log_handler = logging.StreamHandler()
log_handler.setLevel(logging.INFO)
logging.getLogger().addHandler(log_handler)

iface.launch()