artificialguybr committed on
Commit
bdd072a
1 Parent(s): 240de18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -45
app.py CHANGED
@@ -10,6 +10,7 @@ import spaces
10
  import moviepy.editor as mp
11
  import time
12
  import langdetect
 
13
 
14
  HF_TOKEN = os.environ.get("HF_TOKEN")
15
  print("Starting the program...")
@@ -21,8 +22,17 @@ model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float
21
  model = model.eval()
22
  print("Model successfully loaded.")
23
 
24
- def download_youtube_audio(url, output_path):
 
 
 
 
 
 
 
 
25
  print(f"Downloading audio from YouTube: {url}")
 
26
  ydl_opts = {
27
  'format': 'bestaudio/best',
28
  'postprocessors': [{
@@ -34,16 +44,13 @@ def download_youtube_audio(url, output_path):
34
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
35
  ydl.download([url])
36
 
37
- # Check if the file was renamed to .wav.wav
38
- if os.path.exists(output_path + ".wav"):
39
- os.rename(output_path + ".wav", output_path)
40
-
41
  if os.path.exists(output_path):
42
  print(f"Audio download completed. File saved at: {output_path}")
43
  print(f"File size: {os.path.getsize(output_path)} bytes")
44
  else:
45
  print(f"Error: File {output_path} not found after download.")
46
-
 
47
 
48
  @spaces.GPU(duration=60)
49
  def transcribe_audio(file_path):
@@ -52,15 +59,15 @@ def transcribe_audio(file_path):
52
  print("Video file detected. Extracting audio...")
53
  try:
54
  video = mp.VideoFileClip(file_path)
55
- audio_path = "temp_audio.wav"
56
  video.audio.write_audiofile(audio_path)
 
57
  file_path = audio_path
58
  except Exception as e:
59
  print(f"Error extracting audio from video: {e}")
60
  raise
61
- print(f"Does the file exist? {os.path.exists(file_path)}")
62
- print(f"File size: {os.path.getsize(file_path) if os.path.exists(file_path) else 'N/A'} bytes")
63
- output_file = "output.json"
64
  command = [
65
  "insanely-fast-whisper",
66
  "--file-name", file_path,
@@ -73,84 +80,62 @@ def transcribe_audio(file_path):
73
  print(f"Executing command: {' '.join(command)}")
74
  try:
75
  result = subprocess.run(command, check=True, capture_output=True, text=True)
76
- print(f"Standard output: {result.stdout}")
77
- print(f"Error output: {result.stderr}")
78
  except subprocess.CalledProcessError as e:
79
  print(f"Error running insanely-fast-whisper: {e}")
80
- print(f"Standard output: {e.stdout}")
81
- print(f"Error output: {e.stderr}")
82
  raise
83
- print(f"Reading transcription file: {output_file}")
84
  try:
85
  with open(output_file, "r") as f:
86
  transcription = json.load(f)
87
  except json.JSONDecodeError as e:
88
  print(f"Error decoding JSON: {e}")
89
- print(f"File content: {open(output_file, 'r').read()}")
90
  raise
 
91
  if "text" in transcription:
92
  result = transcription["text"]
93
  else:
94
  result = " ".join([chunk["text"] for chunk in transcription.get("chunks", [])])
95
- print("Transcription completed.")
96
- if file_path.startswith("temp_audio"):
97
- os.remove(file_path)
 
98
  return result
99
 
100
  @spaces.GPU(duration=60)
101
  def generate_summary_stream(transcription):
102
  print("Starting summary generation...")
103
- print(f"Transcription length: {len(transcription)} characters")
104
-
105
  detected_language = langdetect.detect(transcription)
106
 
107
  prompt = f"""Summarize the following video transcription in 150-300 words.
108
  The summary should be in the same language as the transcription, which is detected as {detected_language}.
109
  Please ensure that the summary captures the main points and key ideas of the transcription:
110
 
111
- {transcription[:30000]}..."""
112
 
113
  response, history = model.chat(tokenizer, prompt, history=[])
114
  print(f"Final summary generated: {response[:100]}...")
115
- print("Summary generation completed.")
116
  return response
117
 
118
  def process_youtube(url):
119
  if not url:
120
- print("YouTube URL not provided.")
121
  return "Please enter a YouTube URL.", None
122
- print(f"Processing YouTube URL: {url}")
123
- audio_file = "youtube_audio.wav"
124
  try:
125
- download_youtube_audio(url, audio_file)
126
- # Check if the file was renamed to .wav.wav
127
- if os.path.exists(audio_file + ".wav"):
128
- audio_file = audio_file + ".wav"
129
- if not os.path.exists(audio_file):
130
- raise FileNotFoundError(f"File {audio_file} does not exist after download.")
131
- print(f"Audio file found: {audio_file}")
132
- print("Starting transcription...")
133
  transcription = transcribe_audio(audio_file)
134
- print(f"Transcription completed. Length: {len(transcription)} characters")
135
  return transcription, None
136
  except Exception as e:
137
- print(f"Error processing YouTube: {e}")
138
  return f"Processing error: {str(e)}", None
139
  finally:
140
- if os.path.exists(audio_file):
141
- os.remove(audio_file)
142
- print(f"Directory content after processing: {os.listdir('.')}")
143
 
144
  def process_uploaded_video(video_path):
145
- print(f"Processing uploaded video: {video_path}")
146
  try:
147
- print("Starting transcription...")
148
  transcription = transcribe_audio(video_path)
149
- print(f"Transcription completed. Length: {len(transcription)} characters")
150
  return transcription, None
151
  except Exception as e:
152
- print(f"Error processing video: {e}")
153
  return f"Processing error: {str(e)}", None
 
 
154
 
155
  print("Setting up Gradio interface...")
156
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
@@ -193,9 +178,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
193
  def process_video_and_update(video):
194
  if video is None:
195
  return "No video uploaded.", "Please upload a video."
196
- print(f"Video received: {video}")
197
  transcription, _ = process_uploaded_video(video)
198
- print(f"Returned transcription: {transcription[:100] if transcription else 'No transcription generated'}...")
199
  return transcription or "Transcription error", ""
200
 
201
  video_button.click(process_video_and_update, inputs=[video_input], outputs=[transcription_output, summary_output])
 
10
  import moviepy.editor as mp
11
  import time
12
  import langdetect
13
+ import uuid
14
 
15
  HF_TOKEN = os.environ.get("HF_TOKEN")
16
  print("Starting the program...")
 
22
  model = model.eval()
23
  print("Model successfully loaded.")
24
 
25
def generate_unique_filename(extension):
    """Return a random, collision-resistant file name ending in *extension*.

    The name is a UUID4 string followed by the extension, so concurrent
    requests do not overwrite each other's temporary files.

    Args:
        extension: File extension such as ``".wav"``. A value without a
            leading dot (``"wav"``) is accepted and normalised to ``".wav"``.
            An empty value yields the bare UUID.

    Returns:
        A relative file name of the form ``"<uuid4><extension>"``.
    """
    # Guard against callers passing "wav" instead of ".wav", which would
    # otherwise produce a name with no separator before the extension.
    if extension and not extension.startswith("."):
        extension = f".{extension}"
    return f"{uuid.uuid4()}{extension}"
27
+
28
def cleanup_file(file_path):
    """Delete *file_path* if it exists and log the removal.

    A missing file, ``None`` or an empty path is treated as "nothing to do",
    so the function is safe to call from ``finally`` blocks even when the
    file was never created or was already removed by another step.

    Args:
        file_path: Path of the temporary file to remove; may be ``None``.

    Raises:
        OSError: For failures other than the file not existing (for example
            a permission error or the path being a directory).
    """
    if not file_path:
        return
    # Attempt the removal directly instead of checking existence first:
    # the file may disappear between the check and the delete.
    try:
        os.remove(file_path)
    except FileNotFoundError:
        return
    print(f"Cleaned up file: {file_path}")
32
+
33
+ def download_youtube_audio(url):
34
  print(f"Downloading audio from YouTube: {url}")
35
+ output_path = generate_unique_filename('.wav')
36
  ydl_opts = {
37
  'format': 'bestaudio/best',
38
  'postprocessors': [{
 
44
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
45
  ydl.download([url])
46
 
 
 
 
 
47
  if os.path.exists(output_path):
48
  print(f"Audio download completed. File saved at: {output_path}")
49
  print(f"File size: {os.path.getsize(output_path)} bytes")
50
  else:
51
  print(f"Error: File {output_path} not found after download.")
52
+
53
+ return output_path
54
 
55
  @spaces.GPU(duration=60)
56
  def transcribe_audio(file_path):
 
59
  print("Video file detected. Extracting audio...")
60
  try:
61
  video = mp.VideoFileClip(file_path)
62
+ audio_path = generate_unique_filename('.wav')
63
  video.audio.write_audiofile(audio_path)
64
+ cleanup_file(file_path)
65
  file_path = audio_path
66
  except Exception as e:
67
  print(f"Error extracting audio from video: {e}")
68
  raise
69
+
70
+ output_file = generate_unique_filename('.json')
 
71
  command = [
72
  "insanely-fast-whisper",
73
  "--file-name", file_path,
 
80
  print(f"Executing command: {' '.join(command)}")
81
  try:
82
  result = subprocess.run(command, check=True, capture_output=True, text=True)
 
 
83
  except subprocess.CalledProcessError as e:
84
  print(f"Error running insanely-fast-whisper: {e}")
 
 
85
  raise
86
+
87
  try:
88
  with open(output_file, "r") as f:
89
  transcription = json.load(f)
90
  except json.JSONDecodeError as e:
91
  print(f"Error decoding JSON: {e}")
 
92
  raise
93
+
94
  if "text" in transcription:
95
  result = transcription["text"]
96
  else:
97
  result = " ".join([chunk["text"] for chunk in transcription.get("chunks", [])])
98
+
99
+ cleanup_file(file_path)
100
+ cleanup_file(output_file)
101
+
102
  return result
103
 
104
  @spaces.GPU(duration=60)
105
  def generate_summary_stream(transcription):
106
  print("Starting summary generation...")
 
 
107
  detected_language = langdetect.detect(transcription)
108
 
109
  prompt = f"""Summarize the following video transcription in 150-300 words.
110
  The summary should be in the same language as the transcription, which is detected as {detected_language}.
111
  Please ensure that the summary captures the main points and key ideas of the transcription:
112
 
113
+ {transcription[:300000]}..."""
114
 
115
  response, history = model.chat(tokenizer, prompt, history=[])
116
  print(f"Final summary generated: {response[:100]}...")
 
117
  return response
118
 
119
  def process_youtube(url):
120
  if not url:
 
121
  return "Please enter a YouTube URL.", None
 
 
122
  try:
123
+ audio_file = download_youtube_audio(url)
 
 
 
 
 
 
 
124
  transcription = transcribe_audio(audio_file)
 
125
  return transcription, None
126
  except Exception as e:
 
127
  return f"Processing error: {str(e)}", None
128
  finally:
129
+ cleanup_file(audio_file)
 
 
130
 
131
  def process_uploaded_video(video_path):
 
132
  try:
 
133
  transcription = transcribe_audio(video_path)
 
134
  return transcription, None
135
  except Exception as e:
 
136
  return f"Processing error: {str(e)}", None
137
+ finally:
138
+ cleanup_file(video_path)
139
 
140
  print("Setting up Gradio interface...")
141
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
178
  def process_video_and_update(video):
179
  if video is None:
180
  return "No video uploaded.", "Please upload a video."
 
181
  transcription, _ = process_uploaded_video(video)
 
182
  return transcription or "Transcription error", ""
183
 
184
  video_button.click(process_video_and_update, inputs=[video_input], outputs=[transcription_output, summary_output])