Spaces:

artificialguybr
/

Video-Transcription-Smart-Summary

Running on Zero

App Files Files Community

artificialguybr committed on Jul 4, 2024

Commit

bdd072a

verified ·

1 Parent(s): 240de18

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -45

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ import spaces
 import moviepy.editor as mp
 import time
 import langdetect
 HF_TOKEN = os.environ.get("HF_TOKEN")
 print("Starting the program...")
@@ -21,8 +22,17 @@ model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float
 model = model.eval()
 print("Model successfully loaded.")
-def download_youtube_audio(url, output_path):
     print(f"Downloading audio from YouTube: {url}")
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
@@ -34,16 +44,13 @@ def download_youtube_audio(url, output_path):
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         ydl.download([url])
-    # Check if the file was renamed to .wav.wav
-    if os.path.exists(output_path + ".wav"):
-        os.rename(output_path + ".wav", output_path)
     if os.path.exists(output_path):
         print(f"Audio download completed. File saved at: {output_path}")
         print(f"File size: {os.path.getsize(output_path)} bytes")
     else:
         print(f"Error: File {output_path} not found after download.")
 @spaces.GPU(duration=60)
 def transcribe_audio(file_path):
@@ -52,15 +59,15 @@ def transcribe_audio(file_path):
         print("Video file detected. Extracting audio...")
         try:
             video = mp.VideoFileClip(file_path)
-            audio_path = "temp_audio.wav"
             video.audio.write_audiofile(audio_path)
             file_path = audio_path
         except Exception as e:
             print(f"Error extracting audio from video: {e}")
             raise
-    print(f"Does the file exist? {os.path.exists(file_path)}")
-    print(f"File size: {os.path.getsize(file_path) if os.path.exists(file_path) else 'N/A'} bytes")
-    output_file = "output.json"
     command = [
         "insanely-fast-whisper",
         "--file-name", file_path,
@@ -73,84 +80,62 @@ def transcribe_audio(file_path):
     print(f"Executing command: {' '.join(command)}")
     try:
         result = subprocess.run(command, check=True, capture_output=True, text=True)
-        print(f"Standard output: {result.stdout}")
-        print(f"Error output: {result.stderr}")
     except subprocess.CalledProcessError as e:
         print(f"Error running insanely-fast-whisper: {e}")
-        print(f"Standard output: {e.stdout}")
-        print(f"Error output: {e.stderr}")
         raise
-    print(f"Reading transcription file: {output_file}")
     try:
         with open(output_file, "r") as f:
             transcription = json.load(f)
     except json.JSONDecodeError as e:
         print(f"Error decoding JSON: {e}")
-        print(f"File content: {open(output_file, 'r').read()}")
         raise
     if "text" in transcription:
         result = transcription["text"]
     else:
         result = " ".join([chunk["text"] for chunk in transcription.get("chunks", [])])
-    print("Transcription completed.")
-    if file_path.startswith("temp_audio"):
-        os.remove(file_path)
     return result
 @spaces.GPU(duration=60)
 def generate_summary_stream(transcription):
     print("Starting summary generation...")
-    print(f"Transcription length: {len(transcription)} characters")
     detected_language = langdetect.detect(transcription)
     prompt = f"""Summarize the following video transcription in 150-300 words.
     The summary should be in the same language as the transcription, which is detected as {detected_language}.
     Please ensure that the summary captures the main points and key ideas of the transcription:
-    {transcription[:30000]}..."""
     response, history = model.chat(tokenizer, prompt, history=[])
     print(f"Final summary generated: {response[:100]}...")
-    print("Summary generation completed.")
     return response
 def process_youtube(url):
     if not url:
-        print("YouTube URL not provided.")
         return "Please enter a YouTube URL.", None
-    print(f"Processing YouTube URL: {url}")
-    audio_file = "youtube_audio.wav"
     try:
-        download_youtube_audio(url, audio_file)
-        # Check if the file was renamed to .wav.wav
-        if os.path.exists(audio_file + ".wav"):
-            audio_file = audio_file + ".wav"
-        if not os.path.exists(audio_file):
-            raise FileNotFoundError(f"File {audio_file} does not exist after download.")
-        print(f"Audio file found: {audio_file}")
-        print("Starting transcription...")
         transcription = transcribe_audio(audio_file)
-        print(f"Transcription completed. Length: {len(transcription)} characters")
         return transcription, None
     except Exception as e:
-        print(f"Error processing YouTube: {e}")
         return f"Processing error: {str(e)}", None
     finally:
-        if os.path.exists(audio_file):
-            os.remove(audio_file)
-        print(f"Directory content after processing: {os.listdir('.')}")
 def process_uploaded_video(video_path):
-    print(f"Processing uploaded video: {video_path}")
     try:
-        print("Starting transcription...")
         transcription = transcribe_audio(video_path)
-        print(f"Transcription completed. Length: {len(transcription)} characters")
         return transcription, None
     except Exception as e:
-        print(f"Error processing video: {e}")
         return f"Processing error: {str(e)}", None
 print("Setting up Gradio interface...")
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
@@ -193,9 +178,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     def process_video_and_update(video):
         if video is None:
             return "No video uploaded.", "Please upload a video."
-        print(f"Video received: {video}")
         transcription, _ = process_uploaded_video(video)
-        print(f"Returned transcription: {transcription[:100] if transcription else 'No transcription generated'}...")
         return transcription or "Transcription error", ""
     video_button.click(process_video_and_update, inputs=[video_input], outputs=[transcription_output, summary_output])

 import moviepy.editor as mp
 import time
 import langdetect
+import uuid
 HF_TOKEN = os.environ.get("HF_TOKEN")
 print("Starting the program...")
 model = model.eval()
 print("Model successfully loaded.")
+def generate_unique_filename(extension):
+    return f"{uuid.uuid4()}{extension}"
+def cleanup_file(file_path):
+    if os.path.exists(file_path):
+        os.remove(file_path)
+        print(f"Cleaned up file: {file_path}")
+def download_youtube_audio(url):
     print(f"Downloading audio from YouTube: {url}")
+    output_path = generate_unique_filename('.wav')
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         ydl.download([url])
     if os.path.exists(output_path):
         print(f"Audio download completed. File saved at: {output_path}")
         print(f"File size: {os.path.getsize(output_path)} bytes")
     else:
         print(f"Error: File {output_path} not found after download.")
+    return output_path
 @spaces.GPU(duration=60)
 def transcribe_audio(file_path):
         print("Video file detected. Extracting audio...")
         try:
             video = mp.VideoFileClip(file_path)
+            audio_path = generate_unique_filename('.wav')
             video.audio.write_audiofile(audio_path)
+            cleanup_file(file_path)
             file_path = audio_path
         except Exception as e:
             print(f"Error extracting audio from video: {e}")
             raise
+    output_file = generate_unique_filename('.json')
     command = [
         "insanely-fast-whisper",
         "--file-name", file_path,
     print(f"Executing command: {' '.join(command)}")
     try:
         result = subprocess.run(command, check=True, capture_output=True, text=True)
     except subprocess.CalledProcessError as e:
         print(f"Error running insanely-fast-whisper: {e}")
         raise
     try:
         with open(output_file, "r") as f:
             transcription = json.load(f)
     except json.JSONDecodeError as e:
         print(f"Error decoding JSON: {e}")
         raise
     if "text" in transcription:
         result = transcription["text"]
     else:
         result = " ".join([chunk["text"] for chunk in transcription.get("chunks", [])])
+    cleanup_file(file_path)
+    cleanup_file(output_file)
     return result
 @spaces.GPU(duration=60)
 def generate_summary_stream(transcription):
     print("Starting summary generation...")
     detected_language = langdetect.detect(transcription)
     prompt = f"""Summarize the following video transcription in 150-300 words.
     The summary should be in the same language as the transcription, which is detected as {detected_language}.
     Please ensure that the summary captures the main points and key ideas of the transcription:
+    {transcription[:300000]}..."""
     response, history = model.chat(tokenizer, prompt, history=[])
     print(f"Final summary generated: {response[:100]}...")
     return response
 def process_youtube(url):
     if not url:
         return "Please enter a YouTube URL.", None
     try:
+        audio_file = download_youtube_audio(url)
         transcription = transcribe_audio(audio_file)
         return transcription, None
     except Exception as e:
         return f"Processing error: {str(e)}", None
     finally:
+        cleanup_file(audio_file)
 def process_uploaded_video(video_path):
     try:
         transcription = transcribe_audio(video_path)
         return transcription, None
     except Exception as e:
         return f"Processing error: {str(e)}", None
+    finally:
+        cleanup_file(video_path)
 print("Setting up Gradio interface...")
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     def process_video_and_update(video):
         if video is None:
             return "No video uploaded.", "Please upload a video."
         transcription, _ = process_uploaded_video(video)
         return transcription or "Transcription error", ""
     video_button.click(process_video_and_update, inputs=[video_input], outputs=[transcription_output, summary_output])