dioarafl committed on
Commit de9ee5d
1 Parent(s): d895af0

Update app.py

Files changed (1)
  1. app.py +6 -45
app.py CHANGED
@@ -3,7 +3,7 @@ from transformers import (
     AutoModelForSpeechSeq2Seq,
     AutoProcessor,
     AutoModelForCausalLM,
-    AutoTokenizer,
+    AutoTokenizer,
     BitsAndBytesConfig,
 )
 import torch
@@ -17,16 +17,12 @@ def yt2mp3(url, outputMp3F):
 
 
 def speech2text(mp3_file):
-    # Set the computation device to GPU (if available) or CPU
     device = 'cuda:0'
 
-    # Choose data type based on CUDA availability (float16 for GPU, float32 for CPU)
     torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
-    # Model identifier for the speech-to-text model
     model_id = "distil-whisper/distil-large-v2"
 
-    # Load the model with specified configurations for efficient processing
     model = AutoModelForSpeechSeq2Seq.from_pretrained(
         model_id,
         torch_dtype=torch_dtype,
@@ -35,13 +31,10 @@ def speech2text(mp3_file):
         use_flash_attention_2=True
     )
 
-    # Move the model to the specified device (GPU/CPU)
     model.to(device)
 
-    # Load the processor for the model (handling tokenization and feature extraction)
     processor = AutoProcessor.from_pretrained(model_id)
 
-    # Set up a speech recognition pipeline with the model and processor
     pipe = pipeline(
         "automatic-speech-recognition",
         model=model,
@@ -54,35 +47,23 @@ def speech2text(mp3_file):
         device=device,
     )
 
-    # Process the MP3 file through the pipeline to get the speech recognition result
     result = pipe(mp3_file)
 
-    # Extract the text from the recognition result
     text_from_video = result["text"]
 
-    # Return the extracted text
     return text_from_video
 
 
 def chat(system_prompt, text):
-    """
-    It is not good practice to load the model again and again,
-    but for the sake of simplicity in this demo, let's keep it as it is
-    """
-
-    # Define the model name to be used for the chat function
     model_name = "meta-llama/Llama-2-7b-chat-hf"
-    # Authentication token for the Hugging Face API
     token = os.environ['HUGGINGFACE_TOKEN']
 
-    # Configure the model to load in a quantized 8-bit format for efficiency
     bnb_config = BitsAndBytesConfig(
         load_in_8bit=True
     )
 
-    # Set the device map to load the model on GPU 0
     device_map = {"": 0}
-    # Load the model from Hugging Face with the specified configuration
+
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         quantization_config=bnb_config,
@@ -90,13 +71,10 @@ def chat(system_prompt, text):
         use_auth_token=token
     )
 
-    # Load the tokenizer for the model
     tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
 
-    # Create a text-generation pipeline with the loaded model and tokenizer
     llama_pipeline = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
 
-    # Format the input text with special tokens for the model
     text = f"""
     <s>[INST] <<SYS>>
     {system_prompt}
@@ -104,7 +82,6 @@ def chat(system_prompt, text):
     {text}[/INST]
     """
 
-    # Generate sequences using the pipeline with specified parameters
     sequences = llama_pipeline(
         text,
         do_sample=True,
@@ -114,54 +91,38 @@ def chat(system_prompt, text):
         max_length=32000
     )
 
-    # Extract the generated text from the sequences
     generated_text = sequences[0]["generated_text"]
-    # Trim the generated text to remove the instruction part
     generated_text = generated_text[generated_text.find('[/INST]')+len('[/INST]'):]
 
-    # Return the processed generated text
     return generated_text
 
 def summarize(text):
-    # Define the maximum input length for each iteration of summarization
     input_len = 10000
 
-    # Start an infinite loop to repeatedly summarize the text
     while True:
-        # Print the current length of the text
-        print(len(text))
-        # Call the chat function to summarize the text. Only the first 'input_len' characters are considered for summarization
        summary = chat("", "Summarize the following: " + text[0:input_len])
 
         if len(text) < input_len:
             return summary
 
-        # Concatenate the current summary with the remaining part of the text for the next iteration
         text = summary + " " + text[input_len:]
 
 import gradio as gr
 
-# The functions and imports you already had above
-
-# Function to summarize text from a YouTube URL
 def summarize_from_youtube(url):
-    # Download audio from the YouTube URL and transcribe the speech to text
     outputMp3F = "./files/audio.mp3"
     yt2mp3(url=url, outputMp3F=outputMp3F)
     transcribed = speech2text(mp3_file=outputMp3F)
-    # Summarize the transcribed text
     summary = summarize(transcribed)
     return summary
 
-# Configure the Gradio interface
-youtube_url = gr.inputs.Textbox(lines=1, label="Masukkan URL YouTube")
-output_text = gr.outputs.Textbox(label="Ringkasan")
+youtube_url = gr.inputs.Textbox(lines=1, label="Enter YouTube URL")
+output_text = gr.outputs.Textbox(label="Summary")
 
-# Create the Gradio interface
 gr.Interface(
     fn=summarize_from_youtube,
     inputs=youtube_url,
     outputs=output_text,
-    title="Peringkas YouTube",
-    description="Masukkan URL YouTube untuk merangkum kontennya."
+    title="YouTube Summarizer",
+    description="Enter a YouTube URL to summarize its content."
 ).launch()
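
Note: the yt2mp3 helper named in the second hunk header is not touched by this commit, so its body does not appear in the diff. For orientation, here is a minimal sketch of what such a helper typically looks like, assuming the Space downloads audio with yt-dlp; the option values are illustrative assumptions, not the repository's actual code.

# Hypothetical sketch of yt2mp3 (not from this commit), assuming yt-dlp is installed.
import yt_dlp

def yt2mp3(url, outputMp3F):
    # The FFmpegExtractAudio postprocessor appends ".mp3" itself,
    # so the output template omits the extension.
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": outputMp3F.removesuffix(".mp3"),
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "mp3",
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])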
 
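A related caveat the commit leaves in place: speech2text hardcodes device = 'cuda:0' while selecting torch_dtype with torch.cuda.is_available(), so on a CPU-only machine model.to(device) would still fail. A one-line sketch of the consistent guard (an assumption about intent, not part of the commit):

# Sketch: choose the device with the same guard used for the dtype.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32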
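
Also unchanged by the commit: the Llama pipeline is called with max_length=32000, but meta-llama/Llama-2-7b-chat-hf has a 4,096-token context window, so generation cannot actually use that budget and long inputs risk truncation or warnings. A safer sketch bounds only the generated continuation with max_new_tokens (the 512 budget is an assumed value; the elided sampling arguments are left as they are in app.py):

sequences = llama_pipeline(
    text,
    do_sample=True,
    # ... other sampling arguments as in app.py ...
    max_new_tokens=512,  # assumed budget; keeps prompt + output within 4,096 tokens
)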
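
Finally, the interface code kept by this commit still uses gr.inputs.Textbox and gr.outputs.Textbox, which were deprecated in Gradio 3.x and removed in 4.x. A sketch of the equivalent setup with the current component API (same behavior, assuming a recent Gradio runtime):

import gradio as gr

gr.Interface(
    fn=summarize_from_youtube,
    inputs=gr.Textbox(lines=1, label="Enter YouTube URL"),
    outputs=gr.Textbox(label="Summary"),
    title="YouTube Summarizer",
    description="Enter a YouTube URL to summarize its content.",
).launch()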