# summarizedYtb/app.py
from transformers import (
pipeline,
AutoModelForSpeechSeq2Seq,
AutoProcessor,
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
)
import torch
import os
import random
import gradio as gr
def yt2mp3(url, outputMp3F):
    # Use a random float as a temporary filename for the downloaded video
    tmpVideoF = random.random()
    os.system(f"./bin/youtube-dl -o /tmp/{tmpVideoF} --verbose " + url)
    # Extract the audio track and re-encode it as a 44.1 kHz stereo 192 kbps MP3
    os.system(f"ffmpeg -y -i /tmp/{tmpVideoF}.* -vn -ar 44100 -ac 2 -b:a 192k {outputMp3F}")
def speech2text(mp3_file):
# Set the computation device to GPU (if available) or CPU
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# Choose data type based on CUDA availability (float16 for GPU, float32 for CPU)
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
# Model identifier for the speech-to-text model
model_id = "distil-whisper/distil-large-v2"
# Load the model with specified configurations for efficient processing
model = AutoModelForSpeechSeq2Seq.from_pretrained(
model_id,
torch_dtype=torch_dtype,
low_cpu_mem_usage=True,
use_safetensors=True,
        # Flash Attention 2 requires the flash-attn package and a CUDA device
        use_flash_attention_2=torch.cuda.is_available()
)
# Move the model to the specified device (GPU/CPU)
model.to(device)
# Load the processor for the model (handling tokenization and feature extraction)
processor = AutoProcessor.from_pretrained(model_id)
# Set up a speech recognition pipeline with the model and processor
pipe = pipeline(
"automatic-speech-recognition",
model=model,
tokenizer=processor.tokenizer,
feature_extractor=processor.feature_extractor,
max_new_tokens=128,
chunk_length_s=15,
batch_size=16,
torch_dtype=torch_dtype,
device=device,
)
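    # chunk_length_s=15 splits long recordings into 15-second windows and
    # batch_size=16 transcribes 16 windows per forward pass, which is how the
    # pipeline handles audio longer than Whisper's 30-second input limit.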
# Process the MP3 file through the pipeline to get the speech recognition result
result = pipe(mp3_file)
# Extract the text from the recognition result
text_from_video = result["text"]
# Return the extracted text
return text_from_video
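# Usage sketch (hypothetical path; assumes the MP3 was produced by yt2mp3 above):
#   transcript = speech2text("/tmp/audio.mp3")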
def chat(system_prompt, text):
"""
    Reloading the model on every call is not good practice,
    but for the sake of simplicity in this demo, let's keep it as is.
"""
# Define the model name to be used for the chat function
model_name = "meta-llama/Llama-2-7b-chat-hf"
# Authentication token for Hugging Face API
token = os.environ['HUGGINGFACE_TOKEN']
# Configure the model to load in a quantized 8-bit format for efficiency
bnb_config = BitsAndBytesConfig(
load_in_8bit=True
)
# Set the device map to load the model on GPU 0
device_map = {"": 0}
# Load the model from Hugging Face with the specified configuration
model = AutoModelForCausalLM.from_pretrained(
model_name,
quantization_config=bnb_config,
device_map=device_map,
        token=token
)
# Load the tokenizer for the model
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
# Create a text-generation pipeline with the loaded model and tokenizer
llama_pipeline = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
# Format the input text with special tokens for the model
text = f"""
<s>[INST] <<SYS>>
{system_prompt}
<</SYS>>
{text}[/INST]
"""
# Generate sequences using the pipeline with specified parameters
sequences = llama_pipeline(
text,
do_sample=True,
top_k=10,
num_return_sequences=1,
eos_token_id=tokenizer.eos_token_id,
        max_length=4096  # cap at Llama-2's 4096-token context window
)
# Extract the generated text from the sequences
generated_text = sequences[0]["generated_text"]
# Trim the generated text to remove the instruction part
generated_text = generated_text[generated_text.find('[/INST]')+len('[/INST]'):]
# Return the processed generated text
return generated_text
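# Usage sketch (hypothetical prompts; requires HUGGINGFACE_TOKEN to be set and
# access to the gated meta-llama/Llama-2-7b-chat-hf checkpoint):
#   reply = chat("You are a concise assistant.", "Explain attention in one sentence.")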
def summarize(text):
# Define the maximum input length for each iteration of summarization
input_len = 10000
# Start an infinite loop to repeatedly summarize the text
while True:
# Print the current length of the text
print(len(text))
# Call the chat function to summarize the text. Only the first 'input_len' characters are considered for summarization
summary = chat("", "Summarize the following: " + text[0:input_len])
        # Once the remaining text fits in a single window, return the final summary
        if len(text) < input_len:
            return summary
# Concatenate the current summary with the remaining part of the text for the next iteration
text = summary + " " + text[input_len:]
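# For example, a 25,000-character transcript takes several passes: summarize the
# first 10,000 characters, prepend that summary to the remaining text, and
# repeat until what is left fits inside a single 10,000-character window.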
# Summarize the spoken content of a YouTube video given its URL
def summarize_from_youtube(url):
    # Download the audio from the YouTube URL and transcribe the speech to text
    outputMp3F = "./files/audio.mp3"
    os.makedirs(os.path.dirname(outputMp3F), exist_ok=True)  # ensure ./files exists
    yt2mp3(url=url, outputMp3F=outputMp3F)
    transcribed = speech2text(mp3_file=outputMp3F)
    # Summarize the transcribed text
    summary = summarize(transcribed)
    return summary
# Configure the Gradio interface components
youtube_url = gr.Textbox(lines=1, label="Enter a YouTube URL")
output_text = gr.Textbox(label="Summary")
# Build and launch the Gradio interface
gr.Interface(
    fn=summarize_from_youtube,
    inputs=youtube_url,
    outputs=output_text,
    title="YouTube Summarizer",
    description="Enter a YouTube URL to summarize its content."
).launch()