# Source: Hugging Face Space file listing (Space status when captured: "Runtime error").
# File size: 3,197 bytes; 131 lines in the original listing.
# Commits touching this file: 199a0ec, de9ee5d, 089d664, 28c7a64.
from transformers import (
pipeline,
AutoModelForSpeechSeq2Seq,
AutoProcessor,
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
)
import torch
import os
import random
def yt2mp3(url, outputMp3F):
    """Download the video at *url* and write its audio track to an MP3 file.

    The bundled ``./bin/youtube-dl`` binary downloads the video into a
    private temporary directory; ``ffmpeg`` then transcodes it to a
    44.1 kHz, stereo, 192 kbit/s MP3 at *outputMp3F*. The temporary
    directory is removed afterwards, whether or not the conversion succeeds.

    Args:
        url: Address of the video to fetch. It may be user-supplied, so it is
            passed to the downloader as a single argument and never through
            a shell.
        outputMp3F: Destination path of the MP3. An existing file is
            overwritten (``ffmpeg -y``); a missing parent directory is
            created.

    Raises:
        FileNotFoundError: If a required executable is missing or the
            downloader produced no file.
        subprocess.CalledProcessError: If youtube-dl or ffmpeg exits with a
            non-zero status.
    """
    import glob
    import subprocess
    import tempfile

    target_dir = os.path.dirname(outputMp3F)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    with tempfile.TemporaryDirectory() as workdir:
        # An explicit "%(ext)s" makes the downloaded file name predictable
        # ("video.<ext>") so it can be located without a shell glob.
        output_template = os.path.join(workdir, "video.%(ext)s")
        subprocess.run(
            # "--" ends option parsing, so a URL starting with "-" cannot be
            # interpreted as a youtube-dl option.
            ["./bin/youtube-dl", "-o", output_template, "--verbose", "--", url],
            check=True,
        )

        downloaded = sorted(
            glob.glob(os.path.join(glob.escape(workdir), "video.*"))
        )
        if not downloaded:
            raise FileNotFoundError(f"youtube-dl produced no file for {url!r}")

        subprocess.run(
            [
                "ffmpeg", "-y",
                "-i", downloaded[0],
                "-vn",
                "-ar", "44100",
                "-ac", "2",
                "-b:a", "192k",
                outputMp3F,
            ],
            check=True,
        )
def speech2text(mp3_file):
    """Transcribe an audio file with the Distil-Whisper large-v2 model.

    The model and its processor are loaded on every call and wrapped in an
    ``automatic-speech-recognition`` pipeline that processes the audio in
    15-second chunks, 16 chunks per batch.

    Args:
        mp3_file: Audio input handed to the pipeline unchanged (the caller
            in this file passes a path to an MP3 file).

    Returns:
        The ``"text"`` entry of the pipeline result.
    """
    has_cuda = torch.cuda.is_available()
    # Device and dtype are derived from the same check so they always agree:
    # half precision on the first GPU, full precision on the CPU.
    device = "cuda:0" if has_cuda else "cpu"
    torch_dtype = torch.float16 if has_cuda else torch.float32

    model_id = "distil-whisper/distil-large-v2"
    load_options = {
        "torch_dtype": torch_dtype,
        "low_cpu_mem_usage": True,
        "use_safetensors": True,
    }
    if has_cuda:
        # Flash Attention 2 needs a CUDA device and a half-precision dtype,
        # so it is only requested when a GPU is present.
        load_options["use_flash_attention_2"] = True

    model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, **load_options)
    model.to(device)

    processor = AutoProcessor.from_pretrained(model_id)
    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        max_new_tokens=128,
        chunk_length_s=15,
        batch_size=16,
        torch_dtype=torch_dtype,
        device=device,
    )

    result = pipe(mp3_file)
    return result["text"]
def chat(system_prompt, text):
    """Ask the Llama-2 7B chat model to respond to *text* and return its reply.

    The model is loaded with 8-bit bitsandbytes quantization onto GPU 0 and
    wrapped in a ``text-generation`` pipeline on every call. Access to the
    model repository uses the token in the ``HUGGINGFACE_TOKEN`` environment
    variable.

    Args:
        system_prompt: Text placed inside the ``<<SYS>>`` section of the
            prompt.
        text: User message placed directly before the ``[/INST]`` marker.

    Returns:
        The text generated by the model. The prompt itself is not included.

    Raises:
        KeyError: If ``HUGGINGFACE_TOKEN`` is not set.
    """
    model_name = "meta-llama/Llama-2-7b-chat-hf"
    token = os.environ['HUGGINGFACE_TOKEN']

    # NOTE(review): the model is reloaded for every call; callers that loop
    # over chat() pay that cost each time. Caching would keep the weights
    # resident on the GPU, so it is left as a deliberate follow-up.
    bnb_config = BitsAndBytesConfig(load_in_8bit=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map={"": 0},
        use_auth_token=token,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
    llama_pipeline = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

    prompt = f"""
<s>[INST] <<SYS>>
{system_prompt}
<</SYS>>
{text}[/INST]
"""

    sequences = llama_pipeline(
        prompt,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        # NOTE(review): 32000 is presumably larger than the model's context
        # window -- confirm against the model config before relying on it.
        max_length=32000,
        # Ask the pipeline for the completion only. Searching the echoed
        # output for the first "[/INST]" returns part of the prompt whenever
        # the user text itself contains that marker.
        return_full_text=False,
    )
    return sequences[0]["generated_text"]
def summarize(text, input_len=10000, chat_fn=None):
    """Summarize *text* of any length by folding it chunk by chunk.

    The first ``input_len`` characters are summarized; that summary is then
    prepended to the unread remainder and the process repeats until what is
    left fits in a single chunk. The summary of that last chunk is returned.

    Args:
        text: The text to summarize.
        input_len: Maximum number of characters sent to the model per
            request. Must be at least 2.
        chat_fn: Callable ``(system_prompt, text) -> str`` that produces a
            summary. Defaults to this module's ``chat``.

    Returns:
        The final summary, or ``""`` when *text* is empty.

    Raises:
        ValueError: If ``input_len`` is smaller than 2.
    """
    if input_len < 2:
        raise ValueError("input_len must be at least 2")
    if chat_fn is None:
        chat_fn = chat
    if not text:
        # Nothing to summarize; avoid a model call on an empty prompt.
        return ""

    # Cap how much of a summary is carried into the next round. Together
    # with the joining space it stays below input_len, so every round
    # consumes unread text and the loop always terminates, even if the
    # model returns a "summary" longer than the chunk it was given.
    carry_limit = (input_len - 1) // 2

    while True:
        summary = chat_fn("", "Summarize the following: " + text[:input_len])
        # "<=": a text that exactly fills one chunk has been fully read and
        # needs no further round.
        if len(text) <= input_len:
            return summary
        text = summary[:carry_limit] + " " + text[input_len:]
import gradio as gr
import gradio.inputs as inputs
def summarize_from_youtube(url):
    """Produce a text summary of the YouTube video at *url*.

    Runs the three pipeline stages in order: extract the audio to
    ``./files/audio.mp3``, transcribe that file, then summarize the
    transcript.

    Args:
        url: Address of the video to summarize.

    Returns:
        The summary returned by ``summarize`` for the transcript.
    """
    audio_path = "./files/audio.mp3"
    yt2mp3(url=url, outputMp3F=audio_path)
    transcript = speech2text(mp3_file=audio_path)
    return summarize(transcript)
# Gradio front end: one text box for the video URL, one for the summary.
# The components come from the top-level ``gr.Textbox`` class; the
# ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated in Gradio 3 and
# no longer exist in Gradio 4.
youtube_url = gr.Textbox(lines=1, label="Masukkan URL YouTube")
output_text = gr.Textbox(label="Summary")

# The interface is built and served at import time, as the Space expects.
gr.Interface(
    fn=summarize_from_youtube,
    inputs=youtube_url,
    outputs=output_text,
    title="YouTube Summarizer",
    description="Masukkan URL YouTube untuk merangkum kontennya.",
).launch()