"""Gradio app that summarises a YouTube video, a blog post, or free text."""

from urllib.parse import parse_qs, urlparse

import gradio as gr
# NOTE(review): `Series` is not used anywhere in this file; the import is kept
# only because other code may rely on it -- confirm and remove if unneeded.
from gradio.mix import Series

# Number of transcript characters handed to the summariser per call.
_YT_CHUNK_CHARS = 1000
# Maximum number of words per chunk when summarising a blog post.
_BLOG_CHUNK_WORDS = 500
# Marker inserted after sentence-ending punctuation so the text can be split
# into sentences without losing the punctuation itself.
_EOS = "<eos>"


def _extract_video_id(video_link):
    """Return the video id found in a YouTube URL.

    Handles ``watch?v=ID`` links (with or without extra query parameters)
    and short ``youtu.be/ID`` links.

    Raises:
        ValueError: if no video id can be found in ``video_link``.
    """
    parsed = urlparse(video_link)
    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]
    short_id = parsed.path.strip("/")
    if parsed.netloc.endswith("youtu.be") and short_id:
        return short_id
    raise ValueError(f"Could not find a video id in {video_link!r}")


def _chunk_sentences(sentences, max_words=_BLOG_CHUNK_WORDS):
    """Greedily pack sentences into text chunks of at most ``max_words`` words.

    A sentence is never split: when it does not fit in the current chunk a
    new chunk is started with it (so a single over-long sentence yields an
    over-long chunk).
    """
    chunks = []
    for sentence in sentences:
        words = sentence.split(' ')
        if chunks and len(chunks[-1]) + len(words) <= max_words:
            chunks[-1].extend(words)
        else:
            chunks.append(words)
    return [' '.join(words) for words in chunks]


# How to use: YTVideoToText("https://www.youtube.com/watch?v=jQL0ZeHtXFc")
def YTVideoToText(video_link):
    """Fetch a YouTube transcript and summarise it with BART.

    Args:
        video_link: URL of the video.

    Returns:
        A two-item list ``[transcript_text, summary_text]``.

    Raises:
        ValueError: if no video id can be extracted from ``video_link``.
    """
    # Imported lazily: these libraries are heavy and only needed here.
    from transformers import pipeline
    from youtube_transcript_api import YouTubeTranscriptApi

    # fetching video transcript
    video_id = _extract_video_id(video_link)
    transcript = YouTubeTranscriptApi.get_transcript(video_id)

    # adding all text together (each entry is prefixed with a space)
    result = "".join(' ' + entry['text'] for entry in transcript)

    # summarize text, one fixed-size slice at a time
    summarizerfb = pipeline("summarization", model="facebook/bart-large-cnn")
    summarized_text = []
    for start in range(0, len(result), _YT_CHUNK_CHARS):
        piece = result[start:start + _YT_CHUNK_CHARS]
        if not piece.strip():
            # Nothing to summarise in a blank slice.
            continue
        out = summarizerfb(piece, max_length=130, min_length=30, do_sample=False)
        summarized_text.append(out[0]['summary_text'])

    # returning summary
    return [result, ' '.join(summarized_text)]


# How to use: postSummaryWithBart("https://ethereum.org/en/what-is-ethereum/")
def postSummaryWithBart(blog_link):
    """Download a web page and summarise its ``<h1>``/``<p>`` text.

    Args:
        blog_link: URL of the page to summarise.

    Returns:
        A two-item list ``[sentences, summary_text]`` where ``sentences`` is
        the list of sentences extracted from the page.
    """
    # Imported lazily: these libraries are heavy and only needed here.
    from transformers import pipeline
    from bs4 import BeautifulSoup
    import requests

    # loading summarization pipeline
    summarizer = pipeline("summarization")

    # getting our blog post; the timeout keeps a dead server from hanging the UI
    r = requests.get(blog_link, timeout=30)
    soup = BeautifulSoup(r.text, 'html.parser')
    results = soup.find_all(['h1', 'p'])
    ARTICLE = ' '.join(result.text for result in results)

    # tagging sentence-ending punctuation, then splitting on the tag
    for mark in ('.', '?', '!'):
        ARTICLE = ARTICLE.replace(mark, mark + _EOS)
    sentences = ARTICLE.split(_EOS)

    # chunking text, dropping chunks that contain no text at all
    chunks = [chunk for chunk in _chunk_sentences(sentences) if chunk.strip()]
    if not chunks:
        return [sentences, '']

    # summarizing text
    res = summarizer(chunks, max_length=70, min_length=30, do_sample=False)
    text = ''.join([summ['summary_text'] for summ in res])

    # returning summary
    return [sentences, text]


# How to use: abstractiveSummaryWithPegasus("""Sample text to be summarized""")
def abstractiveSummaryWithPegasus(words):
    """Summarise ``words`` with the ``google/pegasus-xsum`` model.

    Args:
        words: text to summarise; it is truncated by the tokenizer.

    Returns:
        The generated summary as a string.
    """
    # importing & loading model
    from transformers import PegasusForConditionalGeneration, PegasusTokenizer
    tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
    model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")

    # perform summarization
    tokens = tokenizer(words, truncation=True, padding="longest", return_tensors="pt")
    summary = model.generate(**tokens)
    # Special tokens are skipped so they do not show up in the text.
    actual_summ = tokenizer.decode(summary[0], skip_special_tokens=True)

    # returning summary (the UI binds this function's result to an output)
    return actual_summ


# Main logic of the program
def process(uri, mode):
    """Dispatch ``uri`` to the summariser selected by ``mode``.

    Args:
        uri: URL to summarise.
        mode: ``"Youtube"`` or ``"Blog"``.

    Returns:
        The two-item list returned by the selected summariser.

    Raises:
        ValueError: if ``mode`` is neither ``"Youtube"`` nor ``"Blog"``.
    """
    if mode == "Youtube":
        return YTVideoToText(uri)
    if mode == "Blog":
        return postSummaryWithBart(uri)
    raise ValueError("Invalid mode")


with gr.Blocks() as ui:
    gr.Markdown("""
    ## Permet de faire le résumé d'une video youtube ou d'un article de blog
    """)
    with gr.Row():
        with gr.Column():
            URI = gr.Textbox(
                label="URI à résumer",
                max_lines=1,
                placeholder="https://youtube|website.ext",
            )
            # NOTE(review): the two boxes below reuse the URI label and
            # placeholder although they hold the transcript and the summary --
            # looks like a copy-paste; confirm the intended wording.
            TRANSCRIPT = gr.Textbox(
                label="URI à résumer",
                lines=10,
                placeholder="https://youtube|website.ext",
            )
            RESUME = gr.Textbox(
                label="URI à résumer",
                lines=10,
                interactive=False,
                placeholder="https://youtube|website.ext",
            )
        with gr.Column():
            # NOTE(review): "Text" is offered here but process() rejects it;
            # free text goes through the "Process TEXT" button instead.
            MODE = gr.Radio(choices=["Youtube", "Blog", "Text"])
            gr.Button("Process URI").click(
                fn=process,
                inputs=[URI, MODE],
                outputs=[TRANSCRIPT, RESUME]
            )
            gr.Button("Process TEXT").click(
                fn=abstractiveSummaryWithPegasus,
                inputs=[TRANSCRIPT],
                outputs=[RESUME]
            )

#translator_fr = gr.Interface.load("huggingface/Helsinki-NLP/opus-mt-fr-en")
#summarizer = gr.Interface.load("huggingface/sshleifer/distilbart-cnn-12-6")
ui.launch()