File size: 5,055 Bytes
198c7f7
 
 
 
34f8e9f
 
 
 
 
609c3ee
 
 
 
 
 
 
 
 
 
 
 
 
 
34f8e9f
609c3ee
 
 
 
 
 
 
34f8e9f
609c3ee
 
 
 
 
 
90e4331
609c3ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d48c9b
609c3ee
 
 
 
 
0d48c9b
609c3ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34f8e9f
609c3ee
 
 
90e4331
609c3ee
 
 
 
 
198c7f7
609c3ee
 
 
 
34f8e9f
609c3ee
 
 
34f8e9f
609c3ee
 
edbb831
861ee8a
 
 
 
 
 
 
 
 
 
34f8e9f
861ee8a
 
34f8e9f
861ee8a
 
34f8e9f
861ee8a
 
34f8e9f
861ee8a
 
 
34f8e9f
861ee8a
90e4331
edbb831
 
 
 
 
34f8e9f
edbb831
 
 
34f8e9f
 
90e4331
 
 
 
34f8e9f
 
90e4331
861ee8a
 
 
0d48c9b
861ee8a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import gradio as gr
from transformers import pipeline, PegasusForConditionalGeneration


# Load the three summarization models once, at import time, so the request
# handlers defined below reuse them instead of reloading a model per call.
# Summarization pipeline on the "facebook/bart-large-cnn" checkpoint; used by
# YTVideoToText.
summarizeryt = pipeline("summarization", model="facebook/bart-large-cnn")
# No model is named here, so the library picks its default summarization
# checkpoint; used by postSummaryWithBart.
# NOTE(review): presumably "sshleifer/distilbart-cnn-12-6" — confirm and
# consider pinning it explicitly so a library upgrade cannot change it.
summarizerbg = pipeline("summarization")
# Bare Pegasus model (not a pipeline); abstractiveSummaryWithPegasus calls
# .generate() on it directly with its own tokenizer.
summarizertx = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")

def _extract_video_id(video_link):
    """Return the YouTube video id contained in *video_link*.

    Handles ``watch?v=<id>`` links (with any additional query parameters in
    any order), ``youtu.be/<id>`` short links and ``/embed/<id>``,
    ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths.

    Raises:
        ValueError: if no video id can be found in the link.
    """
    from urllib.parse import parse_qs, urlparse

    link = (video_link or "").strip()
    # Without a scheme urlparse() puts the host into the path, which would
    # hide "youtu.be/<id>" style links from the host check below.
    if "://" not in link:
        link = "https://" + link
    parsed = urlparse(link)

    # Regular watch URLs carry the id in the "v" query parameter.
    ids = parse_qs(parsed.query).get("v")
    if ids and ids[0]:
        return ids[0]

    host = parsed.hostname or ""
    segments = [segment for segment in parsed.path.split("/") if segment]
    if segments and (host == "youtu.be" or host.endswith(".youtu.be")):
        return segments[0]
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    raise ValueError(f"Could not find a YouTube video id in {video_link!r}")


# How to use: YTVideoToText("https://www.youtube.com/watch?v=jQL0ZeHtXFc")
def YTVideoToText(video_link):
    """Fetch the transcript of a YouTube video and summarize it.

    The transcript is cut into windows of 1000 characters; each non-blank
    window is summarized with the module-level ``summarizeryt`` pipeline and
    the partial summaries are joined with single spaces.

    Args:
        video_link: URL of the video (see ``_extract_video_id`` for the
            accepted forms).

    Returns:
        A two-item list ``[transcript_text, summary_text]``. The summary is
        an empty string when the transcript contains no text.

    Raises:
        ValueError: if no video id can be extracted from ``video_link``.
        Errors raised by the transcript library or the model are propagated
        unchanged.
    """
    # Resolve the id before touching any heavy dependency so a malformed link
    # fails fast with a clear message.
    video_id = _extract_video_id(video_link)

    # Imported lazily, following this file's convention for handler-specific
    # dependencies.
    from youtube_transcript_api import YouTubeTranscriptApi

    # fetching video transcript
    transcript = YouTubeTranscriptApi.get_transcript(video_id)

    # Every caption line is prefixed with a space, as before, so the text
    # shown to the user is unchanged.
    result = "".join(" " + entry["text"] for entry in transcript)

    # Summarize window by window. range() with a step never yields an empty
    # trailing window, so the model is no longer called on "" when the
    # transcript is empty or its length is an exact multiple of the window.
    chunk_size = 1000
    summaries = []
    for start in range(0, len(result), chunk_size):
        chunk = result[start:start + chunk_size]
        if not chunk.strip():
            continue
        out = summarizeryt(chunk, max_new_tokens=130, min_length=30, do_sample=False)
        summaries.append(out[0]["summary_text"])

    # returning transcript and summary
    return [result, " ".join(summaries)]


def _chunk_sentences(sentences, max_words):
    """Group *sentences* into space-joined chunks of at most *max_words* words.

    Sentences are never split: a single sentence longer than *max_words*
    becomes a chunk of its own. Blank sentences are skipped, so the result
    never contains an empty chunk.
    """
    chunks = []
    current = []
    for sentence in sentences:
        words = sentence.split()
        if not words:
            continue
        # Close the running chunk when this sentence would overflow it.
        if current and len(current) + len(words) > max_words:
            chunks.append(" ".join(current))
            current = []
        current.extend(words)
    if current:
        chunks.append(" ".join(current))
    return chunks


# How to use: postSummaryWithBart("https://ethereum.org/en/what-is-ethereum/")
def postSummaryWithBart(blog_link):
    """Download a web page and summarize the text of its <h1> and <p> tags.

    The page text is split into sentences, the sentences are packed into
    chunks of at most 500 words, each chunk is summarized with the
    module-level ``summarizerbg`` pipeline and the partial summaries are
    joined with single spaces.

    Args:
        blog_link: URL of the page to summarize.

    Returns:
        A two-item list ``[article_text, summary_text]``. The summary is an
        empty string when the page yields no text.

    Errors raised by ``requests`` (including a timeout after 30 seconds) or
    by the model are propagated unchanged.
    """
    # importing libraries
    from bs4 import BeautifulSoup
    import requests

    # getting our blog post; the timeout keeps an unresponsive server from
    # blocking the handler indefinitely
    response = requests.get(blog_link, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')
    article = ' '.join(tag.text for tag in soup.find_all(['h1', 'p']))

    # Tag every sentence end with a marker and split on it. The previous code
    # had lost the marker, leaving no-op replaces followed by split(''),
    # which always raises "ValueError: empty separator".
    marked = article
    for punctuation in ('.', '?', '!'):
        marked = marked.replace(punctuation, punctuation + '<eos>')
    sentences = marked.split('<eos>')

    # chunking text
    chunks = _chunk_sentences(sentences, max_words=500)
    if not chunks:
        # Nothing to summarize; do not run the model on empty input.
        return [article, '']

    # summarizing text
    res = summarizerbg(chunks, max_new_tokens=1024, min_length=30, do_sample=False)
    summary = ' '.join(summ['summary_text'] for summ in res)

    # The article is returned as plain text (not a list of sentences) because
    # it is displayed in a Textbox, matching what YTVideoToText returns.
    return [article, summary]


# How to use: abstractiveSummaryWithPegasus("""Sample text to be summarized""")
def abstractiveSummaryWithPegasus(words):
    """Summarize *words* with the module-level Pegasus model ``summarizertx``.

    Args:
        words: the text to summarize. Input longer than the tokenizer's
            maximum length is truncated (``truncation=True``).

    Returns:
        The generated summary as plain text, or an empty string when
        ``words`` is empty, ``None`` or whitespace only.
    """
    # Nothing to summarize: skip tokenizer and model instead of letting the
    # model generate text from an empty prompt.
    if not words or not words.strip():
        return ""

    # importing & loading the tokenizer that matches the model
    from transformers import PegasusTokenizer
    tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")

    # perform summarization
    tokens = tokenizer(words, truncation=True, padding="longest", return_tensors="pt")
    summary = summarizertx.generate(**tokens)

    # skip_special_tokens drops markers such as <pad> and </s>, which were
    # previously included in the text shown to the user.
    return tokenizer.decode(summary[0], skip_special_tokens=True)


# Main UI: one column with the URL / transcript / summary text boxes and one
# column with a button per summarization backend.
with gr.Blocks() as ui:
    gr.Markdown("""
    ## Permet de faire le résumé d'une video youtube ou d'un article de blog
    """)
    with gr.Row():
        with gr.Column():
            # `api_name` is an argument of event listeners such as .click(),
            # not of components, so it is no longer passed to the text boxes.
            URI = gr.Textbox(
                label="URI à résumer",
                max_lines=1,
                placeholder="https://youtube|website.ext",
            )
            # NOTE(review): the URL placeholder on the two boxes below looks
            # copy-pasted from the URI box; kept as-is, confirm wording.
            TRANSCRIPT = gr.Textbox(
                label="Transcript à résumer",
                lines=10,
                placeholder="https://youtube|website.ext",
            )
            RESUME = gr.Textbox(
                label="Résumé",
                lines=10,
                interactive=False,
                placeholder="https://youtube|website.ext",
            )
        with gr.Column():
            # Fills both the transcript and the summary from a YouTube URL.
            gr.Button("Process Youtube").click(
                fn=YTVideoToText,
                inputs=[URI],
                outputs=[TRANSCRIPT, RESUME],
                api_name="process_uri"
            )
            # Fills both the transcript and the summary from a web page URL.
            # This handler used to reuse "process_uri"; endpoint names must be
            # unique, so it now has a name of its own.
            gr.Button("Process HTML").click(
                fn=postSummaryWithBart,
                inputs=[URI],
                outputs=[TRANSCRIPT, RESUME],
                api_name="process_html"
            )
            # Summarizes whatever text is currently in the transcript box.
            gr.Button("Process TEXT").click(
                fn=abstractiveSummaryWithPegasus,
                inputs=[TRANSCRIPT],
                outputs=[RESUME],
                api_name="process_text"
            )

#translator_fr = gr.Interface.load("huggingface/Helsinki-NLP/opus-mt-fr-en")
#summarizer = gr.Interface.load("huggingface/sshleifer/distilbart-cnn-12-6")

# Start the web server for the app.
ui.launch()