File size: 2,648 Bytes
029a54e
 
433d73e
 
029a54e
 
 
 
 
433d73e
 
 
 
 
 
 
029a54e
 
 
 
 
 
 
433d73e
 
 
 
 
 
 
029a54e
 
433d73e
029a54e
 
 
 
 
 
 
 
 
 
 
 
 
433d73e
029a54e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74a03fa
029a54e
 
 
 
433d73e
 
 
 
 
83bf0a6
433d73e
941fce6
433d73e
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88

from transformers import pipeline
import base64 
import time
from bs4 import BeautifulSoup
import requests
import streamlit as st
import warnings
warnings.filterwarnings("ignore")


# Local-time stamp taken each time the script runs; text_downloader() embeds
# it in the suggested file name of the download link.
timestr = time.strftime("%Y%m%d-%H%M%S", time.localtime())

# Page header: author credit first, then the app title.
st.markdown(" Created by  **_Prathap_**. :baby_chick:")
st.title("Automatic text summarization")

@st.cache(allow_output_mutation=True)
def pipen():
    """Return a Hugging Face ``summarization`` pipeline.

    No model name is passed, so the library's default summarization model
    is used.  The function is wrapped in Streamlit's cache decorator with
    ``allow_output_mutation=True`` so that the returned pipeline object can
    be reused across script reruns instead of being rebuilt every time.
    """
    return pipeline("summarization")


def text_downloader(raw_text):
    """Show a download link for ``raw_text`` on the Streamlit page.

    The text is base64-encoded and embedded in a ``data:`` URI inside an
    ``<a download=...>`` tag.  The suggested file name contains the
    module-level ``timestr`` stamp.

    Args:
        raw_text: The string to offer for download.
    """
    encoded = base64.b64encode(raw_text.encode()).decode()
    download_name = f"new_text_file_{timestr}_.txt"
    link_html = (
        f'<a href="data:file/txt;base64,{encoded}" '
        f'download="{download_name}">Click Here!!</a>'
    )
    st.markdown("#### Download File ###")
    # Raw HTML is required for the anchor tag, hence unsafe_allow_html.
    st.markdown(link_html, unsafe_allow_html=True)



def _fetch_article_text(page_url):
    """Download ``page_url`` and return its headline and paragraph text.

    Only ``<h1>`` and ``<p>`` elements are kept; their text is joined with
    single spaces.

    Args:
        page_url: Address of the page to scrape.

    Returns:
        The concatenated text of the matching elements (may be empty).

    Raises:
        requests.exceptions.RequestException: On a malformed URL, a
            connection problem, a timeout, or an HTTP error status.
    """
    # A timeout keeps the app from hanging forever on an unresponsive host.
    response = requests.get(page_url, timeout=30)
    # Refuse to summarize the body of an HTTP error page (404, 500, ...).
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    return ' '.join(tag.text for tag in soup.find_all(['h1', 'p']))


def _split_into_chunks(article, max_words):
    """Group the sentences of ``article`` into chunks of limited size.

    Sentences are delimited by ``.``, ``?`` and ``!``.  Consecutive
    sentences are packed into the same chunk while the chunk stays within
    ``max_words`` words; a single sentence longer than ``max_words`` is
    cut into ``max_words``-sized pieces so no chunk exceeds the limit.

    Args:
        article: The full text to split.
        max_words: Maximum number of whitespace-separated words per chunk.

    Returns:
        A list of non-empty chunk strings; empty when ``article`` has no
        words.
    """
    # Mark sentence ends while keeping the punctuation with its sentence.
    for mark in ('.', '?', '!'):
        article = article.replace(mark, mark + '<eos>')

    chunks = []
    current_words = []
    for sentence in article.split('<eos>'):
        # split() without an argument drops empty strings and newlines, so
        # the word count reflects real words only.
        words = sentence.split()
        for start in range(0, len(words), max_words):
            piece = words[start:start + max_words]
            if current_words and len(current_words) + len(piece) > max_words:
                chunks.append(' '.join(current_words))
                current_words = []
            current_words.extend(piece)
    if current_words:
        chunks.append(' '.join(current_words))
    return chunks


url = st.text_input('Paste URL ⤵️')


if st.button("Submit"):
    max_chunk = 400
    page_url = url.strip()
    chunks = []

    if not page_url:
        st.warning("Please paste a URL before pressing Submit.")
    else:
        try:
            ARTICLE = _fetch_article_text(page_url)
        except requests.exceptions.RequestException as err:
            # Show the problem in the page instead of crashing the script.
            st.error(f"Could not download the page: {err}")
        else:
            chunks = _split_into_chunks(ARTICLE, max_chunk)
            if not chunks:
                st.warning("No headline or paragraph text was found on that page.")

    # Only invoke the model when there is something to summarize.
    if chunks:
        with st.spinner("Loading the Model into the memory...."):
            model = pipen()
            res = model(chunks, max_length=50, min_length=30, do_sample=False)
        text = ' '.join(summ['summary_text'] for summ in res)

        st.write("Success")
        st.write(text)
        text_downloader(text)
        
        
if st.button("Contact"):
    # Author details, written to the page one markdown line at a time.
    contact_lines = (
        "Hi there, I'm Prathap 👋.  2+ years Applied Deep Learning experience",
        "✅ [LinkedIn](https://linkedin.com/in/prathapreddyk)",
        " 📚[Github](https://github.com/Pratap517)",
        " 📗Analyze Csv files in one step [Click Here](https://data-analyse-prathap.herokuapp.com)",
        " 😷 Face Mask Detection App [Click Here](https://mask-detection-5a800.firebaseapp.com/)",
    )
    for contact_line in contact_lines:
        st.write(contact_line)