|
import streamlit as st
|
|
from gnewsclient import gnewsclient
|
|
from newspaper import Article
|
|
from transformers import pipeline
|
|
import spacy
|
|
from annotated_text import annotated_text
|
|
|
|
|
|
|
|
@st.cache(allow_output_mutation=True)
def load_model():
    """Build the HuggingFace summarization pipeline once and cache it.

    Streamlit's cache keeps the expensive-to-construct pipeline alive across
    reruns; ``allow_output_mutation`` skips hash checks on the model object.
    """
    return pipeline("summarization")
|
|
|
|
# Module-level client used only to populate the Category/Location dropdowns
# in main(); max_results=0 because no articles are fetched from this instance
# (getNews builds its own client per request).
data = gnewsclient.NewsClient(max_results=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def getNews(topic, location):
    """Fetch up to 5 parsable articles for a topic/location from Google News.

    Parameters
    ----------
    topic : str
        News category accepted by gnewsclient (e.g. "World").
    location : str
        Location name accepted by gnewsclient.

    Returns
    -------
    tuple[list, list, list, list]
        ``(contents, titles, authors, urls)`` — parallel lists with at most
        5 entries. Articles that fail to download or parse are skipped.
    """
    contents, titles, authors, urls = [], [], [], []
    client = gnewsclient.NewsClient(
        language='english', location=location, topic=topic, max_results=10)
    for item in client.get_news():
        url = item['link']
        article = Article(url)
        try:
            article.download()
            article.parse()
        except Exception:
            # Best-effort: paywalled or unparsable links are skipped, not fatal.
            # (Original used a bare except, which also swallowed KeyboardInterrupt.)
            continue
        # Google News titles look like "Headline - Publisher"; split on the
        # LAST hyphen so hyphens inside the headline survive. The original
        # reversed-string find() produced an empty title and a garbled author
        # when no "-" was present.
        headline, sep, publisher = item['title'].rpartition('-')
        if not sep:
            headline, publisher = item['title'], ''
        urls.append(url)
        contents.append(article.text)
        titles.append(headline)
        authors.append(publisher.strip())
        if len(urls) == 5:
            break
    return contents, titles, authors, urls
|
|
|
|
|
|
|
|
def getNewsSummary(contents, summarizer):
    """Condense each article body to a summary of at most 60 tokens.

    Parameters
    ----------
    contents : list[str]
        Article texts to summarize.
    summarizer : callable
        A transformers summarization pipeline (or compatible callable).

    Returns
    -------
    list[str]
        One summary string per input text, in the same order.
    """
    def summarize(text):
        # Very short articles may have fewer than 30 words; never request a
        # minimum summary length longer than the source itself.
        word_count = len(text.split())
        result = summarizer(
            text,
            max_length=60,
            min_length=min(30, word_count),
            do_sample=False,
            truncation=True,
        )
        return result[0]['summary_text']

    return [summarize(content) for content in contents]
|
|
|
|
|
|
|
|
def generateKeyword(contents):
    """Extract up to 4 named-entity keywords per article via spaCy NER.

    Parameters
    ----------
    contents : list[str]
        Article bodies, as returned by getNews.

    Returns
    -------
    list[list[str]]
        One keyword list (0-4 upper-cased entity strings) per article,
        aligned index-for-index with ``contents``.
    """
    keywords = []
    seen_words = []  # individual words already used as keywords, across ALL articles
    nlp = spacy.load("en_core_web_lg")
    labels = {"PERSON", "ORG", "GPE"}
    for content in contents:
        doc = nlp(content)
        keys = []
        for ent in doc.ents:
            key = ent.text.upper()
            # BUG FIX: the original tested `key not in keywords`, comparing a
            # str against a list of lists (always True); the intent was
            # per-article dedup, i.e. `key not in keys`.
            if ent.label_ in labels and key not in seen_words and key not in keys:
                keys.append(key)
                seen_words.extend(key.split())
                if len(keys) == 4:
                    break
        # BUG FIX: append unconditionally so `keywords` stays aligned with
        # `contents`. The original appended only when exactly 4 entities were
        # found, silently dropping articles with fewer — which also made the
        # 1-3-keyword branches in DisplaySummary unreachable.
        keywords.append(keys)
    return keywords
|
|
|
|
|
|
|
|
def DisplaySummary(titles, authors, summaries, keywords, urls):
    """Render up to 5 summarized stories in the Streamlit page.

    All five arguments are parallel lists holding, per story: title, author
    (publisher), summary text, keyword list, and the article URL.
    """
    for i in range(5):
        # Guard against fewer than 5 successfully processed articles.
        if i + 1 <= len(summaries) and i + 1 <= len(keywords):
            st.text("")
            # BUG FIX: Markdown links must have no space between [] and (),
            # otherwise "[title] (url)" renders as literal text.
            st.subheader(f'[{titles[i]}]({urls[i]})')
            st.markdown(f'<b>{authors[i]}</b>', unsafe_allow_html=True)
            st.write(summaries[i])
            if keywords[i]:
                # Build "KEYWORDS :" followed by highlighted chips separated
                # by spaces — replaces the original copy-pasted 4/3/2/1 elif
                # chain with one loop.
                args = ["KEYWORDS :"]
                for j, kw in enumerate(keywords[i][:4]):
                    if j:
                        args.append(" ")
                    args.append((kw, "", "#faa"))
                annotated_text(*args)
            st.text("")
            st.text("")
|
|
|
|
|
|
def main():
    """Streamlit entry point: show the topic form and render summarized news."""
    summarizer = load_model()
    st.title('Briefly')
    with st.expander('Read trending news in less than 60 words...', expanded=True):
        with st.form(key='form1'):
            # First two gnewsclient topics are skipped; "World" is re-added
            # explicitly as a selectable category.
            topic = st.selectbox('Category:', data.topics[2:] + ["World"])
            location = st.selectbox('Location:', data.locations)
            submit_button = st.form_submit_button()
    if submit_button:
        with st.spinner('Fetching news...'):
            contents, titles, authors, urls = getNews(topic, location)
            summaries = getNewsSummary(contents, summarizer)
            keywords = generateKeyword(contents)
            DisplaySummary(titles, authors, summaries, keywords, urls)
|
|
|
|
|
|
# Run the app only when this file is executed directly (not when imported).
if __name__ == '__main__':
    main()
|
|
|