import streamlit as st # Web App
from gnewsclient import gnewsclient # for fetching Google News
from newspaper import Article # to obtain text from news articles
from transformers import pipeline # to summarize text
import spacy # to obtain keywords
from annotated_text import annotated_text # to display keywords
# Load sshleifer/distilbart-cnn-12-6 model
@st.cache(allow_output_mutation=True)
def load_model():
    model = pipeline("summarization")
    return model
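# st.cache keeps the loaded pipeline in memory across Streamlit reruns, so the
# model is only downloaded and initialised once instead of on every interaction.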
# Module-level client, used only to populate the topic and location dropdowns
data = gnewsclient.NewsClient(max_results=0)
# Faster method - Inference API - 30k characters/mo
# API_URL = "https://api-inference.huggingface.co/models/sshleifer/distilbart-cnn-12-6"
# API_KEY = os.getenv("API_KEY")
# headers = {"Authorization": f"Bearer {API_KEY}"}
# def query(payload):
#     response = requests.post(API_URL, headers=headers, json=payload)
#     return response.json()
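# A minimal sketch of how query() would be used if the Inference API path above were
# enabled (requires "import os, requests" and an API_KEY environment variable);
# the API expects a JSON payload with an "inputs" field and returns a list of dicts:
#     query({"inputs": article_text, "parameters": {"min_length": 30, "max_length": 60}})[0]['summary_text']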
# Obtain article URLs and their content for the chosen topic and location
def getNews(topic, location):
    count = 0
    contents = []
    titles = []
    authors = []
    urls = []
    data = gnewsclient.NewsClient(language='english', location=location, topic=topic, max_results=10)
    news = data.get_news()
    for item in news:
        url = item['link']
        article = Article(url)
        try:
            article.download()
            article.parse()
            # Google News titles look like "Headline - Publisher";
            # split on the last "-" to separate the headline from the publisher
            temp = item['title'][::-1]
            index = temp.find("-")
            temp = temp[:index-1][::-1]
            urls.append(url)
            contents.append(article.text)
            titles.append(item['title'][:-index-1])
            authors.append(temp)
            count += 1
            if count == 5:
                break
        except Exception:
            continue
    return contents, titles, authors, urls
# Summarize each article - at most 60 tokens, at least 30 (or the article's own length if shorter)
def getNewsSummary(contents, summarizer):
    summaries = []
    for content in contents:
        # Cap min_length for very short articles
        minimum = len(content.split())
        summaries.append(summarizer(content, max_length=60, min_length=min(30, minimum), do_sample=False, truncation=True)[0]['summary_text'])
    return summaries
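# For reference, the summarization pipeline returns a list of dicts such as
# [{'summary_text': '...'}], which is why the code above indexes [0]['summary_text'].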
# Obtain up to 4 keywords per article (persons, organisations or geopolitical entities)
def generateKeyword(contents):
    keywords = []
    words = []
    nlp = spacy.load("en_core_web_lg")
    labels = ["PERSON", "ORG", "GPE"]
    for content in contents:
        doc = nlp(content)
        keys = []
        for ent in doc.ents:
            key = ent.text.upper()
            label = ent.label_
            if key not in words and key not in keys and label in labels:
                keys.append(key)
                # Remember the individual words so later articles don't repeat the same keywords
                for element in key.split():
                    words.append(element)
            if len(keys) == 4:
                break
        # Append even when fewer than 4 keywords were found, so keywords stays aligned with contents
        keywords.append(keys)
    return keywords
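# Note: en_core_web_lg is assumed to be installed beforehand, e.g. via
#     python -m spacy download en_core_web_lg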
# Display title, author, summary and keywords in Streamlit
def DisplaySummary(titles, authors, summaries, keywords, urls):
    for i in range(5):
        if i + 1 <= len(summaries) and i + 1 <= len(keywords):
            st.text("")
            st.subheader(f'[{titles[i]}]({urls[i]})')
            st.markdown(f'<b>{authors[i]}</b>', unsafe_allow_html=True)
            st.write(summaries[i])
            if keywords[i]:
                # Highlight each keyword found for this article
                args = ["KEYWORDS :"]
                for key in keywords[i]:
                    args += [(key, "", "#faa"), " "]
                annotated_text(*args)
            st.text("")
            st.text("")
def main():
    summarizer = load_model()
    st.title('Briefly')
    with st.expander('Read trending news in less than 60 words...', expanded=True):
        with st.form(key='form1'):
            topic = st.selectbox('Category:', data.topics[2:] + ["World"])
            location = st.selectbox('Location:', data.locations)
            submit_button = st.form_submit_button()
        if submit_button:
            with st.spinner('Fetching news...'):
                contents, titles, authors, urls = getNews(topic, location)
                summaries = getNewsSummary(contents, summarizer)
                keywords = generateKeyword(contents)
                DisplaySummary(titles, authors, summaries, keywords, urls)

if __name__ == '__main__':
    main()
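# Run locally with: streamlit run app.py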