import streamlit as st  # Web App
from gnewsclient import gnewsclient  # for fetching google news
from newspaper import Article  # to obtain text from news articles
from transformers import pipeline  # to summarize text
import spacy  # to obtain keywords
from annotated_text import annotated_text  # to display keywords

MAX_ARTICLES = 5  # articles shown per query
MAX_KEYWORDS = 4  # keywords extracted per article
ENTITY_LABELS = {"PERSON", "ORG", "GPE"}  # spaCy entity types kept as keywords


# Load sshleifer/distilbart-cnn-12-6 model (cached across reruns).
@st.cache(allow_output_mutation=True)
def load_model():
    """Return a cached Hugging Face summarization pipeline."""
    return pipeline("summarization")


# Client used only to populate the topic/location select boxes in main().
data = gnewsclient.NewsClient(max_results=0)

# faster method - inference api - 30k characters/mo
# API_URL = "https://api-inference.huggingface.co/models/sshleifer/distilbart-cnn-12-6"
# API_KEY = os.getenv("API_KEY")
# headers = {"Authorization": f"Bearer {API_KEY}"}
# def query(payload):
#     response = requests.post(API_URL, headers=headers, json=payload)
#     return response.json()


def getNews(topic, location):
    """Fetch up to MAX_ARTICLES Google News articles for a topic/location.

    Returns four parallel lists: article texts, titles, authors and URLs.
    Articles that fail to download or parse are skipped (best effort).
    """
    contents = []
    titles = []
    authors = []
    urls = []
    client = gnewsclient.NewsClient(
        language='english', location=location, topic=topic, max_results=10
    )
    for item in client.get_news():
        url = item['link']
        article = Article(url)
        try:
            article.download()
            article.parse()
        except Exception:
            # Was a bare `except:`; keep the deliberate skip-on-failure but
            # no longer swallow KeyboardInterrupt/SystemExit.
            continue
        # Google News titles look like "Headline - Publisher". Split on the
        # LAST hyphen so hyphens inside the headline survive (the original
        # did this with a double string reversal).
        title = item['title']
        sep = title.rfind("-")
        if sep == -1:
            # No publisher suffix: keep the whole title, no author.
            headline, publisher = title, ""
        else:
            headline = title[:sep]
            publisher = title[sep + 2:]  # skip "- "
        urls.append(url)
        contents.append(article.text)
        titles.append(headline)
        authors.append(publisher)
        if len(contents) == MAX_ARTICLES:
            break
    return contents, titles, authors, urls


def getNewsSummary(contents, summarizer):
    """Summarize each article in 30-60 words (less if the source is shorter)."""
    summaries = []
    for content in contents:
        word_count = len(content.split())
        result = summarizer(
            content,
            max_length=60,
            # min_length must not exceed the input length for tiny articles.
            min_length=min(30, word_count),
            do_sample=False,
            truncation=True,
        )
        summaries.append(result[0]['summary_text'])
    return summaries


def generateKeyword(contents):
    """Extract up to MAX_KEYWORDS named entities (person, organisation or
    geopolitical entity) per article.

    Returns one (possibly shorter) keyword list per article, in the same
    order as `contents`.

    BUG FIX: the original only appended an article's keywords when exactly
    four entities were found, so articles with fewer entities got no entry
    at all and every later article's keywords were misaligned with its
    summary in DisplaySummary. Keywords are now always appended.
    """
    nlp = spacy.load("en_core_web_lg")
    keywords = []
    seen = set()  # full entity strings and their individual words, uppercased
    for content in contents:
        keys = []
        for ent in nlp(content).ents:
            key = ent.text.upper()
            # The original's `key not in keywords` check was dead code
            # (keywords holds lists, key is a str); a single `seen` set of
            # full keys plus their words gives consistent de-duplication.
            if ent.label_ in ENTITY_LABELS and key not in seen:
                keys.append(key)
                seen.add(key)
                seen.update(key.split())
                if len(keys) == MAX_KEYWORDS:
                    break
        keywords.append(keys)
    return keywords


def DisplaySummary(titles, authors, summaries, keywords, urls):
    """Render each article: linked title, author, summary, keyword chips."""
    count = min(len(summaries), len(keywords), MAX_ARTICLES)
    for i in range(count):
        st.text("")
        st.subheader(f'[{titles[i]}] ({urls[i]})')
        st.markdown(f'{authors[i]}', unsafe_allow_html=True)
        st.write(summaries[i])
        if keywords[i]:
            # Build "KEYWORDS :" followed by highlighted chips separated by
            # spaces (replaces four copy-pasted branches, one per count).
            parts = ["KEYWORDS :"]
            for key in keywords[i]:
                parts.append((key, "", "#faa"))
                parts.append(" ")
            annotated_text(*parts[:-1])  # drop the trailing separator
        st.text("")
        st.text("")


def main():
    summarizer = load_model()
    st.title('Briefly')
    with st.expander('Read trending news in less than 60 words...', expanded=True):
        with st.form(key='form1'):
            topic = st.selectbox('Category:', data.topics[2:] + ["World"])
            location = st.selectbox('Location:', data.locations)
            submit_button = st.form_submit_button()
        if submit_button:
            with st.spinner('Fetching news...'):
                contents, titles, authors, urls = getNews(topic, location)
                summaries = getNewsSummary(contents, summarizer)
                keywords = generateKeyword(contents)
                DisplaySummary(titles, authors, summaries, keywords, urls)


if __name__ == '__main__':
    main()