import html
import os
from datetime import date, timedelta

import gradio as gr
import pandas as pd
import requests
from newsapi import NewsApiClient
from transformers import pipeline
# NewsAPI key, read from the "newsapi" environment variable.
# NOTE: despite its name this is the key handed to NewsApiClient below; the
# name is kept so any other code referring to HF_TOKEN keeps working.
HF_TOKEN = os.environ["newsapi"]

# Initialization
newsapi = NewsApiClient(api_key=HF_TOKEN)
classifier = pipeline(model="cardiffnlp/twitter-roberta-base-sentiment")
# NOTE(review): evaluated once at start-up, so a long-running process keeps
# querying the date it was launched on.
today = str(date.today())

# Raw pipeline labels mapped to display names; any other label is treated as
# 'Positive'.
_SENTIMENT_NAMES = {"LABEL_0": "Negative", "LABEL_1": "Neutral"}


def _article_sentiment(entry):
    """Return 'Negative', 'Neutral' or 'Positive' for one article dict.

    The classifier is run exactly once per article. The article's 'content'
    is classified; when it is missing or empty the 'description' and then the
    'title' are used instead, because the classifier needs a text input.
    An article with no text at all is reported as 'Neutral' so the result
    list stays aligned with the article list.
    """
    text = entry.get('content') or entry.get('description') or entry.get('title')
    if not text:
        return 'Neutral'
    label = classifier(text)[0]['label']
    return _SENTIMENT_NAMES.get(label, 'Positive')


print("******** Outside Inference function ********")
# Never echo the key itself: stdout ends up in the application logs.
print(f"HF_TOKEN is set - {bool(HF_TOKEN)}")

# top-headlines
all_top_headlines = newsapi.get_top_headlines(country='in')
sentiment_tophead = [_article_sentiment(entry) for entry in all_top_headlines['articles']]
print(f"sentiment_tophead length is {len(sentiment_tophead)}")
print(f"all_top_headlines length is {len(all_top_headlines['articles'])}")
print("************** sentiment start ****************")
print(sentiment_tophead)
print("************** sentiment end ****************")

# times of india
# NOTE(review): `domains` is passed with an 'http://' scheme; the bare form
# 'timesofindia.indiatimes.com' may be what the API expects - confirm against
# the NewsAPI documentation before changing it.
all_articles_toi = newsapi.get_everything(
    sources='the-times-of-india',
    domains='http://timesofindia.indiatimes.com',
    from_param=today,
    to=today,
    language='en',
    sort_by='relevancy',
)
sentiment_toi = [_article_sentiment(entry) for entry in all_articles_toi['articles']]
print(f"sentiment_toi length is {len(sentiment_toi)}")
print(f"all_articles_toi length is {len(all_articles_toi['articles'])}")
# Drivers - one Gradio callback per sentiment, all backed by _news_table().
def _pick_source(newssource):
    """Return ``(articles, sentiments)`` for the chosen outlet.

    Any value other than the two known outlet names (including ``None``)
    yields two empty lists, so the callers build an empty table instead of
    failing on unbound variables.
    """
    if newssource == "Times Of India":
        return all_articles_toi['articles'], sentiment_toi
    if newssource == "Top Headlines":
        return all_top_headlines['articles'], sentiment_tophead
    return [], []


def _link_cell(entry):
    """Render the article's 'url' as an HTML anchor opening in a new tab."""
    href = entry.get('url')
    if not href:
        return ""
    # Escape the value so a quote in the URL cannot break out of the attribute.
    return f'<a href="{html.escape(href, quote=True)}" target="_blank">Click here for the original news article</a>'


def _image_cell(entry):
    """Render the article's 'urlToImage' as an HTML <img> tag."""
    src = entry.get('urlToImage')
    if not src:
        return ""
    return f'<img src="{html.escape(src, quote=True)}">'


def _news_table(newssource, wanted, banner):
    """Build the table of articles whose sentiment equals *wanted*.

    Args:
        newssource: Outlet name ("Times Of India" or "Top Headlines").
        wanted: Sentiment to keep ('Positive', 'Negative' or 'Neutral').
        banner: Header line printed before the debug output.

    Returns:
        A pandas DataFrame with the columns description, content, url,
        urlToImage and sentiment, restricted to rows with the wanted
        sentiment. It has no rows when the outlet name is not recognised.
    """
    articles, sentiment = _pick_source(newssource)

    description = [entry.get('description') for entry in articles]
    content = [entry.get('content') for entry in articles]
    url = [_link_cell(entry) for entry in articles]
    urlToImage = [_image_cell(entry) for entry in articles]

    print(banner)
    print(f"Newssource is - {newssource}")
    print(f"description length is - {len(description)}")
    print(f"content length is - {len(content)}")
    print(f"url length is - {len(url)}")
    print(f"urlToImage length is - {len(urlToImage)}")
    print(f"sentiment length is - {len(sentiment)}")

    dictnews = {
        'description': description,
        'content': content,
        'url': url,
        'urlToImage': urlToImage,
        'sentiment': sentiment,
    }
    df = pd.DataFrame.from_dict(dictnews)
    df = df.loc[df['sentiment'] == wanted]
    print(f"dataframe shape is :,{df.shape}")
    return df


# Driver - positive
def inference_pos(newssource):
    """Return the chosen outlet's articles classified as 'Positive'."""
    return _news_table(
        newssource,
        'Positive',
        "********************* Positive News **************************",
    )


# Driver - negative
def inference_neg(newssource):
    """Return the chosen outlet's articles classified as 'Negative'."""
    return _news_table(
        newssource,
        'Negative',
        "********************* Negative News ***********************",
    )


# Driver - neutral
def inference_neut(newssource):
    """Return the chosen outlet's articles classified as 'Neutral'."""
    return _news_table(
        newssource,
        'Neutral',
        "********************* Neutral News ***********************",
    )
# Gradio Blocks UI: outlet selector, one button per sentiment, one result table.
with gr.Blocks() as demo:
    with gr.Row():
        # Pre-select an outlet so a button click never sends None to the
        # callbacks when the user has not touched the dropdown yet.
        in_newssource = gr.Dropdown(
            ["Times Of India", "Top Headlines"],
            value="Times Of India",
            label='Choose a News Outlet',
        )
    with gr.Row():
        b1 = gr.Button("Get Positive News")
        b2 = gr.Button("Get Negative News")
        b3 = gr.Button("Get Neutral News")
    with gr.Row():
        # Five columns; the third and fourth are declared as markdown and the
        # rest as plain strings.
        out_dataframe = gr.Dataframe(
            wrap=True,
            datatype=["str", "str", "markdown", "markdown", "str"],
        )

    # All three buttons read the same dropdown and write to the same table.
    b1.click(fn=inference_pos, inputs=in_newssource, outputs=out_dataframe)
    b2.click(fn=inference_neg, inputs=in_newssource, outputs=out_dataframe)
    b3.click(fn=inference_neut, inputs=in_newssource, outputs=out_dataframe)

demo.launch(debug=True, show_error=True)