ravi6389's picture
Rename gnews2.py to app.py
991776d verified
raw
history blame
2.7 kB
import gradio as gr
import selenium
import requests
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import pandas as pd
import time
from transformers import pipeline
# Search Query
def news_and_analysis(query):
# Encode special characters in a text string
def encode_special_characters(text):
encoded_text = ''
special_characters = {'&': '%26', '=': '%3D', '+': '%2B', ' ': '%20'} # Add more special characters as needed
for char in text.lower():
encoded_text += special_characters.get(char, char)
return encoded_text
query2 = encode_special_characters(query)
url = f"https://news.google.com/search?q={query2}&hl=en-US&gl=in&ceid=US%3Aen&num=3"
response = requests.get(url, verify = False)
soup = BeautifulSoup(response.text, 'html.parser')
articles = soup.find_all('article')
links = [article.find('a')['href'] for article in articles]
links = [link.replace("./articles/", "https://news.google.com/articles/") for link in links]
news_text = [article.get_text(separator='\n') for article in articles]
news_text_split = [text.split('\n') for text in news_text]
news_df = pd.DataFrame({
'Title': [text[2] for text in news_text_split],
'Source': [text[0] for text in news_text_split],
'Time': [text[3] if len(text) > 3 else 'Missing' for text in news_text_split],
'Author': [text[4].split('By ')[-1] if len(text) > 4 else 'Missing' for text in news_text_split],
'Link': links
})
news_df = news_df.loc[0:5,:]
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.use_chromium = True
driver = webdriver.Chrome(options = options)
classification= pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")
news_df['Sentiment'] = ''
for i in range(0, len(news_df)):
# driver.get(news_df.loc[i,'Link'])
# time.sleep(10)
# headline = driver.find_element('xpath', '//h1').text
#news_df.loc[i, 'Headline'] = headline
title = news_df.loc[i, 'Title']
news_df.loc[i, 'Sentiment'] = str(classification(title))
print(news_df)
return(news_df)
with gr.Blocks() as demo:
topic= gr.Textbox(label="Topic for which you want Google news and sentiment analysis")
btn = gr.Button(value="Submit")
btn.click(news_and_analysis, inputs=topic, outputs= gr.Dataframe())
demo.launch()