Upload 3 files
- gnews2.py +75 -0
- packages.txt +1 -0
- requirements.txt +6 -0
gnews2.py
ADDED
@@ -0,0 +1,75 @@
import gradio as gr
import requests
import time
import pandas as pd

from bs4 import BeautifulSoup
from selenium import webdriver
from transformers import pipeline


# Fetch Google News results for a query and attach sentiment scores to the titles
def news_and_analysis(query):

    # Percent-encode special characters in a text string
    def encode_special_characters(text):
        encoded_text = ''
        special_characters = {'&': '%26', '=': '%3D', '+': '%2B', ' ': '%20'}  # Add more special characters as needed
        for char in text.lower():
            encoded_text += special_characters.get(char, char)
        return encoded_text

    query2 = encode_special_characters(query)
    url = f"https://news.google.com/search?q={query2}&hl=en-US&gl=in&ceid=US%3Aen&num=3"

    response = requests.get(url, verify=False)  # verify=False disables SSL certificate checks
    soup = BeautifulSoup(response.text, 'html.parser')

    # Each search result is an <article>; rewrite the relative links to absolute URLs
    articles = soup.find_all('article')
    links = [article.find('a')['href'] for article in articles]
    links = [link.replace("./articles/", "https://news.google.com/articles/") for link in links]

    # The article text comes back newline-separated: source, ..., title, time, author
    news_text = [article.get_text(separator='\n') for article in articles]
    news_text_split = [text.split('\n') for text in news_text]

    news_df = pd.DataFrame({
        'Title': [text[2] for text in news_text_split],
        'Source': [text[0] for text in news_text_split],
        'Time': [text[3] if len(text) > 3 else 'Missing' for text in news_text_split],
        'Author': [text[4].split('By ')[-1] if len(text) > 4 else 'Missing' for text in news_text_split],
        'Link': links
    })

    news_df = news_df.loc[0:5, :]  # keep only the first six results

    # Headless Chrome session; only needed if the headline scraping below is re-enabled
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome(options=options)

    classification = pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")

    news_df['Sentiment'] = ''
    for i in range(len(news_df)):
        # driver.get(news_df.loc[i, 'Link'])
        # time.sleep(10)
        # headline = driver.find_element('xpath', '//h1').text
        # news_df.loc[i, 'Headline'] = headline
        title = news_df.loc[i, 'Title']
        news_df.loc[i, 'Sentiment'] = str(classification(title))
    print(news_df)

    return news_df


with gr.Blocks() as demo:
    topic = gr.Textbox(label="Topic for which you want Google news and sentiment analysis")
    btn = gr.Button(value="Submit")
    btn.click(news_and_analysis, inputs=topic, outputs=gr.Dataframe())

demo.launch()
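One note on the query encoding: the hand-rolled encode_special_characters helper covers only four characters. The standard library's urllib.parse.quote percent-encodes the full reserved set and could replace it; a minimal sketch (the query string is just an illustration):

from urllib.parse import quote

# quote() percent-encodes every reserved character, not just &, =, + and space
query = quote("AI & robotics".lower())  # -> 'ai%20%26%20robotics'
url = f"https://news.google.com/search?q={query}&hl=en-US&gl=in&ceid=US%3Aen&num=3"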
packages.txt
ADDED
@@ -0,0 +1 @@
chromium-driver
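On Hugging Face Spaces, packages.txt lists Debian packages to install in the container, so chromium-driver supplies the chromedriver binary that Selenium drives. If Selenium's automatic driver resolution cannot find it, the path can be passed explicitly; a minimal sketch, assuming the Debian package's usual install location:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
# /usr/bin/chromedriver is an assumption: the usual path for Debian's chromium-driver package
driver = webdriver.Chrome(service=Service('/usr/bin/chromedriver'), options=options)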
requirements.txt
ADDED
@@ -0,0 +1,6 @@
selenium==4.21.0
gradio>=3.40.1
transformers
tensorflow
beautifulsoup4
requests
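requirements.txt pulls in transformers for the sentiment model; its pipeline returns a list of dicts, which gnews2.py stores stringified in the Sentiment column. A minimal sketch of the raw call and of extracting just the label, with an illustrative sentence and score:

from transformers import pipeline

classifier = pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")
result = classifier("Stocks rally on strong earnings")  # e.g. [{'label': 'POS', 'score': 0.99}]
label = result[0]['label']  # this model's labels are POS, NEG and NEU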