TanishqO0F committed on
Commit
38c2418
·
verified ·
1 Parent(s): c7557b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -58
app.py CHANGED
@@ -1,75 +1,94 @@
1
  import gradio as gr
2
- import selenium
3
  import requests
4
  from bs4 import BeautifulSoup
5
  import pandas as pd
6
-
7
- from selenium import webdriver
8
- from selenium.webdriver.common.keys import Keys
9
- import pandas as pd
10
- import time
11
  from transformers import pipeline
 
 
12
 
13
- # Search Query
14
- def news_and_analysis(query):
15
 
16
- # Encode special characters in a text string
17
- def encode_special_characters(text):
18
- encoded_text = ''
19
- special_characters = {'&': '%26', '=': '%3D', '+': '%2B', ' ': '%20'} # Add more special characters as needed
20
- for char in text.lower():
21
- encoded_text += special_characters.get(char, char)
22
- return encoded_text
23
 
24
- query2 = encode_special_characters(query)
25
- url = f"https://news.google.com/search?q={query2}&hl=en-US&gl=in&ceid=US%3Aen&num=3"
26
-
27
- response = requests.get(url, verify = False)
 
 
 
 
 
 
 
 
28
  soup = BeautifulSoup(response.text, 'html.parser')
29
-
30
  articles = soup.find_all('article')
31
- links = [article.find('a')['href'] for article in articles]
32
- links = [link.replace("./articles/", "https://news.google.com/articles/") for link in links]
33
-
34
- news_text = [article.get_text(separator='\n') for article in articles]
35
- news_text_split = [text.split('\n') for text in news_text]
36
-
37
- news_df = pd.DataFrame({
38
- 'Title': [text[2] for text in news_text_split],
39
- 'Source': [text[0] for text in news_text_split],
40
- 'Time': [text[3] if len(text) > 3 else 'Missing' for text in news_text_split],
41
- 'Author': [text[4].split('By ')[-1] if len(text) > 4 else 'Missing' for text in news_text_split],
42
- 'Link': links
43
- })
 
 
44
 
45
- news_df = news_df.loc[0:5,:]
46
- options = webdriver.ChromeOptions()
47
- options.add_argument('--headless')
48
- options.add_argument('--no-sandbox')
49
- options.add_argument('--disable-dev-shm-usage')
50
- options.use_chromium = True
51
- driver = webdriver.Chrome(options = options)
52
 
53
- classification= pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")
 
 
 
54
 
55
- news_df['Sentiment'] = ''
56
- for i in range(0, len(news_df)):
57
- # driver.get(news_df.loc[i,'Link'])
58
- # time.sleep(10)
59
- # headline = driver.find_element('xpath', '//h1').text
60
- #news_df.loc[i, 'Headline'] = headline
61
- title = news_df.loc[i, 'Title']
62
- news_df.loc[i, 'Sentiment'] = str(classification(title))
63
- print(news_df)
64
-
65
- return(news_df)
 
 
 
 
66
 
 
67
  with gr.Blocks() as demo:
68
-
69
-
70
- topic= gr.Textbox(label="Topic for which you want Google news and sentiment analysis")
 
 
 
 
 
71
 
72
- btn = gr.Button(value="Submit")
73
- btn.click(news_and_analysis, inputs=topic, outputs= gr.Dataframe())
 
 
 
74
 
75
- demo.launch()
 
 
1
  import gradio as gr
 
2
  import requests
3
  from bs4 import BeautifulSoup
4
  import pandas as pd
 
 
 
 
 
5
  from transformers import pipeline
6
+ import plotly.graph_objects as go
7
+ from datetime import datetime, timedelta
8
 
9
# Sentiment analysis model.
# The pipeline is built once at import time and the resulting callable is
# reused by every call that scores a headline.
SENTIMENT_MODEL_ID = "finiteautomata/bertweet-base-sentiment-analysis"
sentiment_model = pipeline(model=SENTIMENT_MODEL_ID)
11
 
12
+ # Function to encode special characters in the search query
13
def encode_special_characters(text):
    """Percent-encode a search query for use as a URL query-string value.

    The query is lower-cased first (the behaviour of the previous
    hand-rolled encoder) and then every character outside the unreserved
    set (letters, digits, ``-``, ``_``, ``.``, ``~``) is percent-encoded.
    The earlier implementation only escaped ``&``, ``=``, ``+`` and space,
    so input containing ``#``, ``?``, ``%``, ``/`` or non-ASCII text
    produced a malformed URL.

    Args:
        text: Raw search query as typed by the user.

    Returns:
        The lower-cased, percent-encoded query string.
    """
    # Imported locally so the function does not depend on a file-level import.
    from urllib.parse import quote

    # safe="" makes "/" get encoded as well; it has no special role inside a
    # single query-string value and must not be passed through verbatim.
    return quote(text.lower(), safe="")
19
 
20
+ # Function to fetch news articles
21
def fetch_news(query, num_articles=10):
    """Fetch Google News search results for ``query`` as a DataFrame.

    Args:
        query: Free-text search query.
        num_articles: Maximum number of articles to return.

    Returns:
        A ``pandas.DataFrame`` with one row per ``<article>`` element and the
        columns ``Title``, ``Source``, ``Time``, ``Author`` and ``Link``.
        Fields that cannot be extracted are filled with ``'Missing'``.
        An empty DataFrame is returned when the HTTP request fails.
    """
    # Imported locally so the function does not depend on a file-level import.
    from urllib.parse import urljoin

    base_url = "https://news.google.com/"
    encoded_query = encode_special_characters(query)
    url = (
        f"{base_url}search?q={encoded_query}"
        f"&hl=en-US&gl=in&ceid=US%3Aen&num={num_articles}"
    )

    try:
        # Without a timeout a stalled connection would block the UI callback
        # indefinitely; a timeout error is a RequestException and is handled
        # by the same branch as any other request failure.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching news: {e}")
        return pd.DataFrame()

    soup = BeautifulSoup(response.text, 'html.parser')
    articles = soup.find_all('article')

    news_data = []
    for article in articles[:num_articles]:
        # An article without any <a href=...> used to raise here; fall back
        # to the same 'Missing' placeholder used for the other fields.
        anchor = article.find('a', href=True)
        if anchor is None:
            link = 'Missing'
        else:
            # Resolve the relative href ("./articles/...") against the site
            # root; absolute URLs are left as they are.
            link = urljoin(base_url, anchor['href'])

        # The fields are picked by position in the article's text lines.
        text_parts = article.get_text(separator='\n').split('\n')

        news_data.append({
            'Title': text_parts[2] if len(text_parts) > 2 else 'Missing',
            'Source': text_parts[0] if len(text_parts) > 0 else 'Missing',
            'Time': text_parts[3] if len(text_parts) > 3 else 'Missing',
            'Author': text_parts[4].split('By ')[-1] if len(text_parts) > 4 else 'Missing',
            'Link': link,
        })

    return pd.DataFrame(news_data)
49
 
50
+ # Function to perform sentiment analysis
51
def analyze_sentiment(text):
    """Classify ``text`` with the module-level sentiment pipeline.

    Args:
        text: The string to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first prediction the
        pipeline returns for ``text``.
    """
    predictions = sentiment_model(text)
    first_prediction = predictions[0]
    label = first_prediction['label']
    score = first_prediction['score']
    return label, score
 
 
 
54
 
55
+ # Main function to process news and perform analysis
56
def news_and_analysis(query):
    """Fetch news for ``query``, score each headline and build a bar chart.

    Args:
        query: Free-text topic or company name to search for.

    Returns:
        A ``(dataframe, figure)`` pair for the two UI outputs. When no
        articles are found, the dataframe holds a single ``Message`` cell
        with the notice and the figure is ``None``.
    """
    # Fetch news
    news_df = fetch_news(query)

    if news_df.empty:
        # The first output is a DataFrame component, so the notice is wrapped
        # in a one-cell table instead of being returned as a bare string
        # (a plain string is not tabular data for that component).
        return pd.DataFrame({'Message': ["No news articles found."]}), None

    # Perform sentiment analysis: one (label, score) pair per headline.
    labels, scores = zip(*news_df['Title'].apply(analyze_sentiment))
    news_df['Sentiment'] = labels
    news_df['Sentiment_Score'] = scores

    # NOTE(review): the model card for the configured checkpoint appears to
    # list POS/NEU/NEG as labels rather than positive/neutral/negative -
    # confirm. Matching on the upper-cased first three letters accepts both
    # spellings; anything unrecognised falls back to gray instead of NaN.
    palette = {'POS': 'green', 'NEU': 'gray', 'NEG': 'red'}
    bar_colors = [
        palette.get(str(label).upper()[:3], 'gray')
        for label in news_df['Sentiment']
    ]

    # Create sentiment plot
    sentiment_fig = go.Figure(data=[go.Bar(
        x=news_df['Time'],
        y=news_df['Sentiment_Score'],
        marker_color=bar_colors,
    )])
    sentiment_fig.update_layout(
        title='News Sentiment Over Time',
        xaxis_title='Time',
        yaxis_title='Sentiment Score',
    )

    return news_df, sentiment_fig
75
 
76
+ # Gradio interface
77
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown("# Financial News Sentiment Analysis")

    # Input row: the search topic and the button that triggers the analysis.
    topic = gr.Textbox(label="Enter a financial topic or company name")
    analyze_btn = gr.Button(value="Analyze")

    # Output widgets: the scored headlines table and the sentiment bar chart.
    news_output = gr.DataFrame(label="News and Sentiment Analysis")
    sentiment_plot = gr.Plot(label="Sentiment Analysis")

    # Wire the button to the analysis callback; its two return values fill
    # the table and the plot, in that order.
    analyze_btn.click(
        fn=news_and_analysis,
        inputs=topic,
        outputs=[news_output, sentiment_plot],
    )

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()