ravi6389 committed
Commit
3d1db20
1 Parent(s): 62df500

Upload 3 files

Files changed (3)
  1. gnews2.py +75 -0
  2. packages.txt +1 -0
  3. requirements.txt +6 -0
gnews2.py ADDED
@@ -0,0 +1,75 @@
+ import gradio as gr
+ import requests
+ from bs4 import BeautifulSoup
+ import pandas as pd
+ import time  # used by the commented-out Selenium scraping below
+
+ from selenium import webdriver
+ from transformers import pipeline
+
+
+ # Fetch Google News results for a query and attach a sentiment label to each title.
+ def news_and_analysis(query):
+
+     # Percent-encode special characters so the query is safe to embed in a URL.
+     def encode_special_characters(text):
+         encoded_text = ''
+         special_characters = {'&': '%26', '=': '%3D', '+': '%2B', ' ': '%20'}  # Add more special characters as needed
+         for char in text.lower():
+             encoded_text += special_characters.get(char, char)
+         return encoded_text
+
+     query2 = encode_special_characters(query)
+     url = f"https://news.google.com/search?q={query2}&hl=en-US&gl=in&ceid=US%3Aen&num=3"
+
+     response = requests.get(url, verify=False)  # TLS verification disabled, as in the original
+     soup = BeautifulSoup(response.text, 'html.parser')
+
+     articles = soup.find_all('article')
+     links = [article.find('a')['href'] for article in articles]
+     links = [link.replace("./articles/", "https://news.google.com/articles/") for link in links]
+
+     # Split each article's text on newlines; the indexing below treats
+     # item 0 as source, 2 as title, 3 as time, 4 as author.
+     news_text = [article.get_text(separator='\n') for article in articles]
+     news_text_split = [text.split('\n') for text in news_text]
+
+     news_df = pd.DataFrame({
+         'Title': [text[2] for text in news_text_split],
+         'Source': [text[0] for text in news_text_split],
+         'Time': [text[3] if len(text) > 3 else 'Missing' for text in news_text_split],
+         'Author': [text[4].split('By ')[-1] if len(text) > 4 else 'Missing' for text in news_text_split],
+         'Link': links
+     })
+
+     # Keep only the first six results.
+     news_df = news_df.loc[0:5, :]
+
+     # Headless Chrome driver; only needed for the commented-out headline scraping below.
+     options = webdriver.ChromeOptions()
+     options.add_argument('--headless')
+     options.add_argument('--no-sandbox')
+     options.add_argument('--disable-dev-shm-usage')
+     driver = webdriver.Chrome(options=options)
+
+     classification = pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")
+
+     news_df['Sentiment'] = ''
+     for i in range(len(news_df)):
+         # driver.get(news_df.loc[i, 'Link'])
+         # time.sleep(10)
+         # headline = driver.find_element('xpath', '//h1').text
+         # news_df.loc[i, 'Headline'] = headline
+         title = news_df.loc[i, 'Title']
+         news_df.loc[i, 'Sentiment'] = str(classification(title))
+     print(news_df)
+
+     return news_df
+
+
+ with gr.Blocks() as demo:
+     topic = gr.Textbox(label="Topic for which you want Google news and sentiment analysis")
+     btn = gr.Button(value="Submit")
+     btn.click(news_and_analysis, inputs=topic, outputs=gr.Dataframe())
+
+ demo.launch()
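
The Sentiment column stores the raw pipeline output as a string. A minimal sketch of what that output looks like (the example headline and score are illustrative; the model must download on first use):

    from transformers import pipeline

    # Same model as in gnews2.py; returns a list with one dict per input.
    classification = pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")
    print(classification("Markets rally after strong earnings"))
    # e.g. [{'label': 'POS', 'score': 0.99}] -- labels are NEG / NEU / POS

Since each call yields a single-element list, `classification(title)[0]['label']` would store just the label; the app keeps the full stringified list.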
packages.txt ADDED
@@ -0,0 +1 @@
+ chromium-driver
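
On a Hugging Face Space, packages.txt lists Debian packages to apt-install; `chromium-driver` provides the chromedriver binary that Selenium's `webdriver.Chrome` locates on the PATH. A quick headless smoke test (a sketch; assumes chromium and chromium-driver are installed):

    from selenium import webdriver

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    driver = webdriver.Chrome(options=options)
    driver.get('https://news.google.com')
    print(driver.title)  # a non-empty title confirms the driver works
    driver.quit()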
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ selenium==4.21.0
+ gradio>=3.40.1
+ transformers
+ tensorflow
+ beautifulsoup4
+ requests