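"""Streamlit app for news sentiment analysis.

Scrapes headlines from India Today / Indian Express for a chosen news domain
and classifies each headline's sentiment with either a pretrained multilingual
BERT model or a locally trained logistic regression model.
"""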
# Wildcard imports from the project's own modules; imports.py is expected to
# provide streamlit (st), nltk, joblib, pickle, and the transformers Auto* classes.
from imports import *
from data_preprocessing import *
from web_scrapper import *
from model import *

# Download the NLTK data needed for lemmatization
nltk.download('wordnet')
nltk.download('omw-1.4')

st.set_page_config(page_title="News Sentiment Analysis")
st.title("News Sentiment Analysis")

# Let the user choose between the pretrained BERT model and the logistic regression model
model_used = st.radio("Select model for analysis", ["BERT (Preferred)", "Logistic"])
use_logistic = model_used == "Logistic"

if use_logistic:
    logistic_model()
    # Load the trained model and preprocessing objects
    logreg = joblib.load('logreg_model.pkl')
    with open('vectorizer.pkl', 'rb') as handle:
        vectorizer = pickle.load(handle)
    with open('label_encoder.pkl', 'rb') as handle:
        label_encoder = pickle.load(handle)
else:
    # Pretrained multilingual sentiment model from Hugging Face
    model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
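
# Note: the nlptown model predicts a 1-5 star rating; analyze_sentiment
# (from model.py) is assumed to map that rating to a sentiment label.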

# Pick a news domain and scrape headlines plus links for it.
# scrape_sports (from web_scrapper.py) is used as the generic scraper for every domain.
domain = st.radio("Select news domain for analysis", ["Sports", "Tech", "Politics", "Entertainment", "Business"], index=None)
if domain is not None:
    if domain == "Sports":
        titles, links = scrape_sports(url="https://www.indiatoday.in/search/sports", driver_path='chromedriver.exe')
    elif domain == "Tech":
        titles, links = scrape_sports(url="https://indianexpress.com/section/technology/", driver_path='chromedriver.exe')
    elif domain == "Politics":
        titles, links = scrape_sports(url="https://indianexpress.com/section/political-pulse/", driver_path='chromedriver.exe')
    elif domain == "Entertainment":
        titles, links = scrape_sports(url="https://indianexpress.com/section/entertainment/", driver_path='chromedriver.exe')
    elif domain == "Business":
        titles, links = scrape_sports(url="https://indianexpress.com/section/business/", driver_path='chromedriver.exe')

    if use_logistic:
        # Clean, lemmatize, and vectorize the titles, then predict with the logistic model
        cleaned_titles = [clean(title) for title in titles]
        lemmatized_titles = [lemmatize_text(title) for title in cleaned_titles]
        transformed_titles = vectorizer.transform(lemmatized_titles)
        predictions = logreg.predict(transformed_titles)
        predicted_labels = label_encoder.inverse_transform(predictions)
        for title, link, sentiment in zip(titles, links, predicted_labels):
            st.write(f"Title: {title}")
            st.write(f"Link: {link}")
            st.write(f"Predicted Sentiment: {sentiment}")
            st.write("-----")
    else:
        # Score each title with the BERT model and display the results
        for title, link in zip(titles, links):
            sentiment = analyze_sentiment(title, tokenizer, model)
            st.write(f"Title: {title}")
            st.write(f"Link: {link}")
            st.write(f"Predicted Sentiment: {sentiment}")
            st.write("-----")