File size: 3,031 Bytes
79e1719
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c3b859b
 
 
 
 
 
79e1719
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import re
import plotly.express as px
import datetime
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import datetime

def clean_text(text):
    """Return *text* with upper-case "LABEL:" style prefixes removed.

    Every run of one or more ASCII capital letters and/or spaces that is
    immediately followed by a colon is deleted (colon included), wherever
    it occurs in the string. Text that has no such run is returned
    unchanged.
    """
    return re.sub('[A-Z ]+:', '', text)

def prepare_df(df, categories, date_filter):
    """Filter the news frame by topic, headline tag and date window.

    Args:
        df: DataFrame with ``topic_verification``, ``headline`` and
            ``updatedDate`` columns. ``updatedDate`` values are parsed with
            the format ``'%Y-%m-%d %H:%M:%S%z'``. A stray ``'Unnamed: 0'``
            column is dropped from this frame in place when present.
        categories: Topics to keep, matched against ``topic_verification``.
            An empty (or ``None``) selection keeps every topic.
        date_filter: Sequence whose first two items are the inclusive lower
            and upper bounds (``date``/``datetime`` objects) applied to
            ``updatedDate``.

    Returns:
        A new DataFrame with ``headline`` moved to the first column and
        ``updatedDate`` converted to datetimes. The steps after the topic
        filter are best effort: if one of them raises, the error is printed
        and the frame as processed so far is returned.
    """
    # Same in-place effect on the caller's frame as before, but without a
    # bare ``except`` hiding unrelated failures.
    df.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')

    #df['topic_verification'][(df.headline.str.contains('crude', case=False)) | df.body.str.contains('crude', case=False)] = 'Crude Oil'

    # Only filter by topic when a selection was made. Copy in both branches
    # so the column shuffling below never touches the caller's frame.
    if categories:
        news_data = df[df['topic_verification'].isin(categories)].copy()
    else:
        news_data = df.copy()

    try:
        # Drop headlines that contain yesterday's month abbreviation in
        # upper case (e.g. "JAN").
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        month_tag = yesterday.strftime('%b').upper()

        # Literal match; missing headlines count as "no tag" so that the
        # boolean negation below cannot fail on NaN.
        has_month_tag = news_data['headline'].str.contains(
            month_tag, regex=False, na=False
        )
        news_data = news_data[~has_month_tag].copy()

        # Move 'headline' to the first column position.
        news_data.insert(0, 'headline', news_data.pop('headline'))

        news_data['updatedDate'] = pd.to_datetime(
            news_data['updatedDate'], format='%Y-%m-%d %H:%M:%S%z'
        )

        # The bounds are rendered as strings and compared against the
        # datetime column, exactly as the original code did.
        bound_format = '%Y-%m-%d %H:%M:%S%z'
        lower = date_filter[0].strftime(bound_format)
        upper = date_filter[1].strftime(bound_format)

        in_window = (news_data['updatedDate'] >= lower) & (news_data['updatedDate'] <= upper)
        news_data = news_data[in_window]

    except Exception as E:
        # Deliberate best effort: report the problem and hand back whatever
        # has been prepared so far instead of failing the caller.
        print(E)

    return news_data

def plot_3dgraph(news_data):
    """Build a 3-D scatter figure from the sentiment score columns.

    Each row of *news_data* is plotted at (``neutral_score``,
    ``negative_score``, ``positive_score``), coloured by ``positive_score``
    and sized by ``negative_score``; hovering shows ``headline`` and
    ``topic_verification``. The last row of *news_data* is then drawn again
    as a separate marker trace named 'MEAN OF SELECTED NEWS'.

    NOTE(review): the extra trace uses the final row as-is; presumably the
    caller appends the mean of the selection as that row - confirm.

    Returns:
        The figure object produced by ``px.scatter_3d`` with the extra trace
        added.
    """
    scatter_options = dict(
        x='neutral_score',
        y='negative_score',
        z='positive_score',
        color='positive_score',
        hover_name='headline',
        color_continuous_scale='RdBu',
        size_max=40,
        size='negative_score',
        hover_data='topic_verification',
    )
    fig = px.scatter_3d(news_data, **scatter_options)

    title_options = dict(
        text="News Semantics towards Crude Oil Price <br><sup>Hover cursor on a datapoint to show news title</sup>",
        font=dict(size=35),
        automargin=False,
    )
    fig.update_layout(height=600, title=title_options)

    # Applied before the extra trace is added, so it only affects the
    # traces created by px.scatter_3d.
    fig.update_traces(textfont_size=8)

    # One-row frame holding the final record of the input.
    last_row = news_data.iloc[[-1]]
    highlight_marker = dict(color=['rgb(0, 250, 200)'] * 25, size=10)
    highlight_trace = dict(
        type='scatter3d',
        x=last_row['neutral_score'],
        y=last_row['negative_score'],
        z=last_row['positive_score'],
        mode='markers',
        name='MEAN OF SELECTED NEWS',
        marker=highlight_marker,
    )
    fig.add_trace(highlight_trace)

    return fig