ryanrahmadifa committed on
Commit
8010b21
·
1 Parent(s): 220ae8f

Added more features.

Browse files
modules/__pycache__/data_preparation.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/data_preparation.cpython-39.pyc and b/modules/__pycache__/data_preparation.cpython-39.pyc differ
 
modules/data_preparation.py CHANGED
@@ -3,6 +3,8 @@ import plotly.express as px
3
  import datetime
4
  import plotly.graph_objects as go
5
  import numpy as np
 
 
6
 
7
  def clean_text(text):
8
  new_text = text
@@ -32,10 +34,15 @@ def prepare_df(df, categories, date_filter):
32
 
33
  # insert column using insert(position,column_name,first_column) function
34
  news_data.insert(0, 'headline', first_column)
35
-
36
- news_data['updatedDate'] = news_data['updatedDate'].apply(lambda x: datetime.datetime.strptime(x, '%y/%m/%d %H:%M:%S'))
37
 
38
- news_data = news_data[(news_data['updatedDate'] >= date_filter[0]) & (news_data['updatedDate'] <= date_filter[1])]
 
 
 
 
 
 
 
39
 
40
  except Exception as E:
41
  print(E)
 
3
  import datetime
4
  import plotly.graph_objects as go
5
  import numpy as np
6
+ import pandas as pd
7
+ import datetime
8
 
9
  def clean_text(text):
10
  new_text = text
 
34
 
35
  # insert column using insert(position,column_name,first_column) function
36
  news_data.insert(0, 'headline', first_column)
 
 
37
 
38
+ news_data['updatedDate'] = pd.to_datetime(news_data['updatedDate'], format='%Y-%m-%d %H:%M:%S%z')
39
+
40
+ dates = []
41
+
42
+ dates.append(datetime.datetime.strftime(date_filter[0], '%Y-%m-%d %H:%M:%S%z'))
43
+ dates.append(datetime.datetime.strftime(date_filter[1], '%Y-%m-%d %H:%M:%S%z'))
44
+
45
+ news_data = news_data[(news_data['updatedDate'] >= dates[0]) & (news_data['updatedDate'] <= dates[1])]
46
 
47
  except Exception as E:
48
  print(E)
test.ipynb CHANGED
@@ -871,12 +871,327 @@
871
  "print(f'Bearish prediction {test.negative_score.mean()}\\n Neutral prediction {test.neutral_score.mean()}\\n Bullish prediction {test.positive_score.mean()}')"
872
  ]
873
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
874
  {
875
  "cell_type": "code",
876
  "execution_count": null,
877
  "metadata": {},
878
  "outputs": [],
879
- "source": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
880
  }
881
  ],
882
  "metadata": {
 
871
  "print(f'Bearish prediction {test.negative_score.mean()}\\n Neutral prediction {test.neutral_score.mean()}\\n Bullish prediction {test.positive_score.mean()}')"
872
  ]
873
  },
874
+ {
875
+ "cell_type": "code",
876
+ "execution_count": 24,
877
+ "metadata": {},
878
+ "outputs": [
879
+ {
880
+ "data": {
881
+ "text/html": [
882
+ "<div>\n",
883
+ "<style scoped>\n",
884
+ " .dataframe tbody tr th:only-of-type {\n",
885
+ " vertical-align: middle;\n",
886
+ " }\n",
887
+ "\n",
888
+ " .dataframe tbody tr th {\n",
889
+ " vertical-align: top;\n",
890
+ " }\n",
891
+ "\n",
892
+ " .dataframe thead th {\n",
893
+ " text-align: right;\n",
894
+ " }\n",
895
+ "</style>\n",
896
+ "<table border=\"1\" class=\"dataframe\">\n",
897
+ " <thead>\n",
898
+ " <tr style=\"text-align: right;\">\n",
899
+ " <th></th>\n",
900
+ " <th>body</th>\n",
901
+ " <th>headline</th>\n",
902
+ " <th>updatedDate</th>\n",
903
+ " <th>topic_prediction</th>\n",
904
+ " <th>topic_verification</th>\n",
905
+ " <th>negative_score</th>\n",
906
+ " <th>neutral_score</th>\n",
907
+ " <th>positive_score</th>\n",
908
+ " <th>trend_prediction</th>\n",
909
+ " </tr>\n",
910
+ " </thead>\n",
911
+ " <tbody>\n",
912
+ " <tr>\n",
913
+ " <th>0</th>\n",
914
+ " <td>Spanish crude import volumes increased 11% ye...</td>\n",
915
+ " <td>SPAIN DATA: H1 crude imports rise 11% to 1.4 m...</td>\n",
916
+ " <td>2024-08-08 12:11:55+00:00</td>\n",
917
+ " <td>Crude Oil</td>\n",
918
+ " <td>Crude Oil</td>\n",
919
+ " <td>0.991473</td>\n",
920
+ " <td>0.005524</td>\n",
921
+ " <td>0.519264</td>\n",
922
+ " <td>Bearish</td>\n",
923
+ " </tr>\n",
924
+ " <tr>\n",
925
+ " <th>1</th>\n",
926
+ " <td>A number of refineries in China have resumed ...</td>\n",
927
+ " <td>REFINERY NEWS: Host of Chinese units back from...</td>\n",
928
+ " <td>2024-08-08 11:51:12+00:00</td>\n",
929
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
930
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
931
+ " <td>0.417054</td>\n",
932
+ " <td>0.845595</td>\n",
933
+ " <td>0.180685</td>\n",
934
+ " <td>Neutral</td>\n",
935
+ " </tr>\n",
936
+ " <tr>\n",
937
+ " <th>2</th>\n",
938
+ " <td>Some refineries in the Asia-Pacific region in...</td>\n",
939
+ " <td>REFINERY NEWS ROUNDUP: Mixed runs in Asia-Pacific</td>\n",
940
+ " <td>2024-08-08 11:50:48+00:00</td>\n",
941
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
942
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
943
+ " <td>0.268708</td>\n",
944
+ " <td>0.044504</td>\n",
945
+ " <td>0.992063</td>\n",
946
+ " <td>Bullish</td>\n",
947
+ " </tr>\n",
948
+ " <tr>\n",
949
+ " <th>3</th>\n",
950
+ " <td>The physical low sulfur (1%S) fuel oil Med-No...</td>\n",
951
+ " <td>Physical 1%S fuel oil Med-North spread hits re...</td>\n",
952
+ " <td>2024-08-08 11:28:20+00:00</td>\n",
953
+ " <td>Middle Distillates</td>\n",
954
+ " <td>Middle Distillates</td>\n",
955
+ " <td>0.951985</td>\n",
956
+ " <td>0.009613</td>\n",
957
+ " <td>0.822905</td>\n",
958
+ " <td>Bearish</td>\n",
959
+ " </tr>\n",
960
+ " <tr>\n",
961
+ " <th>4</th>\n",
962
+ " <td>Bunkering activity in India has experienced s...</td>\n",
963
+ " <td>Indian ports see Jan-July bunker, STS calls up...</td>\n",
964
+ " <td>2024-08-08 11:27:15+00:00</td>\n",
965
+ " <td>Heavy Distillates</td>\n",
966
+ " <td>Heavy Distillates</td>\n",
967
+ " <td>0.098844</td>\n",
968
+ " <td>0.059348</td>\n",
969
+ " <td>0.997325</td>\n",
970
+ " <td>Bullish</td>\n",
971
+ " </tr>\n",
972
+ " <tr>\n",
973
+ " <th>...</th>\n",
974
+ " <td>...</td>\n",
975
+ " <td>...</td>\n",
976
+ " <td>...</td>\n",
977
+ " <td>...</td>\n",
978
+ " <td>...</td>\n",
979
+ " <td>...</td>\n",
980
+ " <td>...</td>\n",
981
+ " <td>...</td>\n",
982
+ " <td>...</td>\n",
983
+ " </tr>\n",
984
+ " <tr>\n",
985
+ " <th>136</th>\n",
986
+ " <td>Saudi Aramco maintained or raised the Asia-bo...</td>\n",
987
+ " <td>Saudi Aramco maintains or raises Asia-bound Se...</td>\n",
988
+ " <td>2024-08-05 01:59:16+00:00</td>\n",
989
+ " <td>Crude Oil</td>\n",
990
+ " <td>Crude Oil</td>\n",
991
+ " <td>0.154642</td>\n",
992
+ " <td>0.032633</td>\n",
993
+ " <td>0.997273</td>\n",
994
+ " <td>Bullish</td>\n",
995
+ " </tr>\n",
996
+ " <tr>\n",
997
+ " <th>137</th>\n",
998
+ " <td>The combined open interest for front-month Si...</td>\n",
999
+ " <td>ICE front-month Singapore HSFO open interest r...</td>\n",
1000
+ " <td>2024-08-05 01:26:07+00:00</td>\n",
1001
+ " <td>Heavy Distillates</td>\n",
1002
+ " <td>Heavy Distillates</td>\n",
1003
+ " <td>0.368089</td>\n",
1004
+ " <td>0.017483</td>\n",
1005
+ " <td>0.994805</td>\n",
1006
+ " <td>Bullish</td>\n",
1007
+ " </tr>\n",
1008
+ " <tr>\n",
1009
+ " <th>138</th>\n",
1010
+ " <td>Production will be increasing “in the near fu...</td>\n",
1011
+ " <td>REFINERY NEWS: Fort Energy at Fujairah ‘remain...</td>\n",
1012
+ " <td>2024-08-05 00:45:56+00:00</td>\n",
1013
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
1014
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
1015
+ " <td>0.775953</td>\n",
1016
+ " <td>0.520116</td>\n",
1017
+ " <td>0.177664</td>\n",
1018
+ " <td>Bearish</td>\n",
1019
+ " </tr>\n",
1020
+ " <tr>\n",
1021
+ " <th>139</th>\n",
1022
+ " <td>Container ship Groton was attacked 125 nautic...</td>\n",
1023
+ " <td>Container ship Groton attacked near Yemen amid...</td>\n",
1024
+ " <td>2024-08-04 12:25:30+00:00</td>\n",
1025
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
1026
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
1027
+ " <td>0.245594</td>\n",
1028
+ " <td>0.044671</td>\n",
1029
+ " <td>0.994086</td>\n",
1030
+ " <td>Bullish</td>\n",
1031
+ " </tr>\n",
1032
+ " <tr>\n",
1033
+ " <th>140</th>\n",
1034
+ " <td>A drone strike on an oil depot in Russia’s Be...</td>\n",
1035
+ " <td>Oil depot in Russia’s Belgorod region hit by d...</td>\n",
1036
+ " <td>2024-08-04 10:14:50+00:00</td>\n",
1037
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
1038
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
1039
+ " <td>0.243901</td>\n",
1040
+ " <td>0.951199</td>\n",
1041
+ " <td>0.129076</td>\n",
1042
+ " <td>Neutral</td>\n",
1043
+ " </tr>\n",
1044
+ " </tbody>\n",
1045
+ "</table>\n",
1046
+ "<p>141 rows × 9 columns</p>\n",
1047
+ "</div>"
1048
+ ],
1049
+ "text/plain": [
1050
+ " body \\\n",
1051
+ "0 Spanish crude import volumes increased 11% ye... \n",
1052
+ "1 A number of refineries in China have resumed ... \n",
1053
+ "2 Some refineries in the Asia-Pacific region in... \n",
1054
+ "3 The physical low sulfur (1%S) fuel oil Med-No... \n",
1055
+ "4 Bunkering activity in India has experienced s... \n",
1056
+ ".. ... \n",
1057
+ "136 Saudi Aramco maintained or raised the Asia-bo... \n",
1058
+ "137 The combined open interest for front-month Si... \n",
1059
+ "138 Production will be increasing “in the near fu... \n",
1060
+ "139 Container ship Groton was attacked 125 nautic... \n",
1061
+ "140 A drone strike on an oil depot in Russia’s Be... \n",
1062
+ "\n",
1063
+ " headline \\\n",
1064
+ "0 SPAIN DATA: H1 crude imports rise 11% to 1.4 m... \n",
1065
+ "1 REFINERY NEWS: Host of Chinese units back from... \n",
1066
+ "2 REFINERY NEWS ROUNDUP: Mixed runs in Asia-Pacific \n",
1067
+ "3 Physical 1%S fuel oil Med-North spread hits re... \n",
1068
+ "4 Indian ports see Jan-July bunker, STS calls up... \n",
1069
+ ".. ... \n",
1070
+ "136 Saudi Aramco maintains or raises Asia-bound Se... \n",
1071
+ "137 ICE front-month Singapore HSFO open interest r... \n",
1072
+ "138 REFINERY NEWS: Fort Energy at Fujairah ‘remain... \n",
1073
+ "139 Container ship Groton attacked near Yemen amid... \n",
1074
+ "140 Oil depot in Russia’s Belgorod region hit by d... \n",
1075
+ "\n",
1076
+ " updatedDate topic_prediction \\\n",
1077
+ "0 2024-08-08 12:11:55+00:00 Crude Oil \n",
1078
+ "1 2024-08-08 11:51:12+00:00 Macroeconomic & Geopolitics \n",
1079
+ "2 2024-08-08 11:50:48+00:00 Macroeconomic & Geopolitics \n",
1080
+ "3 2024-08-08 11:28:20+00:00 Middle Distillates \n",
1081
+ "4 2024-08-08 11:27:15+00:00 Heavy Distillates \n",
1082
+ ".. ... ... \n",
1083
+ "136 2024-08-05 01:59:16+00:00 Crude Oil \n",
1084
+ "137 2024-08-05 01:26:07+00:00 Heavy Distillates \n",
1085
+ "138 2024-08-05 00:45:56+00:00 Macroeconomic & Geopolitics \n",
1086
+ "139 2024-08-04 12:25:30+00:00 Macroeconomic & Geopolitics \n",
1087
+ "140 2024-08-04 10:14:50+00:00 Macroeconomic & Geopolitics \n",
1088
+ "\n",
1089
+ " topic_verification negative_score neutral_score \\\n",
1090
+ "0 Crude Oil 0.991473 0.005524 \n",
1091
+ "1 Macroeconomic & Geopolitics 0.417054 0.845595 \n",
1092
+ "2 Macroeconomic & Geopolitics 0.268708 0.044504 \n",
1093
+ "3 Middle Distillates 0.951985 0.009613 \n",
1094
+ "4 Heavy Distillates 0.098844 0.059348 \n",
1095
+ ".. ... ... ... \n",
1096
+ "136 Crude Oil 0.154642 0.032633 \n",
1097
+ "137 Heavy Distillates 0.368089 0.017483 \n",
1098
+ "138 Macroeconomic & Geopolitics 0.775953 0.520116 \n",
1099
+ "139 Macroeconomic & Geopolitics 0.245594 0.044671 \n",
1100
+ "140 Macroeconomic & Geopolitics 0.243901 0.951199 \n",
1101
+ "\n",
1102
+ " positive_score trend_prediction \n",
1103
+ "0 0.519264 Bearish \n",
1104
+ "1 0.180685 Neutral \n",
1105
+ "2 0.992063 Bullish \n",
1106
+ "3 0.822905 Bearish \n",
1107
+ "4 0.997325 Bullish \n",
1108
+ ".. ... ... \n",
1109
+ "136 0.997273 Bullish \n",
1110
+ "137 0.994805 Bullish \n",
1111
+ "138 0.177664 Bearish \n",
1112
+ "139 0.994086 Bullish \n",
1113
+ "140 0.129076 Neutral \n",
1114
+ "\n",
1115
+ "[141 rows x 9 columns]"
1116
+ ]
1117
+ },
1118
+ "execution_count": 24,
1119
+ "metadata": {},
1120
+ "output_type": "execute_result"
1121
+ }
1122
+ ],
1123
+ "source": [
1124
+ "import pandas as pd\n",
1125
+ "import os\n",
1126
+ "\n",
1127
+ "test = pd.read_excel('evaluation.xlsx').drop(columns=['Unnamed: 0']).iloc[:, :9]\n",
1128
+ "\n",
1129
+ "test"
1130
+ ]
1131
+ },
1132
+ {
1133
+ "cell_type": "code",
1134
+ "execution_count": 29,
1135
+ "metadata": {},
1136
+ "outputs": [],
1137
+ "source": [
1138
+ "test['updatedDate'] = pd.to_datetime(test['updatedDate'], format='%Y-%m-%d %H:%M:%S%z')"
1139
+ ]
1140
+ },
1141
+ {
1142
+ "cell_type": "code",
1143
+ "execution_count": 30,
1144
+ "metadata": {},
1145
+ "outputs": [
1146
+ {
1147
+ "name": "stdout",
1148
+ "output_type": "stream",
1149
+ "text": [
1150
+ "<class 'pandas.core.frame.DataFrame'>\n",
1151
+ "RangeIndex: 141 entries, 0 to 140\n",
1152
+ "Data columns (total 9 columns):\n",
1153
+ " # Column Non-Null Count Dtype \n",
1154
+ "--- ------ -------------- ----- \n",
1155
+ " 0 body 141 non-null object \n",
1156
+ " 1 headline 141 non-null object \n",
1157
+ " 2 updatedDate 141 non-null datetime64[ns, UTC]\n",
1158
+ " 3 topic_prediction 141 non-null object \n",
1159
+ " 4 topic_verification 141 non-null object \n",
1160
+ " 5 negative_score 141 non-null float64 \n",
1161
+ " 6 neutral_score 141 non-null float64 \n",
1162
+ " 7 positive_score 141 non-null float64 \n",
1163
+ " 8 trend_prediction 141 non-null object \n",
1164
+ "dtypes: datetime64[ns, UTC](1), float64(3), object(5)\n",
1165
+ "memory usage: 10.0+ KB\n"
1166
+ ]
1167
+ }
1168
+ ],
1169
+ "source": [
1170
+ "test.info()"
1171
+ ]
1172
+ },
1173
  {
1174
  "cell_type": "code",
1175
  "execution_count": null,
1176
  "metadata": {},
1177
  "outputs": [],
1178
+ "source": [
1179
+ "from datetime import datetime\n",
1180
+ "import numpy as np\n",
1181
+ "\n",
1182
+ "test[test['updatedDate']>= datetime.strptime('2024-08-07 00:00:00+00:00', '%Y-%m-%d %H:%M:%S%z')]"
1183
+ ]
1184
+ },
1185
+ {
1186
+ "cell_type": "code",
1187
+ "execution_count": null,
1188
+ "metadata": {},
1189
+ "outputs": [],
1190
+ "source": [
1191
+ "import datetime\n",
1192
+ "\n",
1193
+ "test_date = datetime(2024, 8, 4)"
1194
+ ]
1195
  }
1196
  ],
1197
  "metadata": {