kimic committed on
Commit
bfe6b6c
1 Parent(s): 6275168

Initial commit for GPT

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.csv filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
analysis.ipynb ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 58,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "df_input = pd.read_csv('sampled_data.csv')\n",
11
+ "df_inferenced = pd.read_csv('inference_output.csv')"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 59,
17
+ "metadata": {},
18
+ "outputs": [
19
+ {
20
+ "name": "stdout",
21
+ "output_type": "stream",
22
+ "text": [
23
+ "1000\n",
24
+ "1000\n"
25
+ ]
26
+ }
27
+ ],
28
+ "source": [
29
+ "print(len(df_input))\n",
30
+ "print(len(df_inferenced))"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": 60,
36
+ "metadata": {},
37
+ "outputs": [],
38
+ "source": [
39
+ "df_combined = pd.concat([df_input, df_inferenced], axis=1)"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": 61,
45
+ "metadata": {},
46
+ "outputs": [
47
+ {
48
+ "data": {
49
+ "text/html": [
50
+ "<div>\n",
51
+ "<style scoped>\n",
52
+ " .dataframe tbody tr th:only-of-type {\n",
53
+ " vertical-align: middle;\n",
54
+ " }\n",
55
+ "\n",
56
+ " .dataframe tbody tr th {\n",
57
+ " vertical-align: top;\n",
58
+ " }\n",
59
+ "\n",
60
+ " .dataframe thead th {\n",
61
+ " text-align: right;\n",
62
+ " }\n",
63
+ "</style>\n",
64
+ "<table border=\"1\" class=\"dataframe\">\n",
65
+ " <thead>\n",
66
+ " <tr style=\"text-align: right;\">\n",
67
+ " <th></th>\n",
68
+ " <th>title</th>\n",
69
+ " <th>text</th>\n",
70
+ " <th>label</th>\n",
71
+ " <th>Output</th>\n",
72
+ " <th>Tokens Used</th>\n",
73
+ " <th>Finish Reason</th>\n",
74
+ " </tr>\n",
75
+ " </thead>\n",
76
+ " <tbody>\n",
77
+ " <tr>\n",
78
+ " <th>0</th>\n",
79
+ " <td>Live at Truthdig: Robert Scheer and Thomas Fra...</td>\n",
80
+ " <td>Live at Truthdig: Robert Scheer and Thomas Fra...</td>\n",
81
+ " <td>0</td>\n",
82
+ " <td>Real</td>\n",
83
+ " <td>265</td>\n",
84
+ " <td>stop</td>\n",
85
+ " </tr>\n",
86
+ " <tr>\n",
87
+ " <th>1</th>\n",
88
+ " <td>The Mirage of a Return to Manufacturing Greatn...</td>\n",
89
+ " <td>Half a century ago, harvesting California’s 2....</td>\n",
90
+ " <td>1</td>\n",
91
+ " <td>Real</td>\n",
92
+ " <td>1627</td>\n",
93
+ " <td>stop</td>\n",
94
+ " </tr>\n",
95
+ " <tr>\n",
96
+ " <th>2</th>\n",
97
+ " <td>British PM expected to offer to fill post-Brex...</td>\n",
98
+ " <td>(Reuters) - The British government has told Ge...</td>\n",
99
+ " <td>1</td>\n",
100
+ " <td>fake</td>\n",
101
+ " <td>200</td>\n",
102
+ " <td>stop</td>\n",
103
+ " </tr>\n",
104
+ " <tr>\n",
105
+ " <th>3</th>\n",
106
+ " <td>Checkmating Obama</td>\n",
107
+ " <td>Originally published by the Jerusalem Post . \\...</td>\n",
108
+ " <td>0</td>\n",
109
+ " <td>fake</td>\n",
110
+ " <td>2166</td>\n",
111
+ " <td>stop</td>\n",
112
+ " </tr>\n",
113
+ " <tr>\n",
114
+ " <th>4</th>\n",
115
+ " <td>Thirty-eight injured in police charges in Cata...</td>\n",
116
+ " <td>MADRID (Reuters) - Emergency services have att...</td>\n",
117
+ " <td>1</td>\n",
118
+ " <td>Real</td>\n",
119
+ " <td>176</td>\n",
120
+ " <td>stop</td>\n",
121
+ " </tr>\n",
122
+ " </tbody>\n",
123
+ "</table>\n",
124
+ "</div>"
125
+ ],
126
+ "text/plain": [
127
+ " title \\\n",
128
+ "0 Live at Truthdig: Robert Scheer and Thomas Fra... \n",
129
+ "1 The Mirage of a Return to Manufacturing Greatn... \n",
130
+ "2 British PM expected to offer to fill post-Brex... \n",
131
+ "3 Checkmating Obama \n",
132
+ "4 Thirty-eight injured in police charges in Cata... \n",
133
+ "\n",
134
+ " text label Output \\\n",
135
+ "0 Live at Truthdig: Robert Scheer and Thomas Fra... 0 Real \n",
136
+ "1 Half a century ago, harvesting California’s 2.... 1 Real \n",
137
+ "2 (Reuters) - The British government has told Ge... 1 fake \n",
138
+ "3 Originally published by the Jerusalem Post . \\... 0 fake \n",
139
+ "4 MADRID (Reuters) - Emergency services have att... 1 Real \n",
140
+ "\n",
141
+ " Tokens Used Finish Reason \n",
142
+ "0 265 stop \n",
143
+ "1 1627 stop \n",
144
+ "2 200 stop \n",
145
+ "3 2166 stop \n",
146
+ "4 176 stop "
147
+ ]
148
+ },
149
+ "execution_count": 61,
150
+ "metadata": {},
151
+ "output_type": "execute_result"
152
+ }
153
+ ],
154
+ "source": [
155
+ "df_combined.head()"
156
+ ]
157
+ },
158
+ {
159
+ "cell_type": "code",
160
+ "execution_count": 62,
161
+ "metadata": {},
162
+ "outputs": [
163
+ {
164
+ "data": {
165
+ "text/plain": [
166
+ "array(['stop', 'length'], dtype=object)"
167
+ ]
168
+ },
169
+ "execution_count": 62,
170
+ "metadata": {},
171
+ "output_type": "execute_result"
172
+ }
173
+ ],
174
+ "source": [
175
+ "df_combined[\"Finish Reason\"].unique()"
176
+ ]
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": 63,
181
+ "metadata": {},
182
+ "outputs": [
183
+ {
184
+ "data": {
185
+ "text/plain": [
186
+ "994"
187
+ ]
188
+ },
189
+ "execution_count": 63,
190
+ "metadata": {},
191
+ "output_type": "execute_result"
192
+ }
193
+ ],
194
+ "source": [
195
+ "df_combined = df_combined[df_combined[\"Finish Reason\"] != \"length\"]\n",
196
+ "len(df_combined)"
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "code",
201
+ "execution_count": 64,
202
+ "metadata": {},
203
+ "outputs": [],
204
+ "source": [
205
+ "df_combined.drop(columns=[\"title\", \"text\", \"Tokens Used\", \"Finish Reason\"], inplace=True)"
206
+ ]
207
+ },
208
+ {
209
+ "cell_type": "code",
210
+ "execution_count": 66,
211
+ "metadata": {},
212
+ "outputs": [
213
+ {
214
+ "name": "stderr",
215
+ "output_type": "stream",
216
+ "text": [
217
+ "C:\\Users\\kimi\\AppData\\Local\\Temp\\ipykernel_31372\\3169472720.py:2: DeprecationWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`\n",
218
+ " df_combined.loc[:, \"Output\"] = df_combined[\"Output\"].str.strip().str.lower().map({\"real\": 1, \"fake\": 0})\n"
219
+ ]
220
+ }
221
+ ],
222
+ "source": [
223
+ "df_combined = df_combined.copy()\n",
224
+ "df_combined.loc[:, \"Output\"] = df_combined[\"Output\"].str.strip().str.lower().map({\"real\": 1, \"fake\": 0})"
225
+ ]
226
+ },
227
+ {
228
+ "cell_type": "code",
229
+ "execution_count": 68,
230
+ "metadata": {},
231
+ "outputs": [
232
+ {
233
+ "data": {
234
+ "text/plain": [
235
+ "994"
236
+ ]
237
+ },
238
+ "execution_count": 68,
239
+ "metadata": {},
240
+ "output_type": "execute_result"
241
+ }
242
+ ],
243
+ "source": [
244
+ "len(df_combined)"
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "code",
249
+ "execution_count": 70,
250
+ "metadata": {},
251
+ "outputs": [
252
+ {
253
+ "name": "stdout",
254
+ "output_type": "stream",
255
+ "text": [
256
+ "Accuracy: 0.7323943661971831\n",
257
+ "F1 Score: 0.5969696969696969\n"
258
+ ]
259
+ },
260
+ {
261
+ "name": "stderr",
262
+ "output_type": "stream",
263
+ "text": [
264
+ "C:\\Users\\kimi\\AppData\\Local\\Temp\\ipykernel_31372\\2541391757.py:14: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.\n",
265
+ " plt.style.use(\"seaborn-whitegrid\")\n"
266
+ ]
267
+ },
268
+ {
269
+ "data": {
270
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAngAAAIYCAYAAAD+Y5FgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABBPklEQVR4nO3deVhUdf//8degKLgkoOaWVsZi7oSC3K5ouGOKmN0Rd7ZZLl/LCvc0U9wqLTW1XKIM01tvKbk1M0tbBXFJzTTBcskFRQQTQdbfH/2cuwkXUIZxzjwf1zXXJeecOec9I9W71+dzPsdUWFhYKAAAABiGk60LAAAAQOmiwQMAADAYGjwAAACDocEDAAAwGBo8AAAAg6HBAwAAMBgaPAAAAIOhwQMAADAYGjwAVsEa6gBgOzR4sHv79u1TZGSkOnXqpObNm6tLly6aMGGCjh8/brVrbtiwQUFBQWrWrJkmTpxYauf18fHRvHnzSu18N7qWj4+PZs+efdX9BQUFat++vXx8fLR27doSnXv16tWaOXPmDY+LiIhQREREic59LVOmTNGcOXMstmVnZys6OloDBw5UQECAmjVrpuDgYL322ms6deqUxbHz5s0zfydXXo0bN1ZAQICGDRumpKQkSVJCQkKR4672SkhIKFbdH3zwgXx8fPT7779f85iIiIgbXu9Wv8e1a9fesI7ieumll7RkyZJbPg+Am1fe1gUAtyImJkbTpk1TQECAXnrpJd155506duyYlixZok2bNun9999XkyZNSv26kydP1j333KMZM2aoVq1apXbeVatWqXbt2qV2vhtxcnLSxo0b9eKLLxbZl5iYqDNnztzUeRcuXCh/f/8bHjdp0qSbOv/fxcfHa9OmTfr888/N21JSUvT000/r1KlTevTRRzVs2DC5uLjo4MGD+uCDD7R+/XqtWLFC9913n8W5Vq1aZf5zfn6+Tp48qTlz5ig8PFzr169XkyZNLI7Zv3+/XnvtNU2cONHid83T0/OGdR85cuSaDfZfTZo0SRcvXjT/PHnyZPP2K6pUqXLD85SVUaNGKSQkREFBQUW+XwBlgwYPdmvnzp2KiopSeHi4xo8fb94eEBCgLl26KDQ0VGPHjtW6detK/drp6elq27atAgICSvW8LVu2LNXz3cgDDzygHTt2aP/+/UUa4fXr1+v+++/XgQMHrHb94jRBxTF9+nT961//UqVKlST9OTw8atQonTp1SmvWrNE999xjPtbf3199+vRRv379NG3aNC1dutTiXH//O/Dz81OdOnUUHh6u2NhYDR482OKYy5cvmz9LSf7+8vPzNWbMGLm5uen06dPXPfbv39OVZq6sf1+Kq1atWurZs6feeOMNLVy40NblAA6JIVrYraVLl6pq1apXTZ88PDw0ZswYde3a1SL52LBhg0JDQ+Xr66u2bdtq4sSJysjIMO+fN2+egoODtXXrVoWEhKhp06bq1q2bYmNjJf1veE6S3nnnHfOQ1pgxY9S5c2eLGn7//fciw5vLly9X9+7d1axZM7Vv316vvvqqRX1/H6I9c+aMxo4dq44dO6p58+YKCwvTl19+aXEdHx8fxcTEaPz48fL395evr69GjBih1NTUG36H/v7+qlGjhj777DOL7Xl5edq0aZN69epV5D0HDx7U8OHD1aZNGzVp0kTt27fX1KlTlZ2dLUnq3LmzTpw4odjYWPP3s3btWjVu3FirV69Wu3bt1KFDByUlJVkM0X744YdFvq/ExETdf//9mjt37jU/w9atW/XLL7+od+/e5m07duxQfHy8Ro4cadHcXeHm5qYRI0borrvuUkFBwQ2/p6ZNm0qSTpw4ccNji2vp0qVKTU3V4MGDS+2c1xpm7dy5s8aMGWP+2cfHR/Pnz1f//v3l5+enBQsWmPft2rVLffv2VbNmzRQSEqINGzZYnOuPP/7Q9OnT9eCDD6pZs2bq3bu31qxZU6SWPn36aMuWLTp06FCp
fT4AxUeDB7tUWFio7777ToGBgXJ1db3qMd27d9fw4cPNaceCBQs0cuRItWjRQnPnztWwYcP0+eefKyIiwtycSNLZs2f12muv6V//+pfee+893XXXXRozZowOHz5sMTwXFhamVatW6c477yxWzevXr9fMmTMVHh6upUuXatiwYfr00081derUqx6fmpqqsLAwbd++XSNHjtS8efNUr149DRs2rEgqOWfOHBUUFGj27NkaNWqUtm7dqmnTpt2wJicnJ3Xr1k0bN2602L5t2zZdvnxZQUFBFtvPnDmj8PBwZWVlacaMGVq8eLF69Oih5cuXKzo6WpI0f/581axZUx07drT4fvLz87Vo0SJNnTpVL7zwQpFUKiIiQv7+/po5c6bS0tKUmZmpMWPGqGnTpho6dOg1P8O6devUsmVL1alTx7xt8+bNMplMV21Qr+jXr58mT54sJ6cb/2vwt99+kyQ1aNDghscWR1JSkubPn69p06Zd8/fX2hYuXKhu3bpp9uzZ6tKli3n7K6+8ou7du+udd96Rp6enRo4cqe+++07Sn3MaH330Ua1bt05PPvmkFixYID8/P40fP16LFi2yOL+vr69q1aql//73v2X6uQD8iSFa2KXz58/r8uXLuuuuu4p1fEZGhhYuXKgBAwZYzFvy9vZWeHi41q5dq0cffVSSlJWVpaioKAUGBkqS7rnnHgUFBenrr7/Wk08+aR4Wq127domGyBISElSvXj2Fh4fLyclJ/v7+qlSpks6fP3/V499//32lpaXps88+U/369SVJHTt21KBBgzRr1iz17t3b3Jx4e3tr+vTp5vfu3bu3SNN2LT179lRMTIx++uknc1K1YcMGdenSRS4uLhbHHjp0SPfff7/efvttc+P8j3/8Q9u2bVNiYqKee+45NW7cWBUqVJCHh0eR7+e5555Tp06drlqHyWTStGnT1KdPH73++uuqUKGC0tLStGzZMpUvf+1/VcXHxxdp5I4dOyY3Nze5ublZbM/Pzy9yd2+5cuVkMpnMP+fl5Zn/nJ2drYMHD2ratGmqWrWq+vTpc806iisvL0+jR4/WgAED5O/vXyo3NdyM5s2bW6SH+/fvlyQNGzbMvL1Dhw46cuSI5s+fr3bt2mnt2rU6dOiQVqxYIT8/P0lS+/btlZeXpwULFuiRRx4xf+cmk0lNmzbVtm3byvaDAZBEggc7daWxyc/PL9bxP/74o3JychQSEmKxvVWrVqpXr16ROx7/2phcuenh0qVLt1Cx1KZNGx05ckShoaFasGCBfv75Z4WEhOjxxx+/6vHbt2+Xr6+vubm7ok+fPjp79qx+/fXXq9Z7peasrKxi1eXn56datWqZh2lzcnK0efNmiyHPK9q1a6ePPvpIFStW1G+//aYtW7Zo0aJFSktLU05Ozg2v5e3tfd399evX1+jRoxUbG6tVq1Zp3Lhxuvvuu695fFZWls6dO1ek0b/WEi2PPfaYmjRpYvHavn27xTF/3efn56fw8HBdvnxZ8+bNU82aNW/4Ga/Iz89XXl6e+XXld3XRokW6cOGCXnrppWKfyxqu9XfRo0cPi58ffPBB/fjjj8rMzNT27dtVr149c3N3RZ8+fXT58mXt2bPHYnu9evVs1sACjo4ED3bJzc1NlStX1smTJ695zKVLl5STkyM3NzfzPLsaNWoUOa5GjRr6448/LLb9ddjsSjN5q+u69ezZUwUFBVqxYoXmz5+vt99+W/Xq1dNLL7101aHEjIyMqyaUVz7DhQsXrlrvlZqLW6/JZFL37t21ceNGRUZG6ttvv5WTk5Patm2rlJQUi2OvDAPHxMTo0qVLqlOnjpo3b66KFSsW61rVq1e/4TE9evTQ9OnTlZ+fr3bt2l332CvfwZWbK66oV6+etm7dqosXL1rcXRoVFaXMzExJfyZWV7uL96/zyZydnVWzZs1i1f13gwYNsmge/f39NXbsWC1atEiLFy9WhQoVlJeXZ54DWFBQoPz8fJUrV67E17oZ
V/tnQVKRJrZ69eoqLCzUxYsXlZGRcc1/hiTL30npz9/Lv/+zBaBs0ODBbrVr104JCQm6fPnyVRuMtWvXKioqSitWrFC1atUk/Tmv7e/LNpw9e7ZISlZSJpOpSJp4tcSvd+/e6t27t/744w999913Wrx4sSIjI9WqVasiy61Uq1btqjdKnD17VpLk7u5+SzX/Vc+ePfXBBx9o37592rBhg7p27SpnZ+cix7333nuKjo7Wq6++qm7duqlq1aqS/pyPWFqmTp0qFxcXubq6asKECUXucv2rK9/B3xuLLl266KOPPtKmTZsUGhpq3t6wYUPzn6+VyDZr1uxWyjebPHmyuZmUpMqVK2v9+vXKzc3VoEGDihwfHBwsf39/LV++/KaveWWo+e83jvy1jhvJyMiwGJpPTU1VuXLlVK1aNVWrVk1Hjx4t8p5r/U5euHChVH9PARQfQ7SwW08++aTS09OLLG4rSefOndOSJUt09913q2XLlmrRooUqVKiguLg4i+N27NihkydP6oEHHrilWipXrmyeF3jFrl27LI554YUXNHz4cElS1apV1aNHDw0dOlT5+flXXW+udevW2r17d5EFm9etW6eaNWted+iypFq2bKl69eopLi5OX3311TVvTti5c6c8PT0VFhZmbu5SUlJ06NAhi6aiODcuXM3mzZu1bt06jRkzRpMmTdJ3332nlStXXvP4ChUqqGbNmkUWLQ4MDJS/v79ef/11i6Hsv7qycLG1NGzYUM2aNTO/GjZsqIcfflhr1qyxeF35nVi4cKF5fbubdSWt/Ov38euvvyo9Pb3Y5/j222/Nfy4oKNDGjRvVokULubi4qHXr1jpx4oR27txp8Z5169bJ2dlZzZs3t9h+6tQp1atX7yY+CYBbRYIHu9WyZUs9//zzeuutt3T48GH169dP7u7uSkpK0rJly5SZman33ntPJpNJbm5uGjx4sObPny9nZ2d16dJFv//+u95++215enpapDw3IygoSMuXL9e4ceM0YMAAcw1/HW5r06aNJk2apJkzZ6pDhw66cOGC5s+fr3vuuUeNGjUqcs4nnnhC69at0xNPPKHhw4fL3d1dn3zyieLj4zVt2rSbbqKupXv37vrwww/l5uZ2zUWKmzdvrgULFui9995Ty5YtdfToUb377rvKycmxmPN3xx136Oeff9b27duL/Ef/WtLS0jRp0iS1bdtW/fr1kyR169ZNM2fOVNu2ba+ZsrZt27ZIM20ymTR79mwNGTJEoaGhGjBggNq0aaOqVavqyJEj+u9//6uEhAS1aNHiqsuoWEutWrWKJLVXGk1vb+9i3zR0LW3atJGrq6tmzJihF154QZmZmZo/f36Rm02u56233lJ+fr7q1Kmjjz/+WL/99pvef/99SVJoaKhWrFih4cOHa8SIEapfv76++uor/ec//9Hw4cN1xx13mM9TWFio3bt3l9qTSgCUDA0e7NqQIUPUuHFjxcTEaPr06UpPT1ft2rXVoUMHPffcc6pbt6752P/7v/9TjRo19NFHH2n16tVyc3NT9+7d9cILL9zyUhVt27bV6NGjtXz5cm3atElNmjTR/Pnz9cgjj5iPeeSRR5Sbm6uVK1dqxYoVcnFxUWBgoCIjI686HFqzZk19/PHHevPNNxUVFaXc3Fw1atRICxYssFjWorT07NlTS5cuVY8ePa7ZPD777LM6f/68PvzwQ73zzjuqU6eOHnroIZlMJr377rvKyMhQtWrV9OSTT2ratGl66qmnzM3BjVwZ0vxrivXKK6+oZ8+eGjdunD788EOLu12v6Natm+Li4nTmzBmLJWuufH+ffPKJ4uLitH79el24cMF8d++CBQvUuXPnq57TXlWtWlVz587Vm2++qWHDhqlevXoaPny4Pvnkk2KfIyoqSrNmzdLRo0fl7e2txYsXmxt+V1dXLV++XG+++abmzp2rixcvqmHDhoqKiioyTL93716lp6ere/fupfkRARSTqZAnggOwY4WFhXro
oYfUrVs3DRs2zNbl4P8bO3asMjIyLBZRBlB2mIMHwK6ZTCa9/PLL+vjjjy2eCgLbOXnypDZt2qTnn3/e1qUADosGD4Dd69Chg7p06aJ3333X1qVA0htvvKHBgwebH+sHoOwxRAsAAGAwJHgAAAAGQ4MHAABgMDR4AAAABkODBwAAYDB2t9Cxq+9wW5cAwErOJ863dQkArMTFhh2HtXuHrN2337+77K7BAwAAKBGT4w1YOt4nBgAAMDgSPAAAYGwGeuZ0cZHgAQAAGAwJHgAAMDbm4AEAAMDekeABAABjYw4eAAAA7B0JHgAAMDYHnINHgwcAAIyNIVoAAADYOxI8AABgbA44ROt4nxgAAMDgSPAAAICxMQcPAAAA9o4EDwAAGBtz8AAAAGDvSPAAAICxOeAcPBo8AABgbAzRAgAAwN6R4AEAAGNzwCFaEjwAAACDIcEDAADGxhw8AAAA2DsSPAAAYGwkeAAAACgNGzZsUOPGjeXr62t+RUZGSpL27NmjAQMGyNfXV507d9bq1ast3hsbG6vg4GC1bNlSoaGh2r17d4muTYIHAACMzck2d9Hu27dPDz30kKZPn26xPSMjQ4MHD9aIESM0cOBAJSYmatiwYfLx8VHz5s2VkJCgKVOmaPHixWrevLliYmI0ZMgQbdmyRa6ursW6NgkeAAAwNpOTdV/XsG/fPjVt2rTI9k2bNsnNzU3h4eEqX768AgMDFRISopiYGEnS6tWr1atXL/n5+cnZ2VmDBg2Su7u7NmzYUOyPTIMHAABQygoKCrR//35t3bpVQUFB6tChg1555RVlZGQoKSlJ3t7eFsd7enrq4MGDkqTk5OTr7i8OGjwAAGBsJpN1X1eRlpamxo0bq1u3btqwYYNWrlypI0eOKDIyUpmZmUWGWl1cXHTp0iVJuuH+4qDBAwAAKGU1atRQTEyMwsLC5Orqqrp16yoyMlLffPONCgsLlZ2dbXF8dna2KleuLElydXW97v7ioMEDAADGZoM5eAcPHtQbb7yhwsJC87acnBw5OTmpefPmSkpKsjg+OTlZXl5ekiQvL6/r7i8OGjwAAIBS5ubmppiYGC1ZskR5eXk6efKkXn/9dfXr10/dunVTamqqoqOjlZubq/j4eMXFxal///6SpLCwMMXFxSk+Pl65ubmKjo7WuXPnFBwcXOzrmwr/2lraAVff4bYuAYCVnE+cb+sSAFiJiw0XZnMNnmnV82d9Mfqq27dv367Zs2fr0KFDqlixonr16qXIyEhVrFhR+/btU1RUlA4dOiQPDw8NHTpUoaGh5vd++umnWrhwoVJSUuTp6akJEyaoRYsWxa6JBg/AbYMGDzAuR2zwbImFjgEAgLE54KPKaPAAAICxXWMpEyNzvJYWAADA4EjwAACAsTngEK3jfWIAAACDI8EDAADGxhw8AAAA2DsSPAAAYGzMwQMAAIC9I8EDAADGxhw8AAAA2DsSPAAAYGwOOAePBg8AABibAzZ4jveJAQAADI4EDwAAGBs3WQAAAMDekeABAABjYw4eAAAA7B0JHgAAMDbm4AEAAMDekeABAABjc8A5eDR4AADA2BiiBQAAgL0jwQMAAIZmIsEDAACAvSPBAwAAhkaCBwAAALtHggcAAIzN8QI8EjwAAACjIcEDAACG5ohz8GjwAACAoTlig8cQLQAAgMGQ4AEAAEMjwQMAAIDdI8EDAACGRoIHAAAAu0eCBwAAjM3xAjwSPAAAAKMhwQMAAIbmiHPwaPAAAIChOWKDxxAtAACAwZDgAQAAQyPBAwAAgN0jwQMAAIZGggcAAAC7R4IHAACMzfECPBI8AAAAoyHBAwAAhuaIc/Bo8AAAgKE5YoPHEC0AAIDBkOABAABDI8EDAACA3SPBAwAAxuZ4AR4JHgAAgNGQ4AEAAENjDh4AAADsHgkeAAAwNEdM8GjwAACAoTlig8cQLQAAgMGQ4AEAAEMjwQMAAIDdI8EDAADG5ngBHgkeAACA0ZDgAQAAQ2MO
HgAAAOweCR4AADA0R0zwaPAAAIChOWKDxxAtAACAwZDgAQAAY3O8AI8EDwAAwGhI8AAAgKExBw8AAAB2jwQPAAAYGgkeAAAA7B4JHmwirOsDej/qcWXn5Jm3rftqj5565UM9+3AHDQ/vpNo1qul0aobeWbFVi1Z9U+QcIx7rrF4dm6nbM2+XZekASmjjZxs0bvTLqlChonlb5wcf1LQZr2vv3j2aOW2qDicny93DXc88O0Sh/QfYsFoYkSMmeDR4sAm/JndrxfpEPfvqRxbbe3ZoqolDe6n3kPnafeC4/Bo30BdLX9DPh0/pmx1JkqRKLhU0cWgvPR/RxbwNwO1r/0/71CvkIU2Jmm6x/UJGhoY/N1hDh49Q2MMDtXNHokaOGCYvLx81a97cRtXCiByxwWOIFjbh16SBdv18rMj2Dd/8JJ+eE7X7wHGVK+ek6u5VVFgoZfyRZT5m+6qxql2jmt79d9FUD8DtZ/9P+9SkSdMi2zd/sUnV3Nz0yKPhKl++vALaBKpn7xCt+jjGBlUCxmKzBO/ixYvKzMxU5cqVVaVKFVuVARswmUxq2ai+MrNy9OKgB1XOyaSN3/2sCW9/ovQ/snTx0mV53X2ndq0Zr/Lly+nt5V9qzy+/m9/f7Zm3deJMusY/21P3N7ThBwFwQwUFBTrw8365uroqetkS5Rfkq337jnrhxZd1ODlJXl7eFsc3vM9Tn/xnjY2qhWE5XoBXtgleQUGBli1bps6dO6t169bq1KmTWrduraCgIL3zzjsqLCwsy3JgIzXdq2jPL78rdvNutQydoqBBs+XZoKaWRT1uPua3E6lyDxyptuGzNKCbn14a9KB534kz6TaoGsDNOJ+Wpkb3N9aDXbspNm6DPvxopY4ePaJxYyKVeSlTrpVcLY53cXHRpUuXbFQtYBxlmuDNmDFD27Zt08svvyxPT0+5uroqKytLycnJWrhwoS5duqTIyMiyLAk2cCbtDwU/9Zb55+Onz2v8W5/om+Uvq0qlirp46bLy8gokSbt+PqZ3VmzVwJ6t9Gb0ZhtVDOBmVa9RQ+9/+L8hV1dXV418KVKP/fNhPdQvVNlZ2RbHZ2dnq1LlymVdJgyOOXhWFhcXp4ULF6pnz57y9vZW/fr15e3trZ49e2rBggX65JNPyrIc2EhTr7qaMqKPxbaKFcqroKBQzz7cXstnPGGxr0KF8jqfwf/RA/bo0C8H9dbsNyxGaHJycuTk5KSmzZrr8GHLG6V+PZwsTy+vsi4TMJwybfDy8vJ05513XnWfh4eH8vPzy7Ic2Mj5jEt6bmBHvfj4gypXzkn1a7tr2sh+Wh6XoK8SflFIUHP1D/aVyWRSYIuGGvbPTlq8+ltblw3gJlSr5qaVK2IUvWyJ8vLydOrkSc1583X16dtPwV27KTU1VR99GK3c3FxtT4jXhv/GqW+//rYuGwZjMpms+rodlWmD5+/vrwkTJig1NdVie1pamiZOnKiAgICyLAc2cuJMuvqNWKiQoOY6uXWmvosZpZ37j2rkjH9r94HjejRyqUY93U2nv5mlueMfUeTra/SfL3bbumwAN6FW7dqav/BdbfnqS3X4h7/+ObC/mjRtprHjJ8rNzV3vLl6mLz7fqI5tAzR50gSNHjtB/gFtbF02YPdMhWV4Z0NaWpqef/557dixQ9WqVVOlSpWUlZWl9PR0+fn5ae7cufLw8LjuOVx9h5dRtQDK2vnE+bYuAYCVuNhw5V3Plz+z6vmT3+hh1fPfjDL9uj08PLR8+XIdO3ZMSUlJyszMVKVKleTl5aW77767LEsBAAAO4nYdRrUmm/TTDRo0UIMGDWxxaQAAAMPjSRYAAMDQTCbrvm4kPz9fERERGjNmjHnbnj17NGDAAPn6+qpz585avXq1xXtiY2MVHBysli1bKjQ0VLt3l2wuOg0eAACAFc2fP187duww/5yRkaHBgwerb9++SkxMVFRUlKZPn669e/dKkhISEjRlyhTNmDFDiYmJ6tOnj4YM
GaKsrKxrXaIIGjwAAGBotlwmZdu2bdq0aZO6du1q3rZp0ya5ubkpPPzP5zAHBgYqJCREMTF/Lgq+evVq9erVS35+fnJ2dtagQYPk7u6uDRs2FPsz0+ABAABYwblz5zR+/Hi9+eabcnX932P5kpKS5O1t+RxmT09PHTx4UJKUnJx83f3FYcOblgEAAKzPFjfRFhQUKDIyUk888YQaNWpksS8zM9Oi4ZMsn8N8o/3FQYIHAABQyt59911VqFBBERERRfa5uroqO7voc5gr///nMN9of3GQ4AEAAENzcir7CO/TTz/VmTNn1KpVK0kyN2ybN2/WqFGj9P3331scn5ycLK///xxmLy8vJSUlFdnfoUOHYl+fBA8AAKCUbdy4Ubt27dKOHTu0Y8cO9e7dW71799aOHTsUHBys1NRURUf/+Rzm+Ph4xcXFqX//P5/DHBYWpri4OMXHxys3N1fR0dE6d+6cgoODi319EjwAAGBot9uDLNzd3bVs2TJFRUWZH9M6YcIEtWnz53OYAwMDNWnSJL366qtKSUmRp6enFi9eLDc3t2Jfo0yfRVsaeBYtYFw8ixYwLls+i7bphC+sev6fphY/WSsrDNECAAAYDEO0AADA0G63IdqyQIIHAABgMCR4AADA0G70ODEjIsEDAAAwGBI8AABgaCR4AAAAsHskeAAAwNAcMMCjwQMAAMbGEC0AAADsHgkeAAAwNAcM8EjwAAAAjIYEDwAAGBpz8AAAAGD3SPAAAIChOWCAR4IHAABgNCR4AADA0BxxDh4NHgAAMDQH7O8YogUAADAaEjwAAGBojjhES4IHAABgMCR4AADA0BwwwCPBAwAAMBoSPAAAYGjMwQMAAIDdI8EDAACG5oABHg0eAAAwNoZoAQAAYPdI8AAAgKE5YIBHggcAAGA0JHgAAMDQmIMHAAAAu0eCBwAADI0EDwAAAHaPBA8AABiaAwZ4NHgAAMDYGKIFAACA3SPBAwAAhuaAAR4JHgAAgNGQ4AEAAENjDh4AAADsHgkeAAAwNAcM8EjwAAAAjIYEDwAAGJqTA0Z4NHgAAMDQHLC/Y4gWAADAaEjwAACAobFMCgAAAOweCR4AADA0J8cL8EjwAAAAjIYEDwAAGBpz8AAAAGD3SPAAAIChOWCAR4MHAACMzSTH6/AYogUAADAYEjwAAGBoLJMCAAAAu0eCBwAADI1lUgAAAGD3SPAAAIChOWCAR4IHAABgNCR4AADA0JwcMMKjwQMAAIbmgP0dQ7QAAABGQ4IHAAAMjWVSAAAAYPdI8AAAgKE5YIBHggcAAGA0xUrwGjVqdMPx6wMHDpRKQQAAAKWJZVKu4cMPP7R2HQAAACglxWrw/P39LX7OyMjQ8ePH1bhxY+Xl5alChQpWKQ4AAOBWOV5+V8I5eJmZmXrppZcUEBCgxx57TEeOHFFwcLB+/fVXa9UHAACAEipRgzdr1ixdunRJn332mZydnVW/fn0FBQUpKirKWvUBAADcEpPJZNXX7ahEy6Rs2bJFcXFxqlatmkwmk5ydnTVmzBh16NDBWvUBAADcEqfbswezqhIleAUFBeb5doWFhUW2AQAAwPZK1OC1adNGr732mrKyssyR5FtvvVXkJgwAAIDbhSMO0ZaowRs7dqwOHz6s1q1b648//pCvr68SExM1evRoa9UHAACAEirRHLzq1atr1apV2rdvn06cOKHatWurefPmKleunLXqAwAAuCW3achmVSV+Fm1mZqaOHz+ulJQUOTk5KTc3lwYPAADgNlKiBm/fvn16+umn5eLiotq1a+vEiROaOXOmlixZooYNG1qrRgAAgJt2u86Ts6YSzcGbPn26nnjiCX399ddatWqVvv32Wz300EN67bXXrFUfAAAASqhECV5ycrKWL19u/tlkMmno0KEKDAws9cIAAABKA+vg3YCPj49+/PFHi20HDhxQ/fr1S7MmAACAUuOIy6QUK8GbP3++JKlOnTp69tlnFRYWprvuuktnzpzR
mjVr1LVrV6sWCQAAgOIrVoOXkJBg/vP999+v/fv3a//+/ZKk++67T7/++qt1qgMAALhFt2fGZl3FavD+Ou8OAAAAt7cSr4MXHx+vlJQU87Noc3Nz9csvv2jChAmlXhwAAMCtcrLRPLlt27Zp9uzZOnz4sFxdXdW9e3dFRkbKxcVFe/bs0dSpU5WcnCx3d3cNGTJEAwYMML83NjZWCxYs0NmzZ9WwYUO98sor8vX1Lfa1S9TgTZ06VStXrlTlypUlSfn5+crMzFT79u1LchoAAABDS0tL07PPPqtXX31Vffv2VWpqqp566im99957evzxxzV48GCNGDFCAwcOVGJiooYNGyYfHx81b95cCQkJmjJlihYvXqzmzZsrJiZGQ4YM0ZYtW+Tq6lqs65foLtrPPvtMH330kd566y117txZiYmJevzxx1W7du2b+vAAAADWZjJZ93U1Hh4e+uGHHxQaGiqTyaT09HRdvnxZHh4e2rRpk9zc3BQeHq7y5csrMDBQISEhiomJkSStXr1avXr1kp+fn5ydnTVo0CC5u7trw4YNxf7MJWrwsrKy1LJlS3l6emr//v0ymUwaPny4tm7dWpLTAAAAGF6VKlUkSR07dlRISIhq1qyp0NBQJSUlydvb2+JYT09PHTx4UNKf6w5fb39xlKjBq127ts6dO6eaNWvq9OnTys3NlYuLiy5evFiS0wAAAJQZW6+Dt2nTJn3zzTdycnLSiBEjlJmZWWSo1cXFRZcuXZKkG+4vjhI1eB07dtSgQYOUlpam1q1ba9y4cXr11Vd1zz33lOQ0AAAAZcYWQ7R/5eLiolq1aikyMlLffvutXF1dlZ2dbXFMdna2+R6HG+0vjhI1eC+++KIeeughOTs7a+LEiUpPT1dycrKmTJlSktMAAAAY2q5du9S9e3fl5OSYt+Xk5MjZ2Vmenp5KSkqyOD45OVleXl6SJC8vr+vuL44SNXjOzs56+umnVbVqVdWqVUuLFy9WTEyMKlWqVJLTAAAAlBknk8mqr6vx8fFRdna23nzzTeXk5OjEiROaOXOmwsLC1K1bN6Wmpio6Olq5ubmKj49XXFyc+vfvL0kKCwtTXFyc4uPjlZubq+joaJ07d07BwcHF/symwisL2t2klJQUderUSQcOHLiV0xSbq+/wMrkOgLJ3PnG+rUsAYCUuJV55t/QM+c/PVj3/wv6Nr7o9OTlZ06ZN0759+1S1alWFhIRo2LBhqlChgvbt26eoqCgdOnRIHh4eGjp0qEJDQ83v/fTTT7Vw4UKlpKTI09NTEyZMUIsWLYpdU6k0eB07dizRnR23ggYPMC4aPMC4bNngDV1r3QZvQejVGzxbKtEQ7bUU5w4SAAAAlA0b9tMAAADW54hBVLEavMTExGvuS0tLK7ViAAAAcOuK1eBFRERcd39ZdsbRy8aW2bUAlK0vD56xdQkArKRX0zttdu1SmY9mZ4rV4JXVDRQAAAClzRGHaB2xqQUAADA0brIAAACG5uR4AR4JHgAAgNGQ4AEAAEMjwSuGnJwcffHFF4qOjlZWVhY3YAAAANxmSpTgHTt2TE8++aRyc3N14cIFdezYUf3799f8+fMVFBRkrRoBAABuGnfR3kBUVJRCQ0O1detWlS9fXvfee6+mTp2quXPnWqs+AAAAlFCJGrwff/xRTz/9tEwmk7kbfuihh3T8+HGrFAcAAHCrnEzWfd2OStTgVa1aVampqRbbzp49q2rVqpVqUQAAAKXFZLLu63ZUogYvJCREw4cP1/fff6+CggLt3btXL7/8snr16mWt+gAAAFBCJbrJYujQocrOztbw4cOVlZWliIgIhYWFafjw4daqDwAA4JY43a4xmxWVqMFzdnbW6NGjNXr0aKWlpcnd3d0h70wBAAC4nZWowfvkk0+uua9v3763WAoAAEDpc8THdpWowfv7cigZGRnKysqSn58fDR4AAMBtokQN3ldffWXxc2FhoRYvXqz09PTSrAkAAKDUOOJssltK
LU0mk5566il9+umnpVUPAAAAblGJEryr+e2337jRAgAA3La4i/YGIiIiLJq53Nxc/fLLL+rTp0+pFwYAAFAaHLC/K1mDFxAQYPGzk5OTBg0apAcffLBUiwIAAMDNK1GDd/78eY0cOVJVqlSxVj0AAACl6nZ9Xqw1legmi7i4OLm6ulqrFgAAAJSCEiV4/fv31+TJkxUaGqqaNWtazMerW7duqRcHAABwq7jJ4gbef/99SdK///1vc3NXWFgok8mkAwcOlH51AAAAKLFiNXg7d+6Un5+fvvzyS2vXAwAAUKocMMArXoP3zDPPaNeuXapXr5616wEAAMAtKlaDV1hYaO06AAAArMIR76ItVoPHkyoAAIC9Msnx+phiNXhZWVnq0qXLdY9hfh4AAMDtoVgNnrOzs4YPH27tWgAAAEodQ7TXOqh8efXr18/atQAAAKAUcJMFAAAwNEdM8Ir1qLI+ffpYuw4AAACUkmIleJMnT7Z2HQAAAFbhiKuBFCvBAwAAgP0o0bNoAQAA7I0jzsGjwQMAAIbmgCO0DNECAAAYDQkeAAAwNCcHjPBI8AAAAAyGBA8AABiaI95kQYIHAABgMCR4AADA0BxwCh4JHgAAgNGQ4AEAAENzkuNFeCR4AAAABkOCBwAADM0R5+DR4AEAAENjmRQAAADYPRI8AABgaDyqDAAAAHaPBA8AABiaAwZ4JHgAAABGQ4IHAAAMjTl4AAAAsHskeAAAwNAcMMCjwQMAAMbmiMOVjviZAQAADI0EDwAAGJrJAcdoSfAAAAAMhgQPAAAYmuPldyR4AAAAhkOCBwAADI2FjgEAAGD3SPAAAIChOV5+R4MHAAAMzgFHaBmiBQAAMBoSPAAAYGgsdAwAAAC7R4IHAAAMzRHTLEf8zAAAAIZGggcAAAyNOXgAAACweyR4AADA0Bwvv6PBAwAABscQLQAAAOweCR4AADA0R0yzHPEzAwAAGBoJHgAAMDTm4AEAAMDukeABAABDc7z8jgQPAADAcGjwAACAoZlM1n1dy8GDB/XEE0/I399fbdu21ahRo5SWliZJ2rNnjwYMGCBfX1917txZq1evtnhvbGysgoOD1bJlS4WGhmr37t0l+sw0eAAAwNCcZLLq62qys7P19NNPy9fXV999953++9//Kj09XePGjVNGRoYGDx6svn37KjExUVFRUZo+fbr27t0rSUpISNCUKVM0Y8YMJSYmqk+fPhoyZIiysrJK8JkBAABQqk6ePKlGjRpp2LBhqlChgtzd3TVw4EAlJiZq06ZNcnNzU3h4uMqXL6/AwECFhIQoJiZGkrR69Wr16tVLfn5+cnZ21qBBg+Tu7q4NGzYU+/o0eAAAwNBsMUTbsGFDLVmyROXKlTNv+/zzz9WkSRMlJSXJ29vb4nhPT08dPHhQkpScnHzd/cVBgwcAAGBFhYWFmjNnjrZs2aLx48crMzNTrq6uFse4uLjo0qVLknTD/cXBMikAAMDQTDZcKOXixYsaO3as9u/fr48++kg+Pj5ydXXVH3/8YXFcdna2KleuLElydXVVdnZ2kf3u7u7Fvi4JHgAAgBUcO3ZM/fv318WLF7VmzRr5+PhIkry9vZWUlGRxbHJysry8vCRJXl5e191fHDR4AADA0GwxBy8jI0OPP/64HnjgAS1dulQeHh7mfcHBwUpNTVV0dLRyc3MVHx+vuLg49e/fX5IUFhamuLg4xcfHKzc3V9HR0Tp37pyCg4OL/5kLCwsLb+lbK2Ordp+wdQkArKSKs7OtSwBgJb2a3mmza2/Yf8aq5+/ZpOhne//99zVjxgy5uroWeRbu7t27tW/fPkVFRenQoUPy8PDQ0KFDFRoaaj7m008/1cKFC5WSkiJPT09NmDBBLVq0KHZNNHgAbhs0eIBx2bLB27j/rFXP371JTaue/2ZwkwUAADC06z1twqiYgwcAAGAwJHgAAMDQSPAAAABg90jwAACAodlyoWNbIcEDAAAwGBI8AABgaE6OF+CR
4AEAABgNCR4AADA0R5yDR4MHAAAMjWVSAAAAYPdI8AAAgKE54hAtCR4AAIDBkOABAABDY5kUAAAA2D0SPAAAYGjMwQMAAIDdI8GDTZw+elgbP1qkU78eUrny5XVf81bqHjFUle+oZj7m2KH9ip7yoiYu/9zivfsTvtGW1dE6f/a0qrp5qEPfcD0Q1KOsPwKAG7iYcV5vjxuigUNGy7OpryTp553b9NnHi5V66nd51KqrbgOfVPOADpKkmc9H6HxqisU5crKz1DN8sB4MjSjz+mEcjrgOHg0eylxuzmUtnzFGfp176bHR05STdUn/WTBDsYtm6rFR01RYWKjdWzdqwwfzlZeba/HeX/fvVuzCmXr4+Ynyaumv337+UR/NGKNaDe5Vvfsa2egTAfi73w7u1Yp503Tu9Anztt9//UXvzxqn/s+8qNZBPXT0l/1aPG2UKlWuKs+mvhr99nKLc3z28RL9vPMHte/Rv6zLh8E4YH/HEC3KXkZqimo1uE+d+keofHlnVapaTa279NbRA/skSZ8smqUdX61X0IBBRd77w/rVatM9VN6+ATKZTGrYxFfPTlsk91p1y/hTALiWxC2f6aM5r6nno89YbP/x+y26t1FztXkwROXKlVfDxi3k1z5YP3z+SZFzJO3bpa//+2/968XJquhaqYwqB4yDBA9lrkbdBvrX2BkW2/YnfKO6Db0kSZ0fflLVqtfUb/t/LPLeE4d/0b1NfLV85lj9nnRA1arXVFDY46pV/96yKB1AMfi09NcDHYJVrlx5LZ/9qnl7QUG+KlR0sTjW5OSklBNHLbYV5OdrzXtvKDjscdWsW78sSobBOTngGC0JHmyqsLBQm1ct1S+7tqnn48MlSdWq17zm8VkXL+j7uFXq2O8xjXr3P+rUP0Kr507R8aQDZVUygBu4w726ypUrmh80C+igX/Ykas+2rcrPz9NvB/dq93dfKi/nssVxu779Qpezs9S+Z1hZlQwYDgkebCb7UqY+WTRLJ389pKcmvaVaDRre8D3lyzvrgaAeauDdRJLU2L+D7m36mX7e/o3qe91v7ZIB3IJ7GzVT+IgJ+vzf72v1otfV8P7m8u/cU78e2GNx3LbNcQoM7qMKFSvaqFIYjePldzZo8BITE294TOvWrcugEthS2ukTWj5zrKrVqKVnpy2yuHv2emredbfy/3bjRWFBgVRYaI0yAZSizD8uqHb9ezVqzgfmbR++OUn1/3KD1B/paTpycJ8eHT7eFiUChlHmDd748eN1/PhxFV7jP8gmk0kHDjDcZmRZF//Q+1NfUsMmvnro2Ug5ORV/pkDr4D5a//48ebZorXub+OpA4nf6bf+PevCRp6xYMYDSkHrquBZOHqn/i1qg2vXv0d74r7V/x/caOXOx+ZjfDu7THe41VL02N06hFDlghFfmDd7KlSv1yCOPaOTIkerRg7XLHNGurzcqI/WMfor/Wvvjv7bYN+GDDdd97wOdeshkctJnHy5Q+tnTcqtRSwNGTFDde72tWTKAUnC3dxP1+ddQvT9znDIvpOvOenfrqbEzVLvB/26SOpdyUtWq17BhlYAxmAqvFaVZ0c6dOxUZGanNmzeXKL2RpFW7T9z4IAB2qYqzs61LAGAlvZreabNrJxzOsOr5A+4r3jSjsmSTu2j9/Pw0YsQInT9/3haXBwAADsRksu7rdmSzu2j79u1rq0sDAAAYGsukAAAAQ7tNQzarYqFjAAAAgyHBAwAAxuaAER4JHgAAgMGQ4AEAAEMzOWCER4IHAABgMCR4AADA0G7XteqsiQQPAADAYEjwAACAoTlggEeDBwAADM4BOzyGaAEAAAyGBA8AABgay6QAAADA7pHgAQAAQ2OZFAAAANg9EjwAAGBoDhjgkeABAAAYDQkeAAAwNgeM8GjwAACAobFMCgAAAOweCR4AADA0lkkBAACA3SPBAwAAhuaAAR4JHgAAgNGQ4AEAAGNzwAiPBA8AAMBgSPAAAIChOeI6eDR4AADA
0FgmBQAAAHaPBA8AABiaAwZ4JHgAAABGQ4IHAACMzQEjPBI8AAAAgyHBAwAAhuaIy6SQ4AEAABgMCR4AADA0R1wHjwYPAAAYmgP2dwzRAgAAGA0JHgAAMDYHjPBI8AAAAAyGBA8AABgay6QAAADA7pHgAQAAQ3PEZVJI8AAAAAyGBA8AABiaAwZ4NHgAAMDgHLDDY4gWAADAYEjwAACAobFMCgAAAOweCR4AADA0lkkBAACA3SPBAwAAhuaAAR4JHgAAgNGQ4AEAAGNzwAiPBg8AABgay6QAAADA7pHgAQAAQ2OZFAAAAJSqtLQ0BQcHKyEhwbxtz549GjBggHx9fdW5c2etXr3a4j2xsbEKDg5Wy5YtFRoaqt27d5fomjR4AADA0ExWfl3Pzp07NXDgQB07dsy8LSMjQ4MHD1bfvn2VmJioqKgoTZ8+XXv37pUkJSQkaMqUKZoxY4YSExPVp08fDRkyRFlZWcX+zDR4AAAAVhAbG6uXX35ZI0eOtNi+adMmubm5KTw8XOXLl1dgYKBCQkIUExMjSVq9erV69eolPz8/OTs7a9CgQXJ3d9eGDRuKfW0aPAAAYGgmk3Vf19KuXTt98cUX6tmzp8X2pKQkeXt7W2zz9PTUwYMHJUnJycnX3V8c3GQBAABgBTVr1rzq9szMTLm6ulpsc3Fx0aVLl4q1vzhI8AAAgMHZchZeUa6ursrOzrbYlp2drcqVKxdrf3HQ4AEAAEOz1RDttXh7eyspKcliW3Jysry8vCRJXl5e191fHDR4AAAAZSg4OFipqamKjo5Wbm6u4uPjFRcXp/79+0uSwsLCFBcXp/j4eOXm5io6Olrnzp1TcHBwsa/BHDwAAGBot9s6x+7u7lq2bJmioqI0d+5ceXh4aMKECWrTpo0kKTAwUJMmTdKrr76qlJQUeXp6avHixXJzcyv2NUyFhYWFVqrfKlbtPmHrEgBYSRVnZ1uXAMBKejW902bXPpmeY9Xz13WrYNXz3wwSPAAAYGg8qgwAAAB2jwQPAAAYmum2m4VnfSR4AAAABkOCBwAAjM3xAjwaPAAAYGwO2N8xRAsAAGA0JHgAAMDQWCYFAAAAdo8EDwAAGBrLpAAAAMDukeABAABjc7wAjwQPAADAaEjwAACAoTlggEeCBwAAYDQkeAAAwNAccR08GjwAAGBoLJMCAAAAu0eCBwAADM0Rh2hJ8AAAAAyGBg8AAMBgaPAAAAAMhjl4AADA0JiDBwAAALtHggcAAAzNEdfBo8EDAACGxhAtAAAA7B4JHgAAMDQHDPBI8AAAAIyGBA8AABibA0Z4JHgAAAAGQ4IHAAAMzRGXSSHBAwAAMBgSPAAAYGiOuA4eDR4AADA0B+zvGKIFAAAwGhI8AABgbA4Y4ZHgAQAAGAwJHgAAMDSWSQEAAIDdI8EDAACG5ojLpJDgAQAAGIypsLCw0NZFAAAAoPSQ4AEAABgMDR4AAIDB0OABAAAYDA0eAACAwdDg4bZ07tw5DR06VK1atVJAQICioqKUl5dn67IAlKK0tDQFBwcrISHB1qUAhkODh9vSCy+8oEqVKunbb7/VmjVrtG3bNkVHR9u6LAClZOfOnRo4cKCOHTtm61IAQ6LBw23n6NGj2r59uyIjI+Xq6qr69etr6NChiomJsXVpAEpBbGysXn75ZY0cOdLWpQCGRYOH205SUpLc3NxUq1Yt87b77rtPJ0+e1IULF2xYGYDS0K5dO33xxRfq2bOnrUsBDIsGD7edzMxMubq6Wmy78vOlS5dsURKAUlSzZk2VL8+TMgFrosHDbadSpUrKysqy2Hbl58qVK9uiJAAA7AoNHm47Xl5eSk9PV2pqqnnb4cOHVbt2bVWtWtWGlQEAYB9o8HDbueeee+Tn56dp06bp4sWLOn78uBYsWKCwsDBblwYAgF2gwcNtae7cucrLy1OXLl308MMPq3379ho6dKitywIAwC6YCgsLC21dBAAAAEoPCR4AAIDB0OABAAAYDA0e
AACAwdDgAQAAGAwNHgAAgMHQ4AEAABgMDR4AAIDB0OABKFNHjhyxdQkAYHg0eIDBdO7cWc2aNZOvr698fX3VsmVLtWvXTjNnzlRBQUGpXSciIkLz5s2TJE2cOFETJ0684Xu++uorPfXUUzd9zbVr16pz585X3ZeQkCAfH5+bPrePj48SEhJu6r3z5s1TRETETV8bAEpbeVsXAKD0TZ48WaGhoeaff/nlFw0aNEiurq4aMWJEqV/vtddeK9Zx6enp4uE5AGB9JHiAA/Dx8VHr1q31888/S/ozfRszZoyCgoLUqVMnXbx4UceOHdNzzz2ngIAABQUFac6cOcrJyTGfY/Xq1erSpYt8fX01evRoZWVlmfeNGTNGY8aMMf/8wQcfKDg4WL6+vgoNDdW2bduUkJCgSZMm6eTJk/L19VVKSopycnL09ttvq0uXLvL399czzzyjo0ePms9z+PBhRUREyNfXVyEhIeb6b0ZKSopeeOEFde7cWS1atFCXLl20Zs0ai2O+++479ejRQwEBARoxYoTOnj1r3rd//35FRESodevW6tq1q6Kjo2lWAdy2aPAAg8vNzVVCQoLi4+PVtm1b8/YffvhBK1eu1Lp16+Tk5KRBgwbJy8tL33zzjVasWKEffvjBPAS7bds2vfbaa5o6daoSExPVokUL7du376rXW7t2rRYsWKBZs2Zp586d+uc//6khQ4bIx8dHkydPVt26dbV7927VqlVLc+bM0datWxUdHa1vv/1WLVq00JNPPqnLly8rNzdXzz77rLy8vBQfH6/Zs2dr8+bNN/09TJgwQc7Ozlq/fr127dqlxx57TFOmTFFmZqb5mK+//lpLlizRl19+qdzcXL388suS/mwOH3/8cXXv3l0//PCDFixYoBUrVmjVqlU3XQ8AWBMNHmBAkydPVqtWrdSqVSsFBgZqypQpeuKJJ/TYY4+Zj+nQoYNq1aqlO+64Q1u3blVOTo5efPFFVaxYUXXq1NHzzz+vmJgYSdK6devUtWtXBQYGqnz58nr00UfVuHHjq147NjZWAwcOlK+vr5ycnDRgwAAtW7ZMLi4uFscVFhZq5cqVevHFF1W/fn1VrFhRw4YNU25urrZu3ardu3fr1KlTGjVqlCpWrCgvLy898cQTN/2dTJ06VZMmTZKzs7NOnjypypUrKzs7WxkZGeZjRowYoXr16qlKlSoaNWqU4uPjlZKSonXr1um+++5TeHi4nJ2d5enpqaeeesr8/QDA7YY5eIABTZo0yWIO3tXceeed5j+fOHFCaWlpat26tXlbYWGhcnNzde7cOaWkpKhJkyYW769fv/5Vz3v27FnVrVvXYtsDDzxQ5Li0tDRdunRJzz//vJyc/vf/mrm5uTpx4oRycnLk7u5u0Rg2aNDgup/peo4fP65Zs2bpyJEjuueee3T33XdLksWNJ3fddZf5z1c+Q0pKik6cOKH9+/erVatW5v0FBQUqV67cTdcDANZEgwc4KJPJZP5z7dq11aBBA23cuNG87eLFizp37pw8PDxUu3ZtHT9+3OL9p0+flpeXV5Hz1qlTR6dOnbLYNmfOHPXp08dim7u7uypWrKhly5apZcuW5u2//vqratWqpQMHDigtLU2ZmZmqXLmy+Zo348pw74svvqhHH31UJpNJP/30k9atW2dx3JkzZ9SoUSNJMn/eu+66S7Vr11ZAQICWLl1qPvb8+fMWw7sAcDthiBaAgoKClJmZqSVLlignJ0cXLlzQ6NGjNXLkSJlMJvXv31+bN2/Wli1blJeXp9jYWO3Zs+eq5woNDdWqVau0d+9eFRQU6D//+Y9iYmLMDV1WVpby8vLk5OSksLAwvfnmmzp9+rQKCgoUGxur3r176+jRo/L19dW9996rqVOnKisrS0ePHtWyZctu+FlOnz5t8Tpz5oxyc3OVnZ0tFxcXmUwmnTx5Uq+//rqkP5u/K+bNm6eUlBRlZGRoxowZ6tq1qzw8PBQSEqIff/xR69atU15ens6c
OaPnnntOM2bMKJ2/AAAoZSR4AFSlShVFR0drxowZWrJkiQoKChQQEKCFCxdKkvz8/DRr1izNmDFDI0eOVJs2bSxu2PirkJAQXbhwQZGRkTp79qw8PT21ePFieXh4qHXr1qpevbpat26tlStXavTo0Zo3b54effRRpaenq379+po7d655ft97772niRMn6h//+Idq1KihLl26aNOmTdf9LB07drT4uUaNGvr+++81bdo0vf3225o6daqqV6+uhx9+WMnJyTp06JDuvfdeSVL79u318MMPKzs7W0FBQRo3bpwkqV69elqyZIneeOMNTZ06VeXKlVOnTp00fvz4W/reAcBaTIXc5w8AAGAoDNECAAAYDA0eAACAwdDgAQAAGAwNHgAAgMHQ4AEAABgMDR4AAIDB0OABAAAYDA0eAACAwdDgAQAAGAwNHgAAgMHQ4AEAABjM/wNYtlCy4DEP/AAAAABJRU5ErkJggg==",
271
+ "text/plain": [
272
+ "<Figure size 800x600 with 2 Axes>"
273
+ ]
274
+ },
275
+ "metadata": {},
276
+ "output_type": "display_data"
277
+ }
278
+ ],
279
+ "source": [
280
+ "import pandas as pd\n",
281
+ "from sklearn.metrics import accuracy_score, f1_score, confusion_matrix\n",
282
+ "import matplotlib.pyplot as plt\n",
283
+ "import seaborn as sns\n",
284
+ "\n",
285
+ "accuracy = accuracy_score(df_combined[\"label\"], df_combined[\"Output\"])\n",
286
+ "f1 = f1_score(df_combined[\"label\"], df_combined[\"Output\"])\n",
287
+ "\n",
288
+ "print(f\"Accuracy: {accuracy}\")\n",
289
+ "print(f\"F1 Score: {f1}\")\n",
290
+ "\n",
291
+ "conf_matrix = confusion_matrix(df_combined[\"label\"], df_combined[\"Output\"])\n",
292
+ "\n",
293
+ "plt.style.use(\"seaborn-whitegrid\")\n",
294
+ "plt.figure(figsize=(8, 6))\n",
295
+ "sns.heatmap(conf_matrix, annot=True, fmt=\"d\", cmap=\"Blues\")\n",
296
+ "plt.title(\"Confusion Matrix (GPT-4 Turbo)\")\n",
297
+ "plt.ylabel(\"True Label\")\n",
298
+ "plt.xlabel(\"Predicted Label\")\n",
299
+ "plt.show()"
300
+ ]
301
+ }
302
+ ],
303
+ "metadata": {
304
+ "kernelspec": {
305
+ "display_name": "torch",
306
+ "language": "python",
307
+ "name": "python3"
308
+ },
309
+ "language_info": {
310
+ "codemirror_mode": {
311
+ "name": "ipython",
312
+ "version": 3
313
+ },
314
+ "file_extension": ".py",
315
+ "mimetype": "text/x-python",
316
+ "name": "python",
317
+ "nbconvert_exporter": "python",
318
+ "pygments_lexer": "ipython3",
319
+ "version": "3.10.11"
320
+ }
321
+ },
322
+ "nbformat": 4,
323
+ "nbformat_minor": 2
324
+ }
data_2/WELFake_Dataset.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:665331424230fc452e9482c3547a6a199a2c29745ade8d236950d1d105223773
3
+ size 245086152
data_3/news_articles.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53855240e9036a7d6c204e72bd0fa9d37a10f8e1bd2b2fdf34b962569ef271c6
3
+ size 10969548
inference.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Classify each article in ``sampled_data.csv`` as real or fake via the OpenAI chat API.

For every input row the script sends ``title + " " + text`` to the
``gpt-4-1106-preview`` model and appends one row to ``inference_output.csv``
containing the model's reply, the total tokens used and the finish reason.
The output file has no index column, so output row *k* is only meaningful if
it corresponds to input row *k*; the resume logic below preserves that.
"""
from openai import OpenAI
import os
import csv
import pandas as pd

INPUT_PATH = "sampled_data.csv"
OUTPUT_PATH = "inference_output.csv"


def _count_completed_rows(path):
    """Return the number of data rows (header excluded) already stored in ``path``.

    Returns 0 when the file does not exist or is empty.
    """
    if not os.path.exists(path):
        return 0
    with open(path, newline="", encoding="utf-8") as existing:
        # csv.reader yields an empty list for blank lines; ignore those.
        row_count = sum(1 for row in csv.reader(existing) if row)
    # The first non-empty row is the header written by the loop below.
    return max(row_count - 1, 0)


df = pd.read_csv(INPUT_PATH)

# Remove Reuters datelines such as "MADRID (Reuters) -" and bare "(Reuters)" tags.
df["text"] = df["text"].str.replace(
    r"(\b[A-Z]{2,}(?:\s[A-Z]{2,})*\s\(Reuters\)\s-|\(Reuters\))", "", regex=True
)

# Remove "Featured image via <source>." credit sentences.
df["text"] = df["text"].str.replace(r"Featured image via .+?\.($|\s)", "", regex=True)

# Empty CSV fields are read back as NaN, and NaN + str is NaN, which str() would
# later turn into the literal "nan". Treat missing titles/texts as empty strings
# so the remaining field is still sent to the model.
df["text"] = df["title"].fillna("") + " " + df["text"].fillna("")

df = df[["text", "label"]]

client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# NOTE(review): "educational guess" is presumably meant to be "educated guess".
# The prompt is left untouched so that resumed runs use the same prompt as the
# rows already written to the output file.
system_prompt = """You are an expert in identifying fake news and disinformation. Please identify whether the piece of news is real or fake.
Please think step-by-step as you answer the question. However, please only respond with 'real' if the news is real or 'fake' if the news is fake.
Do not respond with any other words or phrases.
If you are unsure if the news is real or fake, please still make an educational guess."""

# Resume after the rows that are already in the output file instead of using a
# hard-coded index: a fresh run starts at 0, an interrupted run continues where
# it stopped, and output rows stay aligned with input rows.
start_index = _count_completed_rows(OUTPUT_PATH)

for i in range(start_index, len(df)):
    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        max_tokens=10,
        messages=[
            {
                "role": "system",
                "content": system_prompt,
            },
            {
                "role": "user",
                "content": str(df.iloc[i]["text"]),
            },
        ],
    )
    # Extract the response message
    output = response.choices[0].message.content
    tokens_used = response.usage.total_tokens
    finish_reason = response.choices[0].finish_reason

    # Re-open in append mode for every row so completed results survive a crash.
    with open(OUTPUT_PATH, "a", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)

        # If the file is empty, write a header
        if file.tell() == 0:
            writer.writerow(["Output", "Tokens Used", "Finish Reason"])

        # Write the data
        writer.writerow([output, tokens_used, finish_reason])

    if i % 50 == 0:
        print(f"Batch: {i} / {len(df)}")
inference_output.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04b523a4c2768bd211660ff526f8eccffefe0fa685985cee3a3ebc4ef0d833fe
3
+ size 15566
preprocessing.ipynb ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import numpy as np"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 6,
16
+ "metadata": {},
17
+ "outputs": [],
18
+ "source": [
19
+ "# Load the datasets\n",
20
+ "df_1 = pd.read_csv(\"data_2/WELFake_Dataset.csv\")\n",
21
+ "df_2 = pd.read_csv(\"data_3/news_articles.csv\")\n",
22
+ "\n",
23
+ "# Drop index\n",
24
+ "df_1.drop(df_1.columns[0], axis=1, inplace=True)\n",
25
+ "df_1.dropna(inplace=True)\n",
26
+ "\n",
27
+ "# Swapping labels around since it originally is the opposite\n",
28
+ "df_1[\"label\"] = df_1[\"label\"].map({0: 1, 1: 0})\n",
29
+ "\n",
30
+ "# Drop unused columns\n",
31
+ "df_2.drop(\n",
32
+ " columns=[\n",
33
+ " \"author\",\n",
34
+ " \"published\",\n",
35
+ " \"site_url\",\n",
36
+ " \"main_img_url\",\n",
37
+ " \"type\",\n",
38
+ " \"text_without_stopwords\",\n",
39
+ " \"title_without_stopwords\",\n",
40
+ " \"hasImage\",\n",
41
+ " ],\n",
42
+ " inplace=True,\n",
43
+ ")\n",
44
+ "# Map Real to 1 and Fake to 0\n",
45
+ "df_2[\"label\"] = df_2[\"label\"].map({\"Real\": 1, \"Fake\": 0})\n",
46
+ "df_2 = df_2[df_2[\"label\"].isin([1, 0])]\n",
47
+ "\n",
48
+ "# Drop rows where the language is not 'english'\n",
49
+ "df_2 = df_2[df_2[\"language\"] == \"english\"]\n",
50
+ "df_2.drop(columns=[\"language\"], inplace=True)\n",
51
+ "\n",
52
+ "# Convert \"no title\" to empty string\n",
53
+ "df_2[\"title\"] = df_2[\"title\"].apply(lambda x: \"\" if x == \"no title\" else x)\n",
54
+ "\n",
55
+ "df_2.dropna(inplace=True)\n",
56
+ "\n",
57
+ "random_1 = df_1.sample(n=500, random_state=42)\n",
58
+ "random_2 = df_2.sample(n=500, random_state=42)\n",
59
+ "\n",
60
+ "# Combine the datasets\n",
61
+ "df = pd.concat([random_1, random_2], ignore_index=True)\n",
62
+ "\n",
63
+ "df[\"label\"] = df[\"label\"].astype(int)\n",
64
+ "\n",
65
+ "df.to_csv(\"sampled_data.csv\", index=False)"
66
+ ]
67
+ },
68
+ {
69
+ "cell_type": "code",
70
+ "execution_count": 7,
71
+ "metadata": {},
72
+ "outputs": [
73
+ {
74
+ "data": {
75
+ "text/html": [
76
+ "<div>\n",
77
+ "<style scoped>\n",
78
+ " .dataframe tbody tr th:only-of-type {\n",
79
+ " vertical-align: middle;\n",
80
+ " }\n",
81
+ "\n",
82
+ " .dataframe tbody tr th {\n",
83
+ " vertical-align: top;\n",
84
+ " }\n",
85
+ "\n",
86
+ " .dataframe thead th {\n",
87
+ " text-align: right;\n",
88
+ " }\n",
89
+ "</style>\n",
90
+ "<table border=\"1\" class=\"dataframe\">\n",
91
+ " <thead>\n",
92
+ " <tr style=\"text-align: right;\">\n",
93
+ " <th></th>\n",
94
+ " <th>title</th>\n",
95
+ " <th>text</th>\n",
96
+ " <th>label</th>\n",
97
+ " </tr>\n",
98
+ " </thead>\n",
99
+ " <tbody>\n",
100
+ " <tr>\n",
101
+ " <th>0</th>\n",
102
+ " <td>Live at Truthdig: Robert Scheer and Thomas Fra...</td>\n",
103
+ " <td>Live at Truthdig: Robert Scheer and Thomas Fra...</td>\n",
104
+ " <td>0</td>\n",
105
+ " </tr>\n",
106
+ " <tr>\n",
107
+ " <th>1</th>\n",
108
+ " <td>The Mirage of a Return to Manufacturing Greatn...</td>\n",
109
+ " <td>Half a century ago, harvesting California’s 2....</td>\n",
110
+ " <td>1</td>\n",
111
+ " </tr>\n",
112
+ " <tr>\n",
113
+ " <th>2</th>\n",
114
+ " <td>British PM expected to offer to fill post-Brex...</td>\n",
115
+ " <td>(Reuters) - The British government has told Ge...</td>\n",
116
+ " <td>1</td>\n",
117
+ " </tr>\n",
118
+ " <tr>\n",
119
+ " <th>3</th>\n",
120
+ " <td>Checkmating Obama</td>\n",
121
+ " <td>Originally published by the Jerusalem Post . \\...</td>\n",
122
+ " <td>0</td>\n",
123
+ " </tr>\n",
124
+ " <tr>\n",
125
+ " <th>4</th>\n",
126
+ " <td>Thirty-eight injured in police charges in Cata...</td>\n",
127
+ " <td>MADRID (Reuters) - Emergency services have att...</td>\n",
128
+ " <td>1</td>\n",
129
+ " </tr>\n",
130
+ " </tbody>\n",
131
+ "</table>\n",
132
+ "</div>"
133
+ ],
134
+ "text/plain": [
135
+ " title \\\n",
136
+ "0 Live at Truthdig: Robert Scheer and Thomas Fra... \n",
137
+ "1 The Mirage of a Return to Manufacturing Greatn... \n",
138
+ "2 British PM expected to offer to fill post-Brex... \n",
139
+ "3 Checkmating Obama \n",
140
+ "4 Thirty-eight injured in police charges in Cata... \n",
141
+ "\n",
142
+ " text label \n",
143
+ "0 Live at Truthdig: Robert Scheer and Thomas Fra... 0 \n",
144
+ "1 Half a century ago, harvesting California’s 2.... 1 \n",
145
+ "2 (Reuters) - The British government has told Ge... 1 \n",
146
+ "3 Originally published by the Jerusalem Post . \\... 0 \n",
147
+ "4 MADRID (Reuters) - Emergency services have att... 1 "
148
+ ]
149
+ },
150
+ "execution_count": 7,
151
+ "metadata": {},
152
+ "output_type": "execute_result"
153
+ }
154
+ ],
155
+ "source": [
156
+ "df.head()"
157
+ ]
158
+ }
159
+ ],
160
+ "metadata": {
161
+ "kernelspec": {
162
+ "display_name": "torch",
163
+ "language": "python",
164
+ "name": "python3"
165
+ },
166
+ "language_info": {
167
+ "codemirror_mode": {
168
+ "name": "ipython",
169
+ "version": 3
170
+ },
171
+ "file_extension": ".py",
172
+ "mimetype": "text/x-python",
173
+ "name": "python",
174
+ "nbconvert_exporter": "python",
175
+ "pygments_lexer": "ipython3",
176
+ "version": "3.10.11"
177
+ }
178
+ },
179
+ "nbformat": 4,
180
+ "nbformat_minor": 2
181
+ }
sampled_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:406b12a0d8e60d0c558d12a131f2013319b9eb910af92590a007511fb8904017
3
+ size 3510245