Spaces:
Sleeping
Sleeping
Add RottenTomatos, arrumando tokenizador, arrumando paths
#2
by
AndreMitri
- opened
- .gitattributes +1 -0
- app.py +5 -2
- data/rotten_tomatos.csv +3 -0
- notebooks_explicativos/Estatistico.ipynb +1 -1
- notebooks_explicativos/Neural_Bert.ipynb +257 -11
- notebooks_explicativos/Simbolico.ipynb +1 -1
.gitattributes
CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
data/imdb_reviews.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
data/imdb_reviews.csv filter=lfs diff=lfs merge=lfs -text
|
37 |
+
data/rotten_tomatos.csv filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -2,10 +2,10 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
from preprocess_data import preprocess_text,get_stopwords
|
4 |
from datasets import load_dataset
|
5 |
-
from transformers import pipeline
|
6 |
from wordnet import wordnet_pipeline
|
7 |
|
8 |
-
dataset = load_dataset('
|
9 |
|
10 |
dataframes = {}
|
11 |
for split in dataset.keys():
|
@@ -17,6 +17,9 @@ for split in dataset.keys():
|
|
17 |
MODEL_PATH = 'danielcd99/BERT_imdb'
|
18 |
|
19 |
def load_pipeline():
|
|
|
|
|
|
|
20 |
pipe=pipeline(
|
21 |
"text-classification",
|
22 |
model=MODEL_PATH
|
|
|
2 |
import pandas as pd
|
3 |
from preprocess_data import preprocess_text,get_stopwords
|
4 |
from datasets import load_dataset
|
5 |
+
from transformers import AutoTokenizer, pipeline
|
6 |
from wordnet import wordnet_pipeline
|
7 |
|
8 |
+
dataset = load_dataset('AndreMitri/rotten_tomatos')
|
9 |
|
10 |
dataframes = {}
|
11 |
for split in dataset.keys():
|
|
|
17 |
MODEL_PATH = 'danielcd99/BERT_imdb'
|
18 |
|
19 |
def load_pipeline():
|
20 |
+
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
|
21 |
+
tokenizer.model_max_length = 200
|
22 |
+
|
23 |
pipe=pipeline(
|
24 |
"text-classification",
|
25 |
model=MODEL_PATH
|
data/rotten_tomatos.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8aab34e6b1357deec1bfca34bcb3b5ad44f56b5064f9222e4e2e60a6e97bd1a5
|
3 |
+
size 446463
|
notebooks_explicativos/Estatistico.ipynb
CHANGED
@@ -402,7 +402,7 @@
|
|
402 |
}
|
403 |
],
|
404 |
"source": [
|
405 |
-
"db = pd.read_csv('imdb_reviews.csv')\n",
|
406 |
"db.head(5)"
|
407 |
]
|
408 |
},
|
|
|
402 |
}
|
403 |
],
|
404 |
"source": [
|
405 |
+
"db = pd.read_csv('../data/imdb_reviews.csv')\n",
|
406 |
"db.head(5)"
|
407 |
]
|
408 |
},
|
notebooks_explicativos/Neural_Bert.ipynb
CHANGED
@@ -75,7 +75,7 @@
|
|
75 |
},
|
76 |
{
|
77 |
"cell_type": "code",
|
78 |
-
"execution_count":
|
79 |
"metadata": {
|
80 |
"colab": {
|
81 |
"base_uri": "https://localhost:8080/",
|
@@ -149,13 +149,13 @@
|
|
149 |
"4 Petter Mattei's \"Love in the Time of Money\" is... positive"
|
150 |
]
|
151 |
},
|
152 |
-
"execution_count":
|
153 |
"metadata": {},
|
154 |
"output_type": "execute_result"
|
155 |
}
|
156 |
],
|
157 |
"source": [
|
158 |
-
"df_reviews = pd.read_csv('imdb_reviews.csv')\n",
|
159 |
"df_reviews.head()"
|
160 |
]
|
161 |
},
|
@@ -538,7 +538,7 @@
|
|
538 |
},
|
539 |
{
|
540 |
"cell_type": "code",
|
541 |
-
"execution_count":
|
542 |
"metadata": {
|
543 |
"colab": {
|
544 |
"base_uri": "https://localhost:8080/"
|
@@ -551,8 +551,8 @@
|
|
551 |
"name": "stderr",
|
552 |
"output_type": "stream",
|
553 |
"text": [
|
554 |
-
"
|
555 |
-
"
|
556 |
]
|
557 |
}
|
558 |
],
|
@@ -574,7 +574,7 @@
|
|
574 |
},
|
575 |
{
|
576 |
"cell_type": "code",
|
577 |
-
"execution_count":
|
578 |
"metadata": {
|
579 |
"id": "LKEjDZCHpk4e"
|
580 |
},
|
@@ -887,7 +887,6 @@
|
|
887 |
},
|
888 |
"outputs": [],
|
889 |
"source": [
|
890 |
-
"\n",
|
891 |
"# Load both accuracy and f1 metrics\n",
|
892 |
"accuracy_metric = evaluate.load(\"accuracy\")\n",
|
893 |
"f1_metric = evaluate.load(\"f1\")\n",
|
@@ -1175,7 +1174,7 @@
|
|
1175 |
},
|
1176 |
{
|
1177 |
"cell_type": "code",
|
1178 |
-
"execution_count":
|
1179 |
"metadata": {
|
1180 |
"id": "lOHVSyfJJ8zK"
|
1181 |
},
|
@@ -1188,7 +1187,7 @@
|
|
1188 |
},
|
1189 |
{
|
1190 |
"cell_type": "code",
|
1191 |
-
"execution_count":
|
1192 |
"metadata": {
|
1193 |
"id": "t-T7hDZ2J1Qk"
|
1194 |
},
|
@@ -1261,6 +1260,253 @@
|
|
1261 |
"source": [
|
1262 |
"get_prediction(\"This movie is awesome!\")"
|
1263 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1264 |
}
|
1265 |
],
|
1266 |
"metadata": {
|
@@ -1283,7 +1529,7 @@
|
|
1283 |
"name": "python",
|
1284 |
"nbconvert_exporter": "python",
|
1285 |
"pygments_lexer": "ipython3",
|
1286 |
-
"version": "3.
|
1287 |
}
|
1288 |
},
|
1289 |
"nbformat": 4,
|
|
|
75 |
},
|
76 |
{
|
77 |
"cell_type": "code",
|
78 |
+
"execution_count": 25,
|
79 |
"metadata": {
|
80 |
"colab": {
|
81 |
"base_uri": "https://localhost:8080/",
|
|
|
149 |
"4 Petter Mattei's \"Love in the Time of Money\" is... positive"
|
150 |
]
|
151 |
},
|
152 |
+
"execution_count": 25,
|
153 |
"metadata": {},
|
154 |
"output_type": "execute_result"
|
155 |
}
|
156 |
],
|
157 |
"source": [
|
158 |
+
"df_reviews = pd.read_csv('../data/imdb_reviews.csv')\n",
|
159 |
"df_reviews.head()"
|
160 |
]
|
161 |
},
|
|
|
538 |
},
|
539 |
{
|
540 |
"cell_type": "code",
|
541 |
+
"execution_count": 22,
|
542 |
"metadata": {
|
543 |
"colab": {
|
544 |
"base_uri": "https://localhost:8080/"
|
|
|
551 |
"name": "stderr",
|
552 |
"output_type": "stream",
|
553 |
"text": [
|
554 |
+
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
|
555 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
556 |
]
|
557 |
}
|
558 |
],
|
|
|
574 |
},
|
575 |
{
|
576 |
"cell_type": "code",
|
577 |
+
"execution_count": 36,
|
578 |
"metadata": {
|
579 |
"id": "LKEjDZCHpk4e"
|
580 |
},
|
|
|
887 |
},
|
888 |
"outputs": [],
|
889 |
"source": [
|
|
|
890 |
"# Load both accuracy and f1 metrics\n",
|
891 |
"accuracy_metric = evaluate.load(\"accuracy\")\n",
|
892 |
"f1_metric = evaluate.load(\"f1\")\n",
|
|
|
1174 |
},
|
1175 |
{
|
1176 |
"cell_type": "code",
|
1177 |
+
"execution_count": 27,
|
1178 |
"metadata": {
|
1179 |
"id": "lOHVSyfJJ8zK"
|
1180 |
},
|
|
|
1187 |
},
|
1188 |
{
|
1189 |
"cell_type": "code",
|
1190 |
+
"execution_count": 28,
|
1191 |
"metadata": {
|
1192 |
"id": "t-T7hDZ2J1Qk"
|
1193 |
},
|
|
|
1260 |
"source": [
|
1261 |
"get_prediction(\"This movie is awesome!\")"
|
1262 |
]
|
1263 |
+
},
|
1264 |
+
{
|
1265 |
+
"cell_type": "markdown",
|
1266 |
+
"metadata": {},
|
1267 |
+
"source": [
|
1268 |
+
"# Avaliação do modelo em novos dados\n",
|
1269 |
+
"Avaliação realizada em outro dataset: as reviews do Rotten Tomatoes."
|
1270 |
+
]
|
1271 |
+
},
|
1272 |
+
{
|
1273 |
+
"cell_type": "code",
|
1274 |
+
"execution_count": 30,
|
1275 |
+
"metadata": {},
|
1276 |
+
"outputs": [
|
1277 |
+
{
|
1278 |
+
"name": "stderr",
|
1279 |
+
"output_type": "stream",
|
1280 |
+
"text": [
|
1281 |
+
"[nltk_data] Downloading package stopwords to\n",
|
1282 |
+
"[nltk_data] C:\\Users\\andre\\AppData\\Roaming\\nltk_data...\n",
|
1283 |
+
"[nltk_data] Package stopwords is already up-to-date!\n"
|
1284 |
+
]
|
1285 |
+
},
|
1286 |
+
{
|
1287 |
+
"data": {
|
1288 |
+
"text/html": [
|
1289 |
+
"<div>\n",
|
1290 |
+
"<style scoped>\n",
|
1291 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
1292 |
+
" vertical-align: middle;\n",
|
1293 |
+
" }\n",
|
1294 |
+
"\n",
|
1295 |
+
" .dataframe tbody tr th {\n",
|
1296 |
+
" vertical-align: top;\n",
|
1297 |
+
" }\n",
|
1298 |
+
"\n",
|
1299 |
+
" .dataframe thead th {\n",
|
1300 |
+
" text-align: right;\n",
|
1301 |
+
" }\n",
|
1302 |
+
"</style>\n",
|
1303 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
1304 |
+
" <thead>\n",
|
1305 |
+
" <tr style=\"text-align: right;\">\n",
|
1306 |
+
" <th></th>\n",
|
1307 |
+
" <th>review</th>\n",
|
1308 |
+
" <th>sentiment</th>\n",
|
1309 |
+
" <th>bert_results</th>\n",
|
1310 |
+
" </tr>\n",
|
1311 |
+
" </thead>\n",
|
1312 |
+
" <tbody>\n",
|
1313 |
+
" <tr>\n",
|
1314 |
+
" <th>651</th>\n",
|
1315 |
+
" <td>The film is content as it is to run clever one...</td>\n",
|
1316 |
+
" <td>negative</td>\n",
|
1317 |
+
" <td>Positive</td>\n",
|
1318 |
+
" </tr>\n",
|
1319 |
+
" <tr>\n",
|
1320 |
+
" <th>2205</th>\n",
|
1321 |
+
" <td>&#91;Has&#93; a surprising and somewhat disapp...</td>\n",
|
1322 |
+
" <td>negative</td>\n",
|
1323 |
+
" <td>Positive</td>\n",
|
1324 |
+
" </tr>\n",
|
1325 |
+
" <tr>\n",
|
1326 |
+
" <th>362</th>\n",
|
1327 |
+
" <td>Absurdly over-rated...</td>\n",
|
1328 |
+
" <td>negative</td>\n",
|
1329 |
+
" <td>Negative</td>\n",
|
1330 |
+
" </tr>\n",
|
1331 |
+
" <tr>\n",
|
1332 |
+
" <th>2784</th>\n",
|
1333 |
+
" <td>A rare bird, not because of what it's like but...</td>\n",
|
1334 |
+
" <td>negative</td>\n",
|
1335 |
+
" <td>Positive</td>\n",
|
1336 |
+
" </tr>\n",
|
1337 |
+
" <tr>\n",
|
1338 |
+
" <th>1914</th>\n",
|
1339 |
+
" <td>Lord of Illusions is also quite repulsive, as ...</td>\n",
|
1340 |
+
" <td>negative</td>\n",
|
1341 |
+
" <td>Positive</td>\n",
|
1342 |
+
" </tr>\n",
|
1343 |
+
" <tr>\n",
|
1344 |
+
" <th>...</th>\n",
|
1345 |
+
" <td>...</td>\n",
|
1346 |
+
" <td>...</td>\n",
|
1347 |
+
" <td>...</td>\n",
|
1348 |
+
" </tr>\n",
|
1349 |
+
" <tr>\n",
|
1350 |
+
" <th>2230</th>\n",
|
1351 |
+
" <td>The movie is completely innocuous, passably en...</td>\n",
|
1352 |
+
" <td>negative</td>\n",
|
1353 |
+
" <td>Positive</td>\n",
|
1354 |
+
" </tr>\n",
|
1355 |
+
" <tr>\n",
|
1356 |
+
" <th>2354</th>\n",
|
1357 |
+
" <td>A mud-simple horror trudge set in a swamp colo...</td>\n",
|
1358 |
+
" <td>negative</td>\n",
|
1359 |
+
" <td>Negative</td>\n",
|
1360 |
+
" </tr>\n",
|
1361 |
+
" <tr>\n",
|
1362 |
+
" <th>2404</th>\n",
|
1363 |
+
" <td>Just plain generic.</td>\n",
|
1364 |
+
" <td>negative</td>\n",
|
1365 |
+
" <td>Negative</td>\n",
|
1366 |
+
" </tr>\n",
|
1367 |
+
" <tr>\n",
|
1368 |
+
" <th>720</th>\n",
|
1369 |
+
" <td>Ulmer brings an enormous amount of impressioni...</td>\n",
|
1370 |
+
" <td>positive</td>\n",
|
1371 |
+
" <td>Negative</td>\n",
|
1372 |
+
" </tr>\n",
|
1373 |
+
" <tr>\n",
|
1374 |
+
" <th>527</th>\n",
|
1375 |
+
" <td>In their directorial debut, Britt Poulton and ...</td>\n",
|
1376 |
+
" <td>negative</td>\n",
|
1377 |
+
" <td>Negative</td>\n",
|
1378 |
+
" </tr>\n",
|
1379 |
+
" </tbody>\n",
|
1380 |
+
"</table>\n",
|
1381 |
+
"<p>3000 rows × 3 columns</p>\n",
|
1382 |
+
"</div>"
|
1383 |
+
],
|
1384 |
+
"text/plain": [
|
1385 |
+
" review sentiment bert_results\n",
|
1386 |
+
"651 The film is content as it is to run clever one... negative Positive\n",
|
1387 |
+
"2205 [Has] a surprising and somewhat disapp... negative Positive\n",
|
1388 |
+
"362 Absurdly over-rated... negative Negative\n",
|
1389 |
+
"2784 A rare bird, not because of what it's like but... negative Positive\n",
|
1390 |
+
"1914 Lord of Illusions is also quite repulsive, as ... negative Positive\n",
|
1391 |
+
"... ... ... ...\n",
|
1392 |
+
"2230 The movie is completely innocuous, passably en... negative Positive\n",
|
1393 |
+
"2354 A mud-simple horror trudge set in a swamp colo... negative Negative\n",
|
1394 |
+
"2404 Just plain generic. negative Negative\n",
|
1395 |
+
"720 Ulmer brings an enormous amount of impressioni... positive Negative\n",
|
1396 |
+
"527 In their directorial debut, Britt Poulton and ... negative Negative\n",
|
1397 |
+
"\n",
|
1398 |
+
"[3000 rows x 3 columns]"
|
1399 |
+
]
|
1400 |
+
},
|
1401 |
+
"execution_count": 30,
|
1402 |
+
"metadata": {},
|
1403 |
+
"output_type": "execute_result"
|
1404 |
+
}
|
1405 |
+
],
|
1406 |
+
"source": [
|
1407 |
+
"import pandas as pd\n",
|
1408 |
+
"from preprocess_data import preprocess_text,get_stopwords\n",
|
1409 |
+
"from transformers import AutoTokenizer, pipeline\n",
|
1410 |
+
"\n",
|
1411 |
+
"df = pd.read_csv('../data/rotten_tomatos.csv')\n",
|
1412 |
+
"\n",
|
1413 |
+
"MODEL_PATH = 'danielcd99/BERT_imdb'\n",
|
1414 |
+
"\n",
|
1415 |
+
"def load_pipeline():\n",
|
1416 |
+
" tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)\n",
|
1417 |
+
" tokenizer.model_max_length = 200\n",
|
1418 |
+
"\n",
|
1419 |
+
" pipe=pipeline(\n",
|
1420 |
+
" \"text-classification\",\n",
|
1421 |
+
" model=MODEL_PATH\n",
|
1422 |
+
" )\n",
|
1423 |
+
" return pipe\n",
|
1424 |
+
"\n",
|
1425 |
+
"pipe = load_pipeline()\n",
|
1426 |
+
"get_stopwords()\n",
|
1427 |
+
"df['preprocessed_review'] = df['review'].copy()\n",
|
1428 |
+
"df['preprocessed_review'] = df['preprocessed_review'].apply(preprocess_text)\n",
|
1429 |
+
" \n",
|
1430 |
+
"predictions = []\n",
|
1431 |
+
"for review in df['preprocessed_review']:\n",
|
1432 |
+
" try:\n",
|
1433 |
+
" label = pipe(review)[0]['label']\n",
|
1434 |
+
" except:\n",
|
1435 |
+
" print(\"Ocorreu um erro de carregamento, tente novamente!\")\n",
|
1436 |
+
" \n",
|
1437 |
+
" if label == 'LABEL_0':\n",
|
1438 |
+
" predictions.append(0)\n",
|
1439 |
+
" else:\n",
|
1440 |
+
" predictions.append(1)\n",
|
1441 |
+
"\n",
|
1442 |
+
"df['bert_results'] = predictions\n",
|
1443 |
+
"\n",
|
1444 |
+
"cols = ['review','sentiment', 'bert_results']\n",
|
1445 |
+
"df = df[cols]\n",
|
1446 |
+
"df"
|
1447 |
+
]
|
1448 |
+
},
|
1449 |
+
{
|
1450 |
+
"cell_type": "code",
|
1451 |
+
"execution_count": 31,
|
1452 |
+
"metadata": {},
|
1453 |
+
"outputs": [
|
1454 |
+
{
|
1455 |
+
"name": "stdout",
|
1456 |
+
"output_type": "stream",
|
1457 |
+
"text": [
|
1458 |
+
"Precision: 0.8066\n",
|
1459 |
+
"Recall: 0.8449\n",
|
1460 |
+
"F1 Score: 0.8253\n"
|
1461 |
+
]
|
1462 |
+
},
|
1463 |
+
{
|
1464 |
+
"data": {
|
1465 |
+
"image/png": "",
|
1466 |
+
"text/plain": [
|
1467 |
+
"<Figure size 800x600 with 2 Axes>"
|
1468 |
+
]
|
1469 |
+
},
|
1470 |
+
"metadata": {},
|
1471 |
+
"output_type": "display_data"
|
1472 |
+
}
|
1473 |
+
],
|
1474 |
+
"source": [
|
1475 |
+
"from sklearn.metrics import confusion_matrix, precision_recall_fscore_support\n",
|
1476 |
+
"import matplotlib.pyplot as plt\n",
|
1477 |
+
"import seaborn as sns\n",
|
1478 |
+
"\n",
|
1479 |
+
"# Mapear 'Positive' para 1 e 'Negative' para 0 em 'sentiment'\n",
|
1480 |
+
"df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})\n",
|
1481 |
+
"df['bert_results'] = df['bert_results'].map({'Positive': 1, 'Negative': 0})\n",
|
1482 |
+
"\n",
|
1483 |
+
"# Calcular métricas de avaliação: precision, recall, f1-score\n",
|
1484 |
+
"precision, recall, f1_score, _ = precision_recall_fscore_support(df['sentiment'], df['bert_results'], average='binary')\n",
|
1485 |
+
"\n",
|
1486 |
+
"print(f\"Precision: {precision:.4f}\")\n",
|
1487 |
+
"print(f\"Recall: {recall:.4f}\")\n",
|
1488 |
+
"print(f\"F1 Score: {f1_score:.4f}\")\n",
|
1489 |
+
"\n",
|
1490 |
+
"# Calcular e plotar a matriz de confusão\n",
|
1491 |
+
"cm = confusion_matrix(df['sentiment'], df['bert_results'])\n",
|
1492 |
+
"plt.figure(figsize=(8, 6))\n",
|
1493 |
+
"sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Negative', 'Positive'], yticklabels=['Negative', 'Positive'])\n",
|
1494 |
+
"plt.xlabel('Predicted')\n",
|
1495 |
+
"plt.ylabel('True')\n",
|
1496 |
+
"plt.title('Confusion Matrix')\n",
|
1497 |
+
"plt.show()"
|
1498 |
+
]
|
1499 |
+
},
|
1500 |
+
{
|
1501 |
+
"cell_type": "markdown",
|
1502 |
+
"metadata": {},
|
1503 |
+
"source": [
|
1504 |
+
"Após ajustar o modelo BERT utilizando a base de dados do IMDb e avaliá-lo nos dados do Rotten Tomatoes, obtivemos as seguintes métricas de desempenho:\n",
|
1505 |
+
"\n",
|
1506 |
+
"Precision: 0.8066 --- Recall: 0.8449 --- F1 Score: 0.8253\n",
|
1507 |
+
"\n",
|
1508 |
+
"Essas métricas indicam que o modelo BERT, ajustado (fine-tuned) com a base de dados do IMDb, conseguiu classificar de forma bastante precisa os sentimentos das reviews do Rotten Tomatoes, um conjunto de dados diferente daquele utilizado no ajuste."
|
1509 |
+
]
|
1510 |
}
|
1511 |
],
|
1512 |
"metadata": {
|
|
|
1529 |
"name": "python",
|
1530 |
"nbconvert_exporter": "python",
|
1531 |
"pygments_lexer": "ipython3",
|
1532 |
+
"version": "3.11.7"
|
1533 |
}
|
1534 |
},
|
1535 |
"nbformat": 4,
|
notebooks_explicativos/Simbolico.ipynb
CHANGED
@@ -465,7 +465,7 @@
|
|
465 |
}
|
466 |
],
|
467 |
"source": [
|
468 |
-
"df = pd.read_csv('imdb.csv')\n",
|
469 |
"df.head(5)"
|
470 |
]
|
471 |
},
|
|
|
465 |
}
|
466 |
],
|
467 |
"source": [
|
468 |
+
"df = pd.read_csv('../data/imdb.csv')\n",
|
469 |
"df.head(5)"
|
470 |
]
|
471 |
},
|