romanbredehoft-zama
committed on
Commit
•
4d9e29f
1
Parent(s):
c85d79c
Update download comments
Browse files
- SentimentClassification.ipynb +12 -0
- download_data.sh +4 -2
SentimentClassification.ipynb
CHANGED
@@ -55,9 +55,12 @@
|
|
55 |
],
|
56 |
"source": [
|
57 |
"# Download the datasets\n",
|
|
|
|
|
58 |
"if not os.path.isfile(\"local_datasets/twitter-airline-sentiment/Tweets.csv\"):\n",
|
59 |
" raise ValueError(\"Please launch the `download_data.sh` script to get datasets\")\n",
|
60 |
"\n",
|
|
|
61 |
"train = pd.read_csv(\"local_datasets/twitter-airline-sentiment/Tweets.csv\", index_col=0)\n",
|
62 |
"text_X = train[\"text\"]\n",
|
63 |
"y = train[\"airline_sentiment\"]\n",
|
@@ -981,6 +984,15 @@
|
|
981 |
"metadata": {
|
982 |
"execution": {
|
983 |
"timeout": 10800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
984 |
}
|
985 |
},
|
986 |
"nbformat": 4,
|
|
|
55 |
],
|
56 |
"source": [
|
57 |
"# Download the datasets\n",
|
58 |
+
"# The dataset can be downloaded through the `download_data.sh` script, which requires setting up\n",
|
59 |
+
"# Kaggle's CLI, or manually at https://www.kaggle.com/datasets/crowdflower/twitter-airline-sentiment\n",
|
60 |
"if not os.path.isfile(\"local_datasets/twitter-airline-sentiment/Tweets.csv\"):\n",
|
61 |
" raise ValueError(\"Please launch the `download_data.sh` script to get datasets\")\n",
|
62 |
"\n",
|
63 |
+
"\n",
|
64 |
"train = pd.read_csv(\"local_datasets/twitter-airline-sentiment/Tweets.csv\", index_col=0)\n",
|
65 |
"text_X = train[\"text\"]\n",
|
66 |
"y = train[\"airline_sentiment\"]\n",
|
|
|
984 |
"metadata": {
|
985 |
"execution": {
|
986 |
"timeout": 10800
|
987 |
+
},
|
988 |
+
"kernelspec": {
|
989 |
+
"display_name": ".venv",
|
990 |
+
"language": "python",
|
991 |
+
"name": "python3"
|
992 |
+
},
|
993 |
+
"language_info": {
|
994 |
+
"name": "python",
|
995 |
+
"version": "3.10.11"
|
996 |
}
|
997 |
},
|
998 |
"nbformat": 4,
|
download_data.sh
CHANGED
@@ -2,8 +2,10 @@
|
|
2 |
|
3 |
set -e
|
4 |
|
5 |
-
# You need to have a valid ~/.kaggle/kaggle.json, that you can
|
6 |
-
# on your account page in kaggle.com
|
|
|
|
|
7 |
rm -rf local_datasets
|
8 |
mkdir local_datasets
|
9 |
cd local_datasets
|
|
|
2 |
|
3 |
set -e
|
4 |
|
5 |
+
# You need to install kaggle using pip and then have a valid ~/.kaggle/kaggle.json, that you can
|
6 |
+
# generate from "Create new API token" on your account page in kaggle.com
|
7 |
+
# Alternatively, the dataset can be downloaded manually at
|
8 |
+
# https://www.kaggle.com/datasets/crowdflower/twitter-airline-sentiment
|
9 |
rm -rf local_datasets
|
10 |
mkdir local_datasets
|
11 |
cd local_datasets
|