romanbredehoft-zama committed on
Commit 4d9e29f
1 Parent(s): c85d79c

Update download comments

Files changed (2)
  1. SentimentClassification.ipynb +12 -0
  2. download_data.sh +4 -2
SentimentClassification.ipynb CHANGED
@@ -55,9 +55,12 @@
 ],
 "source": [
 "# Download the datasets\n",
+"# The dataset can be downloaded through the `download_data.sh` script, which requires to set up\n",
+"# Kaggle's CLI, or manually at https://www.kaggle.com/datasets/crowdflower/twitter-airline-sentiment\n",
 "if not os.path.isfile(\"local_datasets/twitter-airline-sentiment/Tweets.csv\"):\n",
 " raise ValueError(\"Please launch the `download_data.sh` script to get datasets\")\n",
 "\n",
+"\n",
 "train = pd.read_csv(\"local_datasets/twitter-airline-sentiment/Tweets.csv\", index_col=0)\n",
 "text_X = train[\"text\"]\n",
 "y = train[\"airline_sentiment\"]\n",
@@ -981,6 +984,15 @@
 "metadata": {
 "execution": {
 "timeout": 10800
+},
+"kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+},
+"language_info": {
+ "name": "python",
+ "version": "3.10.11"
 }
 },
 "nbformat": 4,
download_data.sh CHANGED
@@ -2,8 +2,10 @@
 
 set -e
 
-# You need to have a valid ~/.kaggle/kaggle.json, that you can generate from "Create new API token"
-# on your account page in kaggle.com
+# You need to install kaggle using pip and then have a valid ~/.kaggle/kaggle.json, that you can
+# generate from "Create new API token" on your account page in kaggle.com
+# Alternatively, the dataset can be downloaded manually at
+# https://www.kaggle.com/datasets/crowdflower/twitter-airline-sentiment
 rm -rf local_datasets
 mkdir local_datasets
 cd local_datasets
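
For readers who have not used the Kaggle CLI before, the setup the updated comments refer to looks roughly like the sketch below. Only the dataset slug (crowdflower/twitter-airline-sentiment) comes from the URL in the diff; the download command actually used inside `download_data.sh` is not shown here, so the `-p`/`--unzip` flags and the local_datasets/twitter-airline-sentiment target path are assumptions based on the file check in the notebook.

    # Sketch of the Kaggle CLI setup described in the comments above
    # (assumed commands, not the literal contents of download_data.sh)
    pip install kaggle                   # provides the `kaggle` command
    mkdir -p ~/.kaggle                   # kaggle.json comes from "Create New API Token" on kaggle.com
    mv ~/Downloads/kaggle.json ~/.kaggle/
    chmod 600 ~/.kaggle/kaggle.json      # the CLI warns if the token is world-readable

    # Download and unpack the dataset into the path the notebook checks
    kaggle datasets download -d crowdflower/twitter-airline-sentiment \
        -p local_datasets/twitter-airline-sentiment --unzip

Alternatively, downloading the archive manually from the Kaggle page and extracting Tweets.csv into local_datasets/twitter-airline-sentiment/ satisfies the `os.path.isfile` check in the notebook.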