{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": 96,
"metadata": {
"id": "BFAiaUj2ijaZ"
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"source": [
"# Raw CSV of the labelled USA tweets. No access token is stored in the notebook:\n",
"# a token embedded in the URL is a leaked credential and stops working once it expires.\n",
"# If the file needs one, export GITHUB_RAW_TOKEN before starting the kernel.\n",
"url = 'https://raw.githubusercontent.com/caroline710/big-data-final/main/data/distilbert-usa.csv'\n",
"raw_token = os.environ.get('GITHUB_RAW_TOKEN')\n",
"if raw_token:\n",
"    url = f'{url}?token={raw_token}'\n",
"\n",
"data = pd.read_csv(url)\n",
"data.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 365
},
"id": "pZyZce4kovTb",
"outputId": "9eead540-55af-4d4a-fb8f-ceb69b0209c8"
},
"execution_count": 110,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Tweet ID Username \\\n",
"0 954865820156547072 JackPolakoff \n",
"1 954865477389570049 JackPolakoff \n",
"2 954865148212207617 sophixklein \n",
"3 954865125315481600 environmentalhe \n",
"4 954096307341115392 UN \n",
"\n",
" Content \\\n",
"0 DANGER!\\n#StableGenius at work\\n #ImpeachTrum... \n",
"1 What could possibly go wrong giving one party ... \n",
"2 @sameverlark_ @ToddDracula probs right; honest... \n",
"3 This, 100%. Prevention is the best protection ... \n",
"4 Disasters push 24M people into poverty every y... \n",
"\n",
" Created At User Location Filtered Location Label \\\n",
"0 2018-01-20 23:58:46+00:00 New York City New York LABEL_1 \n",
"1 2018-01-20 23:57:25+00:00 New York City New York LABEL_1 \n",
"2 2018-01-20 23:56:06+00:00 West Virginia, USA Virginia LABEL_1 \n",
"3 2018-01-20 23:56:01+00:00 Thousand Oaks, CA California LABEL_1 \n",
"4 2018-01-18 21:01:00+00:00 New York, NY New York LABEL_1 \n",
"\n",
" Confidence SentimentScore Logits \\\n",
"0 0.770995 1.213938 [-0.42340710759162903, 0.7905306816101074] \n",
"1 0.882707 2.018322 [-0.8597121238708496, 1.1586099863052368] \n",
"2 0.970610 3.497270 [-1.611891508102417, 1.885378122329712] \n",
"3 0.994816 5.257029 [-2.465799570083618, 2.791228771209717] \n",
"4 0.991497 4.758805 [-2.2687225341796875, 2.4900829792022705] \n",
"\n",
" Month_Year \n",
"0 01/2018 \n",
"1 01/2018 \n",
"2 01/2018 \n",
"3 01/2018 \n",
"4 01/2018 "
],
"text/html": [
"\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Tweet ID | \n",
" Username | \n",
" Content | \n",
" Created At | \n",
" User Location | \n",
" Filtered Location | \n",
" Label | \n",
" Confidence | \n",
" SentimentScore | \n",
" Logits | \n",
" Month_Year | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 954865820156547072 | \n",
" JackPolakoff | \n",
" DANGER!\\n#StableGenius at work\\n #ImpeachTrum... | \n",
" 2018-01-20 23:58:46+00:00 | \n",
" New York City | \n",
" New York | \n",
" LABEL_1 | \n",
" 0.770995 | \n",
" 1.213938 | \n",
" [-0.42340710759162903, 0.7905306816101074] | \n",
" 01/2018 | \n",
"
\n",
" \n",
" 1 | \n",
" 954865477389570049 | \n",
" JackPolakoff | \n",
" What could possibly go wrong giving one party ... | \n",
" 2018-01-20 23:57:25+00:00 | \n",
" New York City | \n",
" New York | \n",
" LABEL_1 | \n",
" 0.882707 | \n",
" 2.018322 | \n",
" [-0.8597121238708496, 1.1586099863052368] | \n",
" 01/2018 | \n",
"
\n",
" \n",
" 2 | \n",
" 954865148212207617 | \n",
" sophixklein | \n",
" @sameverlark_ @ToddDracula probs right; honest... | \n",
" 2018-01-20 23:56:06+00:00 | \n",
" West Virginia, USA | \n",
" Virginia | \n",
" LABEL_1 | \n",
" 0.970610 | \n",
" 3.497270 | \n",
" [-1.611891508102417, 1.885378122329712] | \n",
" 01/2018 | \n",
"
\n",
" \n",
" 3 | \n",
" 954865125315481600 | \n",
" environmentalhe | \n",
" This, 100%. Prevention is the best protection ... | \n",
" 2018-01-20 23:56:01+00:00 | \n",
" Thousand Oaks, CA | \n",
" California | \n",
" LABEL_1 | \n",
" 0.994816 | \n",
" 5.257029 | \n",
" [-2.465799570083618, 2.791228771209717] | \n",
" 01/2018 | \n",
"
\n",
" \n",
" 4 | \n",
" 954096307341115392 | \n",
" UN | \n",
" Disasters push 24M people into poverty every y... | \n",
" 2018-01-18 21:01:00+00:00 | \n",
" New York, NY | \n",
" New York | \n",
" LABEL_1 | \n",
" 0.991497 | \n",
" 4.758805 | \n",
" [-2.2687225341796875, 2.4900829792022705] | \n",
" 01/2018 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
]
},
"metadata": {},
"execution_count": 110
}
]
},
{
"cell_type": "code",
"source": [
"# Month_Year holds 'MM/YYYY' strings (e.g. '01/2018'). Parse it once with an explicit\n",
"# format rather than building two DatetimeIndex objects that each infer the format.\n",
"month_start = pd.to_datetime(data['Month_Year'], format='%m/%Y')\n",
"data['Year'] = month_start.dt.year\n",
"data['Month'] = month_start.dt.month_name()\n",
"data.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 365
},
"id": "ixV2yf89Clpj",
"outputId": "9264678a-9d88-42c9-decb-4fd3b49079a0"
},
"execution_count": 111,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Tweet ID Username \\\n",
"0 954865820156547072 JackPolakoff \n",
"1 954865477389570049 JackPolakoff \n",
"2 954865148212207617 sophixklein \n",
"3 954865125315481600 environmentalhe \n",
"4 954096307341115392 UN \n",
"\n",
" Content \\\n",
"0 DANGER!\\n#StableGenius at work\\n #ImpeachTrum... \n",
"1 What could possibly go wrong giving one party ... \n",
"2 @sameverlark_ @ToddDracula probs right; honest... \n",
"3 This, 100%. Prevention is the best protection ... \n",
"4 Disasters push 24M people into poverty every y... \n",
"\n",
" Created At User Location Filtered Location Label \\\n",
"0 2018-01-20 23:58:46+00:00 New York City New York LABEL_1 \n",
"1 2018-01-20 23:57:25+00:00 New York City New York LABEL_1 \n",
"2 2018-01-20 23:56:06+00:00 West Virginia, USA Virginia LABEL_1 \n",
"3 2018-01-20 23:56:01+00:00 Thousand Oaks, CA California LABEL_1 \n",
"4 2018-01-18 21:01:00+00:00 New York, NY New York LABEL_1 \n",
"\n",
" Confidence SentimentScore Logits \\\n",
"0 0.770995 1.213938 [-0.42340710759162903, 0.7905306816101074] \n",
"1 0.882707 2.018322 [-0.8597121238708496, 1.1586099863052368] \n",
"2 0.970610 3.497270 [-1.611891508102417, 1.885378122329712] \n",
"3 0.994816 5.257029 [-2.465799570083618, 2.791228771209717] \n",
"4 0.991497 4.758805 [-2.2687225341796875, 2.4900829792022705] \n",
"\n",
" Month_Year Year Month \n",
"0 01/2018 2018 January \n",
"1 01/2018 2018 January \n",
"2 01/2018 2018 January \n",
"3 01/2018 2018 January \n",
"4 01/2018 2018 January "
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Tweet ID | \n",
" Username | \n",
" Content | \n",
" Created At | \n",
" User Location | \n",
" Filtered Location | \n",
" Label | \n",
" Confidence | \n",
" SentimentScore | \n",
" Logits | \n",
" Month_Year | \n",
" Year | \n",
" Month | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 954865820156547072 | \n",
" JackPolakoff | \n",
" DANGER!\\n#StableGenius at work\\n #ImpeachTrum... | \n",
" 2018-01-20 23:58:46+00:00 | \n",
" New York City | \n",
" New York | \n",
" LABEL_1 | \n",
" 0.770995 | \n",
" 1.213938 | \n",
" [-0.42340710759162903, 0.7905306816101074] | \n",
" 01/2018 | \n",
" 2018 | \n",
" January | \n",
"
\n",
" \n",
" 1 | \n",
" 954865477389570049 | \n",
" JackPolakoff | \n",
" What could possibly go wrong giving one party ... | \n",
" 2018-01-20 23:57:25+00:00 | \n",
" New York City | \n",
" New York | \n",
" LABEL_1 | \n",
" 0.882707 | \n",
" 2.018322 | \n",
" [-0.8597121238708496, 1.1586099863052368] | \n",
" 01/2018 | \n",
" 2018 | \n",
" January | \n",
"
\n",
" \n",
" 2 | \n",
" 954865148212207617 | \n",
" sophixklein | \n",
" @sameverlark_ @ToddDracula probs right; honest... | \n",
" 2018-01-20 23:56:06+00:00 | \n",
" West Virginia, USA | \n",
" Virginia | \n",
" LABEL_1 | \n",
" 0.970610 | \n",
" 3.497270 | \n",
" [-1.611891508102417, 1.885378122329712] | \n",
" 01/2018 | \n",
" 2018 | \n",
" January | \n",
"
\n",
" \n",
" 3 | \n",
" 954865125315481600 | \n",
" environmentalhe | \n",
" This, 100%. Prevention is the best protection ... | \n",
" 2018-01-20 23:56:01+00:00 | \n",
" Thousand Oaks, CA | \n",
" California | \n",
" LABEL_1 | \n",
" 0.994816 | \n",
" 5.257029 | \n",
" [-2.465799570083618, 2.791228771209717] | \n",
" 01/2018 | \n",
" 2018 | \n",
" January | \n",
"
\n",
" \n",
" 4 | \n",
" 954096307341115392 | \n",
" UN | \n",
" Disasters push 24M people into poverty every y... | \n",
" 2018-01-18 21:01:00+00:00 | \n",
" New York, NY | \n",
" New York | \n",
" LABEL_1 | \n",
" 0.991497 | \n",
" 4.758805 | \n",
" [-2.2687225341796875, 2.4900829792022705] | \n",
" 01/2018 | \n",
" 2018 | \n",
" January | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
]
},
"metadata": {},
"execution_count": 111
}
]
},
{
"cell_type": "code",
"source": [
"# Split the tweets into one frame per calendar year, then preview the 2020 slice.\n",
"year_groups = data.groupby('Year')\n",
"data_2018, data_2019, data_2020 = (\n",
"    year_groups.get_group(year) for year in (2018, 2019, 2020)\n",
")\n",
"data_2020.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 452
},
"id": "jXDTpPqkKeNq",
"outputId": "28cf4541-bf9e-4a2d-b62f-3b9fd2f5e034"
},
"execution_count": 112,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Tweet ID Username \\\n",
"59475 1217959618174967815 dauriachrista \n",
"59476 1217959534947377158 xokaylaaa \n",
"59477 1217959496833695744 Micwritermcleod \n",
"59478 1217959392273993728 RoscoeMcGillic1 \n",
"59479 1217959316948316160 UtahCleanEnergy \n",
"\n",
" Content \\\n",
"59475 @AnnaPerssonDR Climate Change is very, serious... \n",
"59476 Tom Steyer seems like a great guy, but his top... \n",
"59477 @SkyNews Climate change is normal. The hysteri... \n",
"59478 @CharlieDaniels Your neighbor, Al Gore, was an... \n",
"59479 We encourage read Rep. @StephenHandyUT in the ... \n",
"\n",
" Created At User Location \\\n",
"59475 2020-01-16 23:59:36+00:00 Henrietta, NY \n",
"59476 2020-01-16 23:59:16+00:00 Hilton Head Island, SC, USA \n",
"59477 2020-01-16 23:59:07+00:00 Maryland, USA \n",
"59478 2020-01-16 23:58:42+00:00 Nashville, TN \n",
"59479 2020-01-16 23:58:24+00:00 Utah \n",
"\n",
" Filtered Location Label Confidence SentimentScore \\\n",
"59475 New York LABEL_1 0.852147 1.751541 \n",
"59476 South Carolina LABEL_1 0.900721 2.205264 \n",
"59477 Maryland LABEL_0 0.892076 -2.112127 \n",
"59478 Tennessee LABEL_1 0.868285 1.885878 \n",
"59479 Utah LABEL_1 0.995574 5.415805 \n",
"\n",
" Logits Month_Year Year Month \n",
"59475 [-0.765012264251709, 0.9865289330482483] 01/2020 2020 January \n",
"59476 [-0.9609189629554749, 1.2443453073501587] 01/2020 2020 January \n",
"59477 [1.184826135635376, -0.927301287651062] 01/2020 2020 January \n",
"59478 [-0.8685668110847473, 1.0173112154006958] 01/2020 2020 January \n",
"59479 [-2.542006492614746, 2.873798131942749] 01/2020 2020 January "
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Tweet ID | \n",
" Username | \n",
" Content | \n",
" Created At | \n",
" User Location | \n",
" Filtered Location | \n",
" Label | \n",
" Confidence | \n",
" SentimentScore | \n",
" Logits | \n",
" Month_Year | \n",
" Year | \n",
" Month | \n",
"
\n",
" \n",
" \n",
" \n",
" 59475 | \n",
" 1217959618174967815 | \n",
" dauriachrista | \n",
" @AnnaPerssonDR Climate Change is very, serious... | \n",
" 2020-01-16 23:59:36+00:00 | \n",
" Henrietta, NY | \n",
" New York | \n",
" LABEL_1 | \n",
" 0.852147 | \n",
" 1.751541 | \n",
" [-0.765012264251709, 0.9865289330482483] | \n",
" 01/2020 | \n",
" 2020 | \n",
" January | \n",
"
\n",
" \n",
" 59476 | \n",
" 1217959534947377158 | \n",
" xokaylaaa | \n",
" Tom Steyer seems like a great guy, but his top... | \n",
" 2020-01-16 23:59:16+00:00 | \n",
" Hilton Head Island, SC, USA | \n",
" South Carolina | \n",
" LABEL_1 | \n",
" 0.900721 | \n",
" 2.205264 | \n",
" [-0.9609189629554749, 1.2443453073501587] | \n",
" 01/2020 | \n",
" 2020 | \n",
" January | \n",
"
\n",
" \n",
" 59477 | \n",
" 1217959496833695744 | \n",
" Micwritermcleod | \n",
" @SkyNews Climate change is normal. The hysteri... | \n",
" 2020-01-16 23:59:07+00:00 | \n",
" Maryland, USA | \n",
" Maryland | \n",
" LABEL_0 | \n",
" 0.892076 | \n",
" -2.112127 | \n",
" [1.184826135635376, -0.927301287651062] | \n",
" 01/2020 | \n",
" 2020 | \n",
" January | \n",
"
\n",
" \n",
" 59478 | \n",
" 1217959392273993728 | \n",
" RoscoeMcGillic1 | \n",
" @CharlieDaniels Your neighbor, Al Gore, was an... | \n",
" 2020-01-16 23:58:42+00:00 | \n",
" Nashville, TN | \n",
" Tennessee | \n",
" LABEL_1 | \n",
" 0.868285 | \n",
" 1.885878 | \n",
" [-0.8685668110847473, 1.0173112154006958] | \n",
" 01/2020 | \n",
" 2020 | \n",
" January | \n",
"
\n",
" \n",
" 59479 | \n",
" 1217959316948316160 | \n",
" UtahCleanEnergy | \n",
" We encourage read Rep. @StephenHandyUT in the ... | \n",
" 2020-01-16 23:58:24+00:00 | \n",
" Utah | \n",
" Utah | \n",
" LABEL_1 | \n",
" 0.995574 | \n",
" 5.415805 | \n",
" [-2.542006492614746, 2.873798131942749] | \n",
" 01/2020 | \n",
" 2020 | \n",
" January | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
]
},
"metadata": {},
"execution_count": 112
}
]
},
{
"cell_type": "code",
"source": [
"# SentimentScore by month for the 2018 tweets, drawn on an explicitly created Axes.\n",
"sns.set_theme(style='whitegrid')\n",
"fig, ax = plt.subplots(figsize=(12.5, 6))\n",
"sns.lineplot(\n",
"    data=data_2018,\n",
"    x='Month',\n",
"    y='SentimentScore',\n",
"    color='skyblue',\n",
"    marker='o',\n",
"    ax=ax,\n",
")\n",
"ax.set_title('2018-usa-data', size=20)\n",
"plt.show()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 581
},
"id": "mSsftKOFpZ2V",
"outputId": "7c073f97-28f5-445a-e6d5-9daf0a39b8eb"
},
"execution_count": 114,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"