Spaces:
Running
Running
Upload datacreation.ipynb
Browse files- datacreation.ipynb +76 -254
datacreation.ipynb
CHANGED
|
@@ -20,13 +20,13 @@
|
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"cell_type": "code",
|
| 23 |
-
"execution_count":
|
| 24 |
"metadata": {
|
| 25 |
"colab": {
|
| 26 |
"base_uri": "https://localhost:8080/"
|
| 27 |
},
|
| 28 |
"id": "f48c8f8c",
|
| 29 |
-
"outputId": "
|
| 30 |
},
|
| 31 |
"outputs": [
|
| 32 |
{
|
|
@@ -46,8 +46,8 @@
|
|
| 46 |
"Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas) (2025.3)\n",
|
| 47 |
"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (1.3.3)\n",
|
| 48 |
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (0.12.1)\n",
|
| 49 |
-
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (4.
|
| 50 |
-
"Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (1.
|
| 51 |
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (26.0)\n",
|
| 52 |
"Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (11.3.0)\n",
|
| 53 |
"Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (3.3.2)\n",
|
|
@@ -85,7 +85,7 @@
|
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"cell_type": "code",
|
| 88 |
-
"execution_count":
|
| 89 |
"metadata": {
|
| 90 |
"id": "91d52125"
|
| 91 |
},
|
|
@@ -113,7 +113,7 @@
|
|
| 113 |
},
|
| 114 |
{
|
| 115 |
"cell_type": "code",
|
| 116 |
-
"execution_count":
|
| 117 |
"metadata": {
|
| 118 |
"id": "xqO5Y3dnYhxt"
|
| 119 |
},
|
|
@@ -145,7 +145,7 @@
|
|
| 145 |
},
|
| 146 |
{
|
| 147 |
"cell_type": "code",
|
| 148 |
-
"execution_count":
|
| 149 |
"metadata": {
|
| 150 |
"id": "l5FkkNhUYTHh"
|
| 151 |
},
|
|
@@ -163,7 +163,52 @@
|
|
| 163 |
},
|
| 164 |
{
|
| 165 |
"cell_type": "code",
|
| 166 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
"metadata": {
|
| 168 |
"id": "lC1U_YHtZifh"
|
| 169 |
},
|
|
@@ -187,180 +232,11 @@
|
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"cell_type": "code",
|
| 190 |
-
"execution_count":
|
| 191 |
"metadata": {
|
| 192 |
-
"
|
| 193 |
-
"base_uri": "https://localhost:8080/",
|
| 194 |
-
"height": 206
|
| 195 |
-
},
|
| 196 |
-
"id": "O_wIvTxYZqCK",
|
| 197 |
-
"outputId": "349b36b0-c008-4fd5-d4a4-dba38ae18337"
|
| 198 |
},
|
| 199 |
-
"outputs": [
|
| 200 |
-
{
|
| 201 |
-
"output_type": "execute_result",
|
| 202 |
-
"data": {
|
| 203 |
-
"text/plain": [
|
| 204 |
-
" title price rating\n",
|
| 205 |
-
"0 A Light in the Attic 51.77 Three\n",
|
| 206 |
-
"1 Tipping the Velvet 53.74 One\n",
|
| 207 |
-
"2 Soumission 50.10 One\n",
|
| 208 |
-
"3 Sharp Objects 47.82 Four\n",
|
| 209 |
-
"4 Sapiens: A Brief History of Humankind 54.23 Five"
|
| 210 |
-
],
|
| 211 |
-
"text/html": [
|
| 212 |
-
"\n",
|
| 213 |
-
" <div id=\"df-04c87660-4415-45e9-ad3b-3fa19d9402c2\" class=\"colab-df-container\">\n",
|
| 214 |
-
" <div>\n",
|
| 215 |
-
"<style scoped>\n",
|
| 216 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
| 217 |
-
" vertical-align: middle;\n",
|
| 218 |
-
" }\n",
|
| 219 |
-
"\n",
|
| 220 |
-
" .dataframe tbody tr th {\n",
|
| 221 |
-
" vertical-align: top;\n",
|
| 222 |
-
" }\n",
|
| 223 |
-
"\n",
|
| 224 |
-
" .dataframe thead th {\n",
|
| 225 |
-
" text-align: right;\n",
|
| 226 |
-
" }\n",
|
| 227 |
-
"</style>\n",
|
| 228 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
| 229 |
-
" <thead>\n",
|
| 230 |
-
" <tr style=\"text-align: right;\">\n",
|
| 231 |
-
" <th></th>\n",
|
| 232 |
-
" <th>title</th>\n",
|
| 233 |
-
" <th>price</th>\n",
|
| 234 |
-
" <th>rating</th>\n",
|
| 235 |
-
" </tr>\n",
|
| 236 |
-
" </thead>\n",
|
| 237 |
-
" <tbody>\n",
|
| 238 |
-
" <tr>\n",
|
| 239 |
-
" <th>0</th>\n",
|
| 240 |
-
" <td>A Light in the Attic</td>\n",
|
| 241 |
-
" <td>51.77</td>\n",
|
| 242 |
-
" <td>Three</td>\n",
|
| 243 |
-
" </tr>\n",
|
| 244 |
-
" <tr>\n",
|
| 245 |
-
" <th>1</th>\n",
|
| 246 |
-
" <td>Tipping the Velvet</td>\n",
|
| 247 |
-
" <td>53.74</td>\n",
|
| 248 |
-
" <td>One</td>\n",
|
| 249 |
-
" </tr>\n",
|
| 250 |
-
" <tr>\n",
|
| 251 |
-
" <th>2</th>\n",
|
| 252 |
-
" <td>Soumission</td>\n",
|
| 253 |
-
" <td>50.10</td>\n",
|
| 254 |
-
" <td>One</td>\n",
|
| 255 |
-
" </tr>\n",
|
| 256 |
-
" <tr>\n",
|
| 257 |
-
" <th>3</th>\n",
|
| 258 |
-
" <td>Sharp Objects</td>\n",
|
| 259 |
-
" <td>47.82</td>\n",
|
| 260 |
-
" <td>Four</td>\n",
|
| 261 |
-
" </tr>\n",
|
| 262 |
-
" <tr>\n",
|
| 263 |
-
" <th>4</th>\n",
|
| 264 |
-
" <td>Sapiens: A Brief History of Humankind</td>\n",
|
| 265 |
-
" <td>54.23</td>\n",
|
| 266 |
-
" <td>Five</td>\n",
|
| 267 |
-
" </tr>\n",
|
| 268 |
-
" </tbody>\n",
|
| 269 |
-
"</table>\n",
|
| 270 |
-
"</div>\n",
|
| 271 |
-
" <div class=\"colab-df-buttons\">\n",
|
| 272 |
-
"\n",
|
| 273 |
-
" <div class=\"colab-df-container\">\n",
|
| 274 |
-
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-04c87660-4415-45e9-ad3b-3fa19d9402c2')\"\n",
|
| 275 |
-
" title=\"Convert this dataframe to an interactive table.\"\n",
|
| 276 |
-
" style=\"display:none;\">\n",
|
| 277 |
-
"\n",
|
| 278 |
-
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
|
| 279 |
-
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
|
| 280 |
-
" </svg>\n",
|
| 281 |
-
" </button>\n",
|
| 282 |
-
"\n",
|
| 283 |
-
" <style>\n",
|
| 284 |
-
" .colab-df-container {\n",
|
| 285 |
-
" display:flex;\n",
|
| 286 |
-
" gap: 12px;\n",
|
| 287 |
-
" }\n",
|
| 288 |
-
"\n",
|
| 289 |
-
" .colab-df-convert {\n",
|
| 290 |
-
" background-color: #E8F0FE;\n",
|
| 291 |
-
" border: none;\n",
|
| 292 |
-
" border-radius: 50%;\n",
|
| 293 |
-
" cursor: pointer;\n",
|
| 294 |
-
" display: none;\n",
|
| 295 |
-
" fill: #1967D2;\n",
|
| 296 |
-
" height: 32px;\n",
|
| 297 |
-
" padding: 0 0 0 0;\n",
|
| 298 |
-
" width: 32px;\n",
|
| 299 |
-
" }\n",
|
| 300 |
-
"\n",
|
| 301 |
-
" .colab-df-convert:hover {\n",
|
| 302 |
-
" background-color: #E2EBFA;\n",
|
| 303 |
-
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
| 304 |
-
" fill: #174EA6;\n",
|
| 305 |
-
" }\n",
|
| 306 |
-
"\n",
|
| 307 |
-
" .colab-df-buttons div {\n",
|
| 308 |
-
" margin-bottom: 4px;\n",
|
| 309 |
-
" }\n",
|
| 310 |
-
"\n",
|
| 311 |
-
" [theme=dark] .colab-df-convert {\n",
|
| 312 |
-
" background-color: #3B4455;\n",
|
| 313 |
-
" fill: #D2E3FC;\n",
|
| 314 |
-
" }\n",
|
| 315 |
-
"\n",
|
| 316 |
-
" [theme=dark] .colab-df-convert:hover {\n",
|
| 317 |
-
" background-color: #434B5C;\n",
|
| 318 |
-
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
| 319 |
-
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
| 320 |
-
" fill: #FFFFFF;\n",
|
| 321 |
-
" }\n",
|
| 322 |
-
" </style>\n",
|
| 323 |
-
"\n",
|
| 324 |
-
" <script>\n",
|
| 325 |
-
" const buttonEl =\n",
|
| 326 |
-
" document.querySelector('#df-04c87660-4415-45e9-ad3b-3fa19d9402c2 button.colab-df-convert');\n",
|
| 327 |
-
" buttonEl.style.display =\n",
|
| 328 |
-
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
| 329 |
-
"\n",
|
| 330 |
-
" async function convertToInteractive(key) {\n",
|
| 331 |
-
" const element = document.querySelector('#df-04c87660-4415-45e9-ad3b-3fa19d9402c2');\n",
|
| 332 |
-
" const dataTable =\n",
|
| 333 |
-
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
| 334 |
-
" [key], {});\n",
|
| 335 |
-
" if (!dataTable) return;\n",
|
| 336 |
-
"\n",
|
| 337 |
-
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
| 338 |
-
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
| 339 |
-
" + ' to learn more about interactive tables.';\n",
|
| 340 |
-
" element.innerHTML = '';\n",
|
| 341 |
-
" dataTable['output_type'] = 'display_data';\n",
|
| 342 |
-
" await google.colab.output.renderOutput(dataTable, element);\n",
|
| 343 |
-
" const docLink = document.createElement('div');\n",
|
| 344 |
-
" docLink.innerHTML = docLinkHtml;\n",
|
| 345 |
-
" element.appendChild(docLink);\n",
|
| 346 |
-
" }\n",
|
| 347 |
-
" </script>\n",
|
| 348 |
-
" </div>\n",
|
| 349 |
-
"\n",
|
| 350 |
-
"\n",
|
| 351 |
-
" </div>\n",
|
| 352 |
-
" </div>\n"
|
| 353 |
-
],
|
| 354 |
-
"application/vnd.google.colaboratory.intrinsic+json": {
|
| 355 |
-
"type": "dataframe",
|
| 356 |
-
"variable_name": "df_books",
|
| 357 |
-
"summary": "{\n \"name\": \"df_books\",\n \"rows\": 1000,\n \"fields\": [\n {\n \"column\": \"title\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 999,\n \"samples\": [\n \"The Grownup\",\n \"Persepolis: The Story of a Childhood (Persepolis #1-2)\",\n \"Ayumi's Violin\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.446689669952772,\n \"min\": 10.0,\n \"max\": 59.99,\n \"num_unique_values\": 903,\n \"samples\": [\n 19.73,\n 55.65,\n 46.31\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"One\",\n \"Two\",\n \"Four\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
|
| 358 |
-
}
|
| 359 |
-
},
|
| 360 |
-
"metadata": {},
|
| 361 |
-
"execution_count": 6
|
| 362 |
-
}
|
| 363 |
-
],
|
| 364 |
"source": []
|
| 365 |
},
|
| 366 |
{
|
|
@@ -383,7 +259,7 @@
|
|
| 383 |
},
|
| 384 |
{
|
| 385 |
"cell_type": "code",
|
| 386 |
-
"execution_count":
|
| 387 |
"metadata": {
|
| 388 |
"id": "-gPXGcRPuV_9"
|
| 389 |
},
|
|
@@ -410,7 +286,7 @@
|
|
| 410 |
},
|
| 411 |
{
|
| 412 |
"cell_type": "code",
|
| 413 |
-
"execution_count":
|
| 414 |
"metadata": {
|
| 415 |
"id": "mnd5hdAbaNjz"
|
| 416 |
},
|
|
@@ -433,7 +309,7 @@
|
|
| 433 |
},
|
| 434 |
{
|
| 435 |
"cell_type": "code",
|
| 436 |
-
"execution_count":
|
| 437 |
"metadata": {
|
| 438 |
"id": "V-G3OCUCgR07"
|
| 439 |
},
|
|
@@ -451,7 +327,7 @@
|
|
| 451 |
},
|
| 452 |
{
|
| 453 |
"cell_type": "code",
|
| 454 |
-
"execution_count":
|
| 455 |
"metadata": {
|
| 456 |
"id": "kUtWmr8maZLZ"
|
| 457 |
},
|
|
@@ -477,7 +353,7 @@
|
|
| 477 |
},
|
| 478 |
{
|
| 479 |
"cell_type": "code",
|
| 480 |
-
"execution_count":
|
| 481 |
"metadata": {
|
| 482 |
"id": "tafQj8_7gYCG"
|
| 483 |
},
|
|
@@ -504,7 +380,7 @@
|
|
| 504 |
},
|
| 505 |
{
|
| 506 |
"cell_type": "code",
|
| 507 |
-
"execution_count":
|
| 508 |
"metadata": {
|
| 509 |
"id": "qkVhYPXGbgEn"
|
| 510 |
},
|
|
@@ -541,7 +417,7 @@
|
|
| 541 |
},
|
| 542 |
{
|
| 543 |
"cell_type": "code",
|
| 544 |
-
"execution_count":
|
| 545 |
"metadata": {
|
| 546 |
"id": "SlJ24AUafoDB"
|
| 547 |
},
|
|
@@ -570,7 +446,7 @@
|
|
| 570 |
},
|
| 571 |
{
|
| 572 |
"cell_type": "code",
|
| 573 |
-
"execution_count":
|
| 574 |
"metadata": {
|
| 575 |
"id": "wcN6gtiZg-ws"
|
| 576 |
},
|
|
@@ -588,28 +464,11 @@
|
|
| 588 |
},
|
| 589 |
{
|
| 590 |
"cell_type": "code",
|
| 591 |
-
"execution_count":
|
| 592 |
"metadata": {
|
| 593 |
-
"
|
| 594 |
-
"base_uri": "https://localhost:8080/"
|
| 595 |
-
},
|
| 596 |
-
"id": "MzbZvLcAhGaH",
|
| 597 |
-
"outputId": "c692bb04-7263-4115-a2ba-c72fe0180722"
|
| 598 |
},
|
| 599 |
-
"outputs": [
|
| 600 |
-
{
|
| 601 |
-
"output_type": "stream",
|
| 602 |
-
"name": "stdout",
|
| 603 |
-
"text": [
|
| 604 |
-
" title month units_sold sentiment_label\n",
|
| 605 |
-
"0 A Light in the Attic 2024-08 100 neutral\n",
|
| 606 |
-
"1 A Light in the Attic 2024-09 109 neutral\n",
|
| 607 |
-
"2 A Light in the Attic 2024-10 102 neutral\n",
|
| 608 |
-
"3 A Light in the Attic 2024-11 107 neutral\n",
|
| 609 |
-
"4 A Light in the Attic 2024-12 108 neutral\n"
|
| 610 |
-
]
|
| 611 |
-
}
|
| 612 |
-
],
|
| 613 |
"source": [
|
| 614 |
"df_sales.to_csv(\"synthetic_sales_data.csv\", index=False)\n",
|
| 615 |
"\n",
|
|
@@ -636,7 +495,7 @@
|
|
| 636 |
},
|
| 637 |
{
|
| 638 |
"cell_type": "code",
|
| 639 |
-
"execution_count":
|
| 640 |
"metadata": {
|
| 641 |
"id": "b3cd2a50"
|
| 642 |
},
|
|
@@ -672,7 +531,7 @@
|
|
| 672 |
},
|
| 673 |
{
|
| 674 |
"cell_type": "code",
|
| 675 |
-
"execution_count":
|
| 676 |
"metadata": {
|
| 677 |
"id": "l2SRc3PjuTGM"
|
| 678 |
},
|
|
@@ -705,7 +564,7 @@
|
|
| 705 |
},
|
| 706 |
{
|
| 707 |
"cell_type": "code",
|
| 708 |
-
"execution_count":
|
| 709 |
"metadata": {
|
| 710 |
"id": "ZUKUqZsuumsp"
|
| 711 |
},
|
|
@@ -726,24 +585,11 @@
|
|
| 726 |
},
|
| 727 |
{
|
| 728 |
"cell_type": "code",
|
| 729 |
-
"execution_count":
|
| 730 |
"metadata": {
|
| 731 |
-
"
|
| 732 |
-
"base_uri": "https://localhost:8080/"
|
| 733 |
-
},
|
| 734 |
-
"id": "3946e521",
|
| 735 |
-
"outputId": "514d7bef-0488-4933-b03c-953b9e8a7f66"
|
| 736 |
},
|
| 737 |
-
"outputs": [
|
| 738 |
-
{
|
| 739 |
-
"output_type": "stream",
|
| 740 |
-
"name": "stdout",
|
| 741 |
-
"text": [
|
| 742 |
-
"✅ Wrote synthetic_title_level_features.csv\n",
|
| 743 |
-
"✅ Wrote synthetic_monthly_revenue_series.csv\n"
|
| 744 |
-
]
|
| 745 |
-
}
|
| 746 |
-
],
|
| 747 |
"source": [
|
| 748 |
"import numpy as np\n",
|
| 749 |
"\n",
|
|
@@ -858,35 +704,11 @@
|
|
| 858 |
},
|
| 859 |
{
|
| 860 |
"cell_type": "code",
|
| 861 |
-
"execution_count":
|
| 862 |
"metadata": {
|
| 863 |
-
"
|
| 864 |
-
"base_uri": "https://localhost:8080/"
|
| 865 |
-
},
|
| 866 |
-
"id": "xfE8NMqOurKo",
|
| 867 |
-
"outputId": "191730ba-d5e2-4df7-97d2-99feb0b704af"
|
| 868 |
},
|
| 869 |
-
"outputs": [
|
| 870 |
-
{
|
| 871 |
-
"output_type": "stream",
|
| 872 |
-
"name": "stdout",
|
| 873 |
-
"text": [
|
| 874 |
-
" title sentiment_label \\\n",
|
| 875 |
-
"0 A Light in the Attic neutral \n",
|
| 876 |
-
"1 A Light in the Attic neutral \n",
|
| 877 |
-
"2 A Light in the Attic neutral \n",
|
| 878 |
-
"3 A Light in the Attic neutral \n",
|
| 879 |
-
"4 A Light in the Attic neutral \n",
|
| 880 |
-
"\n",
|
| 881 |
-
" review_text rating popularity_score \n",
|
| 882 |
-
"0 Had potential that went unrealized. Three 3 \n",
|
| 883 |
-
"1 The themes were solid, but not well explored. Three 3 \n",
|
| 884 |
-
"2 It simply lacked that emotional punch. Three 3 \n",
|
| 885 |
-
"3 Serviceable but not something I'd go out of my... Three 3 \n",
|
| 886 |
-
"4 Standard fare with some promise. Three 3 \n"
|
| 887 |
-
]
|
| 888 |
-
}
|
| 889 |
-
],
|
| 890 |
"source": []
|
| 891 |
}
|
| 892 |
],
|
|
|
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"cell_type": "code",
|
| 23 |
+
"execution_count": 11,
|
| 24 |
"metadata": {
|
| 25 |
"colab": {
|
| 26 |
"base_uri": "https://localhost:8080/"
|
| 27 |
},
|
| 28 |
"id": "f48c8f8c",
|
| 29 |
+
"outputId": "f1a76cb9-ce00-47c2-df85-094eb6e9a141"
|
| 30 |
},
|
| 31 |
"outputs": [
|
| 32 |
{
|
|
|
|
| 46 |
"Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas) (2025.3)\n",
|
| 47 |
"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (1.3.3)\n",
|
| 48 |
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (0.12.1)\n",
|
| 49 |
+
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (4.62.1)\n",
|
| 50 |
+
"Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (1.5.0)\n",
|
| 51 |
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (26.0)\n",
|
| 52 |
"Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (11.3.0)\n",
|
| 53 |
"Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (3.3.2)\n",
|
|
|
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"cell_type": "code",
|
| 88 |
+
"execution_count": 12,
|
| 89 |
"metadata": {
|
| 90 |
"id": "91d52125"
|
| 91 |
},
|
|
|
|
| 113 |
},
|
| 114 |
{
|
| 115 |
"cell_type": "code",
|
| 116 |
+
"execution_count": null,
|
| 117 |
"metadata": {
|
| 118 |
"id": "xqO5Y3dnYhxt"
|
| 119 |
},
|
|
|
|
| 145 |
},
|
| 146 |
{
|
| 147 |
"cell_type": "code",
|
| 148 |
+
"execution_count": null,
|
| 149 |
"metadata": {
|
| 150 |
"id": "l5FkkNhUYTHh"
|
| 151 |
},
|
|
|
|
| 163 |
},
|
| 164 |
{
|
| 165 |
"cell_type": "code",
|
| 166 |
+
"source": [
|
| 167 |
+
"import pandas as pd\n",
|
| 168 |
+
"\n",
|
| 169 |
+
"df_books = pd.DataFrame({\n",
|
| 170 |
+
" \"title\": [\"Book A\", \"Book B\", \"Book C\"],\n",
|
| 171 |
+
" \"price\": [10.99, 12.50, 8.99],\n",
|
| 172 |
+
" \"rating\": [4, 5, 3]\n",
|
| 173 |
+
"})"
|
| 174 |
+
],
|
| 175 |
+
"metadata": {
|
| 176 |
+
"id": "j_U7YrVrrN3n"
|
| 177 |
+
},
|
| 178 |
+
"execution_count": null,
|
| 179 |
+
"outputs": []
|
| 180 |
+
},
|
| 181 |
+
{
|
| 182 |
+
"cell_type": "code",
|
| 183 |
+
"source": [
|
| 184 |
+
"df_books.to_csv(\"books_data.csv\", index=False)"
|
| 185 |
+
],
|
| 186 |
+
"metadata": {
|
| 187 |
+
"id": "KJ-lE6ktrQX9"
|
| 188 |
+
},
|
| 189 |
+
"execution_count": null,
|
| 190 |
+
"outputs": []
|
| 191 |
+
},
|
| 192 |
+
{
|
| 193 |
+
"cell_type": "code",
|
| 194 |
+
"source": [
|
| 195 |
+
"df_reviews = pd.DataFrame({\n",
|
| 196 |
+
" \"title\": [\"Book A\", \"Book B\"],\n",
|
| 197 |
+
" \"review\": [\"Great book\", \"Okay book\"],\n",
|
| 198 |
+
" \"sentiment_label\": [\"positive\", \"neutral\"]\n",
|
| 199 |
+
"})\n",
|
| 200 |
+
"\n",
|
| 201 |
+
"df_reviews.to_csv(\"synthetic_book_reviews.csv\", index=False)"
|
| 202 |
+
],
|
| 203 |
+
"metadata": {
|
| 204 |
+
"id": "AqZUPGJtrSET"
|
| 205 |
+
},
|
| 206 |
+
"execution_count": null,
|
| 207 |
+
"outputs": []
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"cell_type": "code",
|
| 211 |
+
"execution_count": null,
|
| 212 |
"metadata": {
|
| 213 |
"id": "lC1U_YHtZifh"
|
| 214 |
},
|
|
|
|
| 232 |
},
|
| 233 |
{
|
| 234 |
"cell_type": "code",
|
| 235 |
+
"execution_count": null,
|
| 236 |
"metadata": {
|
| 237 |
+
"id": "O_wIvTxYZqCK"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
},
|
| 239 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
"source": []
|
| 241 |
},
|
| 242 |
{
|
|
|
|
| 259 |
},
|
| 260 |
{
|
| 261 |
"cell_type": "code",
|
| 262 |
+
"execution_count": null,
|
| 263 |
"metadata": {
|
| 264 |
"id": "-gPXGcRPuV_9"
|
| 265 |
},
|
|
|
|
| 286 |
},
|
| 287 |
{
|
| 288 |
"cell_type": "code",
|
| 289 |
+
"execution_count": null,
|
| 290 |
"metadata": {
|
| 291 |
"id": "mnd5hdAbaNjz"
|
| 292 |
},
|
|
|
|
| 309 |
},
|
| 310 |
{
|
| 311 |
"cell_type": "code",
|
| 312 |
+
"execution_count": null,
|
| 313 |
"metadata": {
|
| 314 |
"id": "V-G3OCUCgR07"
|
| 315 |
},
|
|
|
|
| 327 |
},
|
| 328 |
{
|
| 329 |
"cell_type": "code",
|
| 330 |
+
"execution_count": null,
|
| 331 |
"metadata": {
|
| 332 |
"id": "kUtWmr8maZLZ"
|
| 333 |
},
|
|
|
|
| 353 |
},
|
| 354 |
{
|
| 355 |
"cell_type": "code",
|
| 356 |
+
"execution_count": null,
|
| 357 |
"metadata": {
|
| 358 |
"id": "tafQj8_7gYCG"
|
| 359 |
},
|
|
|
|
| 380 |
},
|
| 381 |
{
|
| 382 |
"cell_type": "code",
|
| 383 |
+
"execution_count": null,
|
| 384 |
"metadata": {
|
| 385 |
"id": "qkVhYPXGbgEn"
|
| 386 |
},
|
|
|
|
| 417 |
},
|
| 418 |
{
|
| 419 |
"cell_type": "code",
|
| 420 |
+
"execution_count": null,
|
| 421 |
"metadata": {
|
| 422 |
"id": "SlJ24AUafoDB"
|
| 423 |
},
|
|
|
|
| 446 |
},
|
| 447 |
{
|
| 448 |
"cell_type": "code",
|
| 449 |
+
"execution_count": null,
|
| 450 |
"metadata": {
|
| 451 |
"id": "wcN6gtiZg-ws"
|
| 452 |
},
|
|
|
|
| 464 |
},
|
| 465 |
{
|
| 466 |
"cell_type": "code",
|
| 467 |
+
"execution_count": null,
|
| 468 |
"metadata": {
|
| 469 |
+
"id": "MzbZvLcAhGaH"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
},
|
| 471 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
"source": [
|
| 473 |
"df_sales.to_csv(\"synthetic_sales_data.csv\", index=False)\n",
|
| 474 |
"\n",
|
|
|
|
| 495 |
},
|
| 496 |
{
|
| 497 |
"cell_type": "code",
|
| 498 |
+
"execution_count": null,
|
| 499 |
"metadata": {
|
| 500 |
"id": "b3cd2a50"
|
| 501 |
},
|
|
|
|
| 531 |
},
|
| 532 |
{
|
| 533 |
"cell_type": "code",
|
| 534 |
+
"execution_count": null,
|
| 535 |
"metadata": {
|
| 536 |
"id": "l2SRc3PjuTGM"
|
| 537 |
},
|
|
|
|
| 564 |
},
|
| 565 |
{
|
| 566 |
"cell_type": "code",
|
| 567 |
+
"execution_count": null,
|
| 568 |
"metadata": {
|
| 569 |
"id": "ZUKUqZsuumsp"
|
| 570 |
},
|
|
|
|
| 585 |
},
|
| 586 |
{
|
| 587 |
"cell_type": "code",
|
| 588 |
+
"execution_count": null,
|
| 589 |
"metadata": {
|
| 590 |
+
"id": "3946e521"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 591 |
},
|
| 592 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 593 |
"source": [
|
| 594 |
"import numpy as np\n",
|
| 595 |
"\n",
|
|
|
|
| 704 |
},
|
| 705 |
{
|
| 706 |
"cell_type": "code",
|
| 707 |
+
"execution_count": null,
|
| 708 |
"metadata": {
|
| 709 |
+
"id": "xfE8NMqOurKo"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 710 |
},
|
| 711 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 712 |
"source": []
|
| 713 |
}
|
| 714 |
],
|