{ "cells": [ { "cell_type": "code", "execution_count": 3, "id": "a10c60c4", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/flights/Combined_Flights_2022.csv')" ] }, { "cell_type": "code", "execution_count": 17, "id": "1c12a0f0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FlightDateAirlineOriginDestCancelledDivertedCRSDepTimeDepTimeDepDelayMinutesDepDelay...WheelsOffWheelsOnTaxiInCRSArrTimeArrDelayArrDel15ArrivalDelayGroupsArrTimeBlkDistanceGroupDivAirportLandings
02022-04-04Commutair Aka Champlain Enterprises, Inc.GJTDENFalseFalse11331123.00.0-10.0...1140.01220.08.01245-17.00.0-2.01200-125910
12022-04-04Commutair Aka Champlain Enterprises, Inc.HRLIAHFalseFalse732728.00.0-4.0...744.0839.09.0849-1.00.0-1.00800-085920
22022-04-04Commutair Aka Champlain Enterprises, Inc.DRODENFalseFalse15291514.00.0-15.0...1535.01622.014.01639-3.00.0-1.01600-165920
32022-04-04Commutair Aka Champlain Enterprises, Inc.IAHGPTFalseFalse14351430.00.0-5.0...1446.01543.04.01605-18.00.0-2.01600-165920
42022-04-04Commutair Aka Champlain Enterprises, Inc.DRODENFalseFalse11351135.00.00.0...1154.01243.08.012456.00.00.01200-125920
..................................................................
40783132022-03-31Republic AirlinesMSYEWRFalseTrue19492014.025.025.0...2031.0202.032.02354NaNNaNNaN2300-235951
40783142022-03-17Republic AirlinesCLTEWRTrueFalse17331817.044.044.0...NaNNaNNaN1942NaNNaNNaN1900-195930
40783152022-03-08Republic AirlinesALBORDFalseFalse17002318.0378.0378.0...2337.052.07.01838381.01.012.01800-185930
40783162022-03-25Republic AirlinesEWRPITFalseTrue21292322.0113.0113.0...2347.0933.06.02255NaNNaNNaN2200-225921
40783172022-03-07Republic AirlinesEWRRDUFalseTrue11541148.00.0-6.0...1201.01552.04.01333NaNNaNNaN1300-135921
\n", "

4078318 rows × 61 columns

\n", "
" ], "text/plain": [ " FlightDate Airline Origin Dest \\\n", "0 2022-04-04 Commutair Aka Champlain Enterprises, Inc. GJT DEN \n", "1 2022-04-04 Commutair Aka Champlain Enterprises, Inc. HRL IAH \n", "2 2022-04-04 Commutair Aka Champlain Enterprises, Inc. DRO DEN \n", "3 2022-04-04 Commutair Aka Champlain Enterprises, Inc. IAH GPT \n", "4 2022-04-04 Commutair Aka Champlain Enterprises, Inc. DRO DEN \n", "... ... ... ... ... \n", "4078313 2022-03-31 Republic Airlines MSY EWR \n", "4078314 2022-03-17 Republic Airlines CLT EWR \n", "4078315 2022-03-08 Republic Airlines ALB ORD \n", "4078316 2022-03-25 Republic Airlines EWR PIT \n", "4078317 2022-03-07 Republic Airlines EWR RDU \n", "\n", " Cancelled Diverted CRSDepTime DepTime DepDelayMinutes DepDelay \\\n", "0 False False 1133 1123.0 0.0 -10.0 \n", "1 False False 732 728.0 0.0 -4.0 \n", "2 False False 1529 1514.0 0.0 -15.0 \n", "3 False False 1435 1430.0 0.0 -5.0 \n", "4 False False 1135 1135.0 0.0 0.0 \n", "... ... ... ... ... ... ... \n", "4078313 False True 1949 2014.0 25.0 25.0 \n", "4078314 True False 1733 1817.0 44.0 44.0 \n", "4078315 False False 1700 2318.0 378.0 378.0 \n", "4078316 False True 2129 2322.0 113.0 113.0 \n", "4078317 False True 1154 1148.0 0.0 -6.0 \n", "\n", " ... WheelsOff WheelsOn TaxiIn CRSArrTime ArrDelay ArrDel15 \\\n", "0 ... 1140.0 1220.0 8.0 1245 -17.0 0.0 \n", "1 ... 744.0 839.0 9.0 849 -1.0 0.0 \n", "2 ... 1535.0 1622.0 14.0 1639 -3.0 0.0 \n", "3 ... 1446.0 1543.0 4.0 1605 -18.0 0.0 \n", "4 ... 1154.0 1243.0 8.0 1245 6.0 0.0 \n", "... ... ... ... ... ... ... ... \n", "4078313 ... 2031.0 202.0 32.0 2354 NaN NaN \n", "4078314 ... NaN NaN NaN 1942 NaN NaN \n", "4078315 ... 2337.0 52.0 7.0 1838 381.0 1.0 \n", "4078316 ... 2347.0 933.0 6.0 2255 NaN NaN \n", "4078317 ... 
1201.0 1552.0 4.0 1333 NaN NaN \n", "\n", " ArrivalDelayGroups ArrTimeBlk DistanceGroup DivAirportLandings \n", "0 -2.0 1200-1259 1 0 \n", "1 -1.0 0800-0859 2 0 \n", "2 -1.0 1600-1659 2 0 \n", "3 -2.0 1600-1659 2 0 \n", "4 0.0 1200-1259 2 0 \n", "... ... ... ... ... \n", "4078313 NaN 2300-2359 5 1 \n", "4078314 NaN 1900-1959 3 0 \n", "4078315 12.0 1800-1859 3 0 \n", "4078316 NaN 2200-2259 2 1 \n", "4078317 NaN 1300-1359 2 1 \n", "\n", "[4078318 rows x 61 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] },
 { "cell_type": "code", "execution_count": 6, "id": "e572970c", "metadata": {}, "outputs": [], "source": [
 "# Copy of the whole table with every column cast to str.\n",
 "str_data = data.astype(dtype=str)"
 ] },
 { "cell_type": "code", "execution_count": 7, "id": "27a7cd6b", "metadata": {}, "outputs": [], "source": [
 "# All column labels as one comma-separated string.\n",
 "column_names = ', '.join(data.columns)"
 ] },
 { "cell_type": "code", "execution_count": 8, "id": "a5cf51cc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'FlightDate, Airline, Origin, Dest, Cancelled, Diverted, CRSDepTime, DepTime, DepDelayMinutes, DepDelay, ArrTime, ArrDelayMinutes, AirTime, CRSElapsedTime, ActualElapsedTime, Distance, Year, Quarter, Month, DayofMonth, DayOfWeek, Marketing_Airline_Network, Operated_or_Branded_Code_Share_Partners, DOT_ID_Marketing_Airline, IATA_Code_Marketing_Airline, Flight_Number_Marketing_Airline, Operating_Airline, DOT_ID_Operating_Airline, IATA_Code_Operating_Airline, Tail_Number, Flight_Number_Operating_Airline, OriginAirportID, OriginAirportSeqID, OriginCityMarketID, OriginCityName, OriginState, OriginStateFips, OriginStateName, OriginWac, DestAirportID, DestAirportSeqID, DestCityMarketID, DestCityName, DestState, DestStateFips, DestStateName, DestWac, DepDel15, DepartureDelayGroups, DepTimeBlk, TaxiOut, WheelsOff, WheelsOn, TaxiIn, CRSArrTime, ArrDelay, ArrDel15, ArrivalDelayGroups, ArrTimeBlk, DistanceGroup, DivAirportLandings'" ] }, "execution_count": 8, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "column_names" ] }, { "cell_type": "code", "execution_count": 15, "id": "061d3979", "metadata": {}, "outputs": [], "source": [ "pd.options.display.max_rows = 300" ] }, { "cell_type": "code", "execution_count": 21, "id": "e44254fc", "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "FlightDate 2022-04-04\n", "Airline Commutair Aka Champlain Enterprises, Inc.\n", "Origin DEN\n", "Dest CPR\n", "Cancelled False\n", "Diverted False\n", "CRSDepTime 1540\n", "DepTime 1546.0\n", "DepDelayMinutes 6.0\n", "DepDelay 6.0\n", "ArrTime 1650.0\n", "ArrDelayMinutes 0.0\n", "AirTime 45.0\n", "CRSElapsedTime 70.0\n", "ActualElapsedTime 64.0\n", "Distance 230.0\n", "Year 2022\n", "Quarter 2\n", "Month 4\n", "DayofMonth 4\n", "DayOfWeek 1\n", "Marketing_Airline_Network UA\n", "Operated_or_Branded_Code_Share_Partners UA_CODESHARE\n", "DOT_ID_Marketing_Airline 19977\n", "IATA_Code_Marketing_Airline UA\n", "Flight_Number_Marketing_Airline 4288\n", "Operating_Airline C5\n", "DOT_ID_Operating_Airline 20445\n", "IATA_Code_Operating_Airline C5\n", "Tail_Number N14177\n", "Flight_Number_Operating_Airline 4288\n", "OriginAirportID 11292\n", "OriginAirportSeqID 1129202\n", "OriginCityMarketID 30325\n", "OriginCityName Denver, CO\n", "OriginState CO\n", "OriginStateFips 8\n", "OriginStateName Colorado\n", "OriginWac 82\n", "DestAirportID 11122\n", "DestAirportSeqID 1112205\n", "DestCityMarketID 31122\n", "DestCityName Casper, WY\n", "DestState WY\n", "DestStateFips 56\n", "DestStateName Wyoming\n", "DestWac 88\n", "DepDel15 0.0\n", "DepartureDelayGroups 0.0\n", "DepTimeBlk 1500-1559\n", "TaxiOut 13.0\n", "WheelsOff 1559.0\n", "WheelsOn 1644.0\n", "TaxiIn 6.0\n", "CRSArrTime 1650\n", "ArrDelay 0.0\n", "ArrDel15 0.0\n", "ArrivalDelayGroups 0.0\n", "ArrTimeBlk 1600-1659\n", "DistanceGroup 1\n", "DivAirportLandings 0\n", "Name: 10, dtype: object\n" ] } ], "source": [ "print(data.iloc[10],flush=True)" ] }, { 
"cell_type": "code", "execution_count": 57, "id": "1a0f208d", "metadata": {}, "outputs": [], "source": [ "filter_data = data[['FlightDate','DepTime','ArrTime','Distance','OriginCityName','DestCityName']]" ] }, { "cell_type": "code", "execution_count": 58, "id": "4cf6383d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "FlightDate\n", "DepTime\n", "ArrTime\n", "Distance\n", "OriginCityName\n", "DestCityName\n" ] } ], "source": [ "for unit in filter_data:\n", " print(unit)" ] }, { "cell_type": "code", "execution_count": 63, "id": "ddbf175a", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "fc2986be7e664907986359e2bcf22671", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "IOPub message rate exceeded.\n", "The notebook server will temporarily stop sending output\n", "to the client in order to avoid crashing it.\n", "To change this limit, set the config variable\n", "`--NotebookApp.iopub_msg_rate_limit`.\n", "\n", "Current values:\n", "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", "NotebookApp.rate_limit_window=3.0 (secs)\n", "\n" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[63], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtqdm\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mautonotebook\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tqdm\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m idx, unit \u001b[38;5;129;01min\u001b[39;00m 
tqdm(\u001b[38;5;28menumerate\u001b[39m(filter_data\u001b[38;5;241m.\u001b[39miterrows())):\n\u001b[0;32m----> 3\u001b[0m \u001b[43mfilter_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloc\u001b[49m\u001b[43m[\u001b[49m\u001b[43midx\u001b[49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mOriginCityName\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m unit[\u001b[38;5;241m1\u001b[39m][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mOriginCityName\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m,\u001b[39m\u001b[38;5;124m'\u001b[39m)[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 4\u001b[0m filter_data\u001b[38;5;241m.\u001b[39mloc[idx,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDestCityName\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m unit[\u001b[38;5;241m1\u001b[39m][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDestCityName\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m,\u001b[39m\u001b[38;5;124m'\u001b[39m)[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 5\u001b[0m filter_data\u001b[38;5;241m.\u001b[39mloc[idx,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mprice\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mint\u001b[39m((unit[\u001b[38;5;241m1\u001b[39m][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDistance\u001b[39m\u001b[38;5;124m'\u001b[39m]) \u001b[38;5;241m*\u001b[39m random\u001b[38;5;241m.\u001b[39muniform(\u001b[38;5;241m0.2\u001b[39m,\u001b[38;5;241m0.5\u001b[39m))\n", "File \u001b[0;32m~/miniconda3/envs/py39/lib/python3.9/site-packages/pandas/core/indexing.py:849\u001b[0m, in \u001b[0;36m_LocationIndexer.__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 846\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_has_valid_setitem_indexer(key)\n\u001b[1;32m 848\u001b[0m iloc 
\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miloc\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39miloc\n\u001b[0;32m--> 849\u001b[0m \u001b[43miloc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_setitem_with_indexer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/py39/lib/python3.9/site-packages/pandas/core/indexing.py:1835\u001b[0m, in \u001b[0;36m_iLocIndexer._setitem_with_indexer\u001b[0;34m(self, indexer, value, name)\u001b[0m\n\u001b[1;32m 1832\u001b[0m \u001b[38;5;66;03m# align and set the values\u001b[39;00m\n\u001b[1;32m 1833\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m take_split_path:\n\u001b[1;32m 1834\u001b[0m \u001b[38;5;66;03m# We have to operate column-wise\u001b[39;00m\n\u001b[0;32m-> 1835\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_setitem_with_indexer_split_path\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1836\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1837\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_setitem_single_block(indexer, value, name)\n", "File \u001b[0;32m~/miniconda3/envs/py39/lib/python3.9/site-packages/pandas/core/indexing.py:1928\u001b[0m, in \u001b[0;36m_iLocIndexer._setitem_with_indexer_split_path\u001b[0;34m(self, 
indexer, value, name)\u001b[0m\n\u001b[1;32m 1925\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1926\u001b[0m \u001b[38;5;66;03m# scalar value\u001b[39;00m\n\u001b[1;32m 1927\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m loc \u001b[38;5;129;01min\u001b[39;00m ilocs:\n\u001b[0;32m-> 1928\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_setitem_single_column\u001b[49m\u001b[43m(\u001b[49m\u001b[43mloc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpi\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/py39/lib/python3.9/site-packages/pandas/core/indexing.py:2034\u001b[0m, in \u001b[0;36m_iLocIndexer._setitem_single_column\u001b[0;34m(self, loc, value, plane_indexer)\u001b[0m\n\u001b[1;32m 2030\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39misetitem(loc, value)\n\u001b[1;32m 2031\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2032\u001b[0m \u001b[38;5;66;03m# set value into the column (first attempting to operate inplace, then\u001b[39;00m\n\u001b[1;32m 2033\u001b[0m \u001b[38;5;66;03m# falling back to casting if necessary)\u001b[39;00m\n\u001b[0;32m-> 2034\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_mgr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumn_setitem\u001b[49m\u001b[43m(\u001b[49m\u001b[43mloc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mplane_indexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2036\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39m_clear_item_cache()\n", "File \u001b[0;32m~/miniconda3/envs/py39/lib/python3.9/site-packages/pandas/core/internals/managers.py:1385\u001b[0m, in 
\u001b[0;36mBlockManager.column_setitem\u001b[0;34m(self, loc, idx, value, inplace_only)\u001b[0m\n\u001b[1;32m 1383\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1384\u001b[0m new_mgr \u001b[38;5;241m=\u001b[39m col_mgr\u001b[38;5;241m.\u001b[39msetitem((idx,), value)\n\u001b[0;32m-> 1385\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43miset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mloc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnew_mgr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_block\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/py39/lib/python3.9/site-packages/pandas/core/internals/managers.py:1213\u001b[0m, in \u001b[0;36mBlockManager.iset\u001b[0;34m(self, loc, value, inplace)\u001b[0m\n\u001b[1;32m 1211\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_iset_split_block(blkno_l, blk_locs, value_getitem(val_locs))\n\u001b[1;32m 1212\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1213\u001b[0m \u001b[43mblk\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mset_inplace\u001b[49m\u001b[43m(\u001b[49m\u001b[43mblk_locs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue_getitem\u001b[49m\u001b[43m(\u001b[49m\u001b[43mval_locs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1214\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[1;32m 1215\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", "File \u001b[0;32m~/miniconda3/envs/py39/lib/python3.9/site-packages/pandas/core/internals/blocks.py:924\u001b[0m, in \u001b[0;36mBlock.set_inplace\u001b[0;34m(self, locs, values, copy)\u001b[0m\n\u001b[1;32m 922\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copy:\n\u001b[1;32m 923\u001b[0m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalues \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalues\u001b[38;5;241m.\u001b[39mcopy()\n\u001b[0;32m--> 924\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlocs\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m values\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "from tqdm.autonotebook import tqdm\n", "for idx, unit in tqdm(enumerate(filter_data.iterrows())):\n", " filter_data.loc[idx,'OriginCityName'] = unit[1]['OriginCityName'].split(',')[0]\n", " filter_data.loc[idx,'DestCityName'] = unit[1]['DestCityName'].split(',')[0]\n", " filter_data.loc[idx,'price'] = int((unit[1]['Distance']) * random.uniform(0.2,0.5))" ] }, { "cell_type": "code", "execution_count": 62, "id": "0ac9a59d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FlightDateDepTimeArrTimeDistanceOriginCityNameDestCityNameprice
02022-04-041123.01228.0212.0Grand JunctionDenver72.0
12022-04-04728.0848.0295.0Harlingen/San BenitoHouston141.0
22022-04-041514.01636.0251.0DurangoDenver114.0
32022-04-041430.01547.0376.0HoustonGulfport/Biloxi103.0
42022-04-041135.01251.0251.0DurangoDenver118.0
........................
40783132022-03-312014.0234.01167.0New Orleans, LANewark, NJNaN
40783142022-03-171817.0NaN529.0Charlotte, NCNewark, NJNaN
40783152022-03-082318.059.0723.0Albany, NYChicago, ILNaN
40783162022-03-252322.0939.0319.0Newark, NJPittsburgh, PANaN
40783172022-03-071148.01556.0416.0Newark, NJRaleigh/Durham, NCNaN
\n", "

4078318 rows × 7 columns

\n", "
" ], "text/plain": [ " FlightDate DepTime ArrTime Distance OriginCityName \\\n", "0 2022-04-04 1123.0 1228.0 212.0 Grand Junction \n", "1 2022-04-04 728.0 848.0 295.0 Harlingen/San Benito \n", "2 2022-04-04 1514.0 1636.0 251.0 Durango \n", "3 2022-04-04 1430.0 1547.0 376.0 Houston \n", "4 2022-04-04 1135.0 1251.0 251.0 Durango \n", "... ... ... ... ... ... \n", "4078313 2022-03-31 2014.0 234.0 1167.0 New Orleans, LA \n", "4078314 2022-03-17 1817.0 NaN 529.0 Charlotte, NC \n", "4078315 2022-03-08 2318.0 59.0 723.0 Albany, NY \n", "4078316 2022-03-25 2322.0 939.0 319.0 Newark, NJ \n", "4078317 2022-03-07 1148.0 1556.0 416.0 Newark, NJ \n", "\n", " DestCityName price \n", "0 Denver 72.0 \n", "1 Houston 141.0 \n", "2 Denver 114.0 \n", "3 Gulfport/Biloxi 103.0 \n", "4 Denver 118.0 \n", "... ... ... \n", "4078313 Newark, NJ NaN \n", "4078314 Newark, NJ NaN \n", "4078315 Chicago, IL NaN \n", "4078316 Pittsburgh, PA NaN \n", "4078317 Raleigh/Durham, NC NaN \n", "\n", "[4078318 rows x 7 columns]" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filter_data" ] }, { "cell_type": "code", "execution_count": 43, "id": "10a2b0e3", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b7bbe97af9df406fb0e56a6f8dfd8656", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "(0, FlightDate 2022-04-04\n", "DepTime 1123.0\n", "ArrTime 1228.0\n", "Distance 212.0\n", "OriginCityName Grand Junction\n", "DestCityName Denver\n", "Name: 0, dtype: object)\n" ] } ], "source": [ "import random\n", "for idx, unit in tqdm(enumerate(filter_data.iterrows())):\n", " filter_data.loc[idx,'price'] = eval(unit[1]['Distance']) * random.uniform(0.2,0.5))" ] }, { "cell_type": "code", "execution_count": 44, "id": "1ca1f597", "metadata": {}, "outputs": [ { "name": "stderr", 
"output_type": "stream", "text": [ "/tmp/ipykernel_1504075/2172656540.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " filter_data['price'] = None\n" ] } ], "source": [ "filter_data['price'] = None" ] }, { "cell_type": "code", "execution_count": null, "id": "e32d2e3c", "metadata": {}, "outputs": [], "source": [ "import random\n", "random.uniform(0.2,0.5)" ] }, { "cell_type": "code", "execution_count": null, "id": "cf4ecc5e", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } }, "nbformat": 4, "nbformat_minor": 5 }