{ "cells": [ { "cell_type": "code", "execution_count": 4, "id": "041c9721", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "data = pd.read_csv('/home/xj/toolAugEnv/code/toolConstraint/database/flights/Combined_Flights_2022.csv')\n", "# df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/flights/clean_Flights_2022.csv')" ] }, { "cell_type": "code", "execution_count": 2, "id": "03d0f39e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "FlightDate 2022-03-15\n", "Airline Delta Air Lines Inc.\n", "Origin LAS\n", "Dest SLC\n", "Cancelled False\n", " ... \n", "ArrDel15 0.0\n", "ArrivalDelayGroups -2.0\n", "ArrTimeBlk 1600-1659\n", "DistanceGroup 2\n", "DivAirportLandings 0\n", "Name: 3504987, Length: 61, dtype: object" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.iloc[3504987]" ] }, { "cell_type": "code", "execution_count": 5, "id": "036418f5", "metadata": {}, "outputs": [], "source": [ "data_dict = data.to_dict(orient = 'split')" ] }, { "cell_type": "code", "execution_count": 13, "id": "ef12c4b3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "FlightDate 2022-01-29\n", "Airline Frontier Airlines Inc.\n", "Origin COS\n", "Dest LAS\n", "Cancelled False\n", "Diverted False\n", "CRSDepTime 1558\n", "DepTime 1553.0\n", "DepDelayMinutes 0.0\n", "DepDelay -5.0\n", "ArrTime 1646.0\n", "ArrDelayMinutes 0.0\n", "AirTime 91.0\n", "CRSElapsedTime 124.0\n", "ActualElapsedTime 113.0\n", "Distance 604.0\n", "Year 2022\n", "Quarter 1\n", "Month 1\n", "DayofMonth 29\n", "DayOfWeek 6\n", "Marketing_Airline_Network F9\n", "Operated_or_Branded_Code_Share_Partners F9\n", "DOT_ID_Marketing_Airline 20436\n", "IATA_Code_Marketing_Airline F9\n", "Flight_Number_Marketing_Airline 2019\n", "Operating_Airline F9\n", "DOT_ID_Operating_Airline 20436\n", "IATA_Code_Operating_Airline F9\n", "Tail_Number N235FR\n", "Flight_Number_Operating_Airline 2019\n", "OriginAirportID 11109\n", "OriginAirportSeqID 1110902\n", "OriginCityMarketID 30189\n", "OriginCityName Colorado Springs, CO\n", "OriginState CO\n", "OriginStateFips 8\n", "OriginStateName Colorado\n", "OriginWac 82\n", "DestAirportID 12889\n", "DestAirportSeqID 1288903\n", "DestCityMarketID 32211\n", "DestCityName Las Vegas, NV\n", "DestState NV\n", "DestStateFips 32\n", "DestStateName Nevada\n", "DestWac 85\n", "DepDel15 0.0\n", "DepartureDelayGroups -1.0\n", "DepTimeBlk 1500-1559\n", "TaxiOut 13.0\n", "WheelsOff 1606.0\n", "WheelsOn 1637.0\n", "TaxiIn 9.0\n", "CRSArrTime 1702\n", "ArrDelay -16.0\n", "ArrDel15 0.0\n", "ArrivalDelayGroups -2.0\n", "ArrTimeBlk 1700-1759\n", "DistanceGroup 3\n", "DivAirportLandings 0\n" ] } ], "source": [ "for idx,unit in enumerate(data_dict['columns']):\n", " print(unit, data_dict['data'][3000020][idx])" ] }, { "cell_type": "code", "execution_count": 12, "id": "372b3fd9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['2022-01-29',\n", " 'Frontier Airlines Inc.',\n", " 'COS',\n", " 'LAS',\n", " False,\n", " False,\n", " 1558,\n", " 1553.0,\n", " 0.0,\n", " -5.0,\n", " 1646.0,\n", " 0.0,\n", " 91.0,\n", " 124.0,\n", " 113.0,\n", " 604.0,\n", " 2022,\n", " 1,\n", " 1,\n", " 29,\n", " 6,\n", " 'F9',\n", " 'F9',\n", " 20436,\n", " 'F9',\n", " 2019,\n", " 'F9',\n", " 20436,\n", " 'F9',\n", " 'N235FR',\n", " 2019,\n", " 11109,\n", " 1110902,\n", " 30189,\n", " 'Colorado Springs, CO',\n", " 'CO',\n", " 8,\n", " 'Colorado',\n", " 82,\n", " 12889,\n", " 1288903,\n", " 32211,\n", " 'Las Vegas, NV',\n", " 'NV',\n", " 32,\n", " 'Nevada',\n", " 85,\n", " 0.0,\n", " -1.0,\n", " '1500-1559',\n", " 13.0,\n", " 1606.0,\n", " 1637.0,\n", " 9.0,\n", " 1702,\n", " -16.0,\n", " 0.0,\n", " -2.0,\n", " '1700-1759',\n", " 3,\n", " 0]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_dict['data'][3000020]" ] }, { "cell_type": "code", "execution_count": 11, "id": "371a85fd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "4078318\n" ] } ], "source": [ "print(len(data_dict['data']))" ] }, { "cell_type": "code", "execution_count": 12, "id": "64d46483", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "FlightDate 0\n", "DepTime 7\n", "ArrTime 10\n", "ActualElapsedTime 14\n", "Distance 15\n", "OriginCityName 34\n", "DestCityName 42\n" ] } ], "source": [ "for idx,unit in enumerate(data_dict['columns']):\n", " if unit in ['FlightDate','DepTime','ArrTime','ActualElapsedTime','Distance','OriginCityName','DestCityName']:\n", " print(unit, str(idx))" ] }, { "cell_type": "code", "execution_count": 6, "id": "81047adf", "metadata": {}, "outputs": [], "source": [ "import math\n", "def convert_to_hhmm(time_float):\n", " \"\"\"\n", " Convert a float time to hh:mm format\n", " :param time_float: Time as a float. Example: 757.0\n", " :return: Time in hh:mm format. Example: \"07:57\"\n", " \"\"\"\n", " try:\n", " hours = int(time_float // 100)\n", " minutes = int(time_float % 100)\n", " return \"{:02d}:{:02d}\".format(hours, minutes)\n", " except:\n", " return time_float\n", "\n", "def minutes_to_hours_minutes(minutes):\n", " # Check for NaN and handle it\n", " if math.isnan(minutes):\n", " return \"NaN\"\n", " \n", " # Ensure minutes is an integer or rounded to the nearest integer\n", " minutes = round(minutes)\n", " \n", " hours = minutes // 60\n", " remaining_minutes = minutes % 60\n", " return f\"{hours} hours {remaining_minutes} minutes\"" ] }, { "cell_type": "code", "execution_count": 7, "id": "ee34cbde", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b60c3d13fb6d44258103c6251365272b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from tqdm.autonotebook import tqdm\n", "import random\n", "new_data = []\n", "for idx, unit in tqdm(enumerate(data_dict['data'])):\n", " tmp_dict = {k:\"\" for k in ['FlightDate','DepTime','ArrTime','ActualElapsedTime','Distance','OriginCityName','DestCityName','Price']}\n", " tmp_dict['FlightDate'] = unit[0]\n", " tmp_dict['DepTime'] = convert_to_hhmm(unit[7])\n", " tmp_dict['ArrTime'] = convert_to_hhmm(unit[10])\n", " tmp_dict['ActualElapsedTime'] = minutes_to_hours_minutes(unit[14])\n", " tmp_dict['Distance'] = unit[15]\n", " tmp_dict['OriginCityName'] = unit[34].split(',')[0].split('/')[0]\n", " tmp_dict['DestCityName'] = unit[42].split(',')[0].split('/')[0]\n", " tmp_dict['Price'] = int((unit[15]) * random.uniform(0.2,0.5))\n", " new_data.append(tmp_dict)" ] }, { "cell_type": "code", "execution_count": 11, "id": "aee3f422", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'FlightDate': '2022-01-29',\n", " 'DepTime': '15:53',\n", " 'ArrTime': '16:46',\n", " 'ActualElapsedTime': '1 hours 53 minutes',\n", " 'Distance': 604.0,\n", " 'OriginCityName': 'Colorado Springs',\n", " 'DestCityName': 'Las Vegas',\n", " 'Price': 205}" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data[3000020]" ] }, { "cell_type": "code", "execution_count": 90, "id": "bfb243c0", "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame(new_data)" ] }, { "cell_type": "code", "execution_count": 62, "id": "f152a150", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FlightDateDepTimeArrTimeActualElapsedTimeDistanceOriginCityNameDestCityNamePrice
34883942022-03-0107:2415:154 hours 51 minutes2422.0SeattleNew York720
35093822022-03-0122:2906:074 hours 38 minutes2422.0SeattleNew York484
37360562022-03-0123:3307:164 hours 43 minutes2422.0SeattleNew York1199
37362602022-03-0114:3722:054 hours 28 minutes2422.0SeattleNew York950
37363132022-03-0109:1117:175 hours 6 minutes2422.0SeattleNew York1050
37768582022-03-0121:0104:324 hours 31 minutes2422.0SeattleNew York1146
37785652022-03-0113:1821:084 hours 50 minutes2422.0SeattleNew York578
\n", "
" ], "text/plain": [ " FlightDate DepTime ArrTime ActualElapsedTime Distance \n", "3488394 2022-03-01 07:24 15:15 4 hours 51 minutes 2422.0 \\\n", "3509382 2022-03-01 22:29 06:07 4 hours 38 minutes 2422.0 \n", "3736056 2022-03-01 23:33 07:16 4 hours 43 minutes 2422.0 \n", "3736260 2022-03-01 14:37 22:05 4 hours 28 minutes 2422.0 \n", "3736313 2022-03-01 09:11 17:17 5 hours 6 minutes 2422.0 \n", "3776858 2022-03-01 21:01 04:32 4 hours 31 minutes 2422.0 \n", "3778565 2022-03-01 13:18 21:08 4 hours 50 minutes 2422.0 \n", "\n", " OriginCityName DestCityName Price \n", "3488394 Seattle New York 720 \n", "3509382 Seattle New York 484 \n", "3736056 Seattle New York 1199 \n", "3736260 Seattle New York 950 \n", "3736313 Seattle New York 1050 \n", "3776858 Seattle New York 1146 \n", "3778565 Seattle New York 578 " ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[(df['OriginCityName']=='Seattle') & (df['DestCityName']=='New York')& (df['FlightDate']=='2022-03-01')]" ] }, { "cell_type": "code", "execution_count": 92, "id": "9f85d8e6", "metadata": {}, "outputs": [], "source": [ "df['Flight Number'] = df.index" ] }, { "cell_type": "code", "execution_count": 93, "id": "045df94c", "metadata": {}, "outputs": [], "source": [ "df = df.reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "4e1b68b7", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 91, "id": "5c7d3b44", "metadata": {}, "outputs": [], "source": [ "df.index = df.index.map(lambda x: str(x).zfill(7))" ] }, { "cell_type": "code", "execution_count": 94, "id": "7a1f223c", "metadata": {}, "outputs": [], "source": [ "df['Flight Number'] = 'F' + df['Flight Number'].astype(str)" ] }, { "cell_type": "code", "execution_count": 97, "id": "af7e3411", "metadata": {}, "outputs": [], "source": [ "df.to_csv('/home/xj/toolAugEnv/code/toolConstraint/database/flights/clean_Flights_2022.csv')" ] }, { "cell_type": "code", "execution_count": 95, "id": "461e83ef", "metadata": {}, "outputs": [], "source": [ "x = df[df['OriginCityName']=='Montrose']" ] }, { "cell_type": "code", "execution_count": 53, "id": "ed4e2107", "metadata": {}, "outputs": [], "source": [ "x = df[df['DestCityName']=='Montrose']" ] }, { "cell_type": "code", "execution_count": 96, "id": "56c918e3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FlightDateDepTimeArrTimeActualElapsedTimeDistanceOriginCityNameDestCityNamePriceFlight Number
41552022-04-0110:4211:340 hours 52 minutes196.0MontroseDenver42F0004155
41562022-04-0114:4315:320 hours 49 minutes196.0MontroseDenver64F0004156
41572022-04-0117:3818:581 hours 20 minutes196.0MontroseDenver97F0004157
74392022-04-0213:3816:321 hours 54 minutes733.0MontroseDallas151F0007439
74402022-04-0212:3713:290 hours 52 minutes196.0MontroseDenver54F0007440
..............................
40451392022-03-2711:5012:171 hours 27 minutes419.0MontrosePhoenix133F4045139
40451402022-03-2811:4512:221 hours 37 minutes419.0MontrosePhoenix188F4045140
40451412022-03-2911:3512:171 hours 42 minutes419.0MontrosePhoenix144F4045141
40451422022-03-3011:3812:131 hours 35 minutes419.0MontrosePhoenix125F4045142
40451432022-03-3111:4012:191 hours 39 minutes419.0MontrosePhoenix129F4045143
\n", "

2035 rows × 9 columns

\n", "
" ], "text/plain": [ " FlightDate DepTime ArrTime ActualElapsedTime Distance \n", "4155 2022-04-01 10:42 11:34 0 hours 52 minutes 196.0 \\\n", "4156 2022-04-01 14:43 15:32 0 hours 49 minutes 196.0 \n", "4157 2022-04-01 17:38 18:58 1 hours 20 minutes 196.0 \n", "7439 2022-04-02 13:38 16:32 1 hours 54 minutes 733.0 \n", "7440 2022-04-02 12:37 13:29 0 hours 52 minutes 196.0 \n", "... ... ... ... ... ... \n", "4045139 2022-03-27 11:50 12:17 1 hours 27 minutes 419.0 \n", "4045140 2022-03-28 11:45 12:22 1 hours 37 minutes 419.0 \n", "4045141 2022-03-29 11:35 12:17 1 hours 42 minutes 419.0 \n", "4045142 2022-03-30 11:38 12:13 1 hours 35 minutes 419.0 \n", "4045143 2022-03-31 11:40 12:19 1 hours 39 minutes 419.0 \n", "\n", " OriginCityName DestCityName Price Flight Number \n", "4155 Montrose Denver 42 F0004155 \n", "4156 Montrose Denver 64 F0004156 \n", "4157 Montrose Denver 97 F0004157 \n", "7439 Montrose Dallas 151 F0007439 \n", "7440 Montrose Denver 54 F0007440 \n", "... ... ... ... ... \n", "4045139 Montrose Phoenix 133 F4045139 \n", "4045140 Montrose Phoenix 188 F4045140 \n", "4045141 Montrose Phoenix 144 F4045141 \n", "4045142 Montrose Phoenix 125 F4045142 \n", "4045143 Montrose Phoenix 129 F4045143 \n", "\n", "[2035 rows x 9 columns]" ] }, "execution_count": 96, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x" ] }, { "cell_type": "code", "execution_count": 52, "id": "74dfd3cd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FlightDateDepTimeArrTimeActualElapsedTimeDistanceOriginCityNameDestCityNamePrice
13692022-04-0109:0410:231 hours 19 minutes229.0WashingtonNew York105
13702022-04-0111:0712:291 hours 22 minutes229.0WashingtonNew York56
13802022-04-0113:2014:521 hours 32 minutes229.0WashingtonNew York95
14092022-04-0119:0320:231 hours 20 minutes229.0WashingtonNew York71
14362022-04-0115:3217:031 hours 31 minutes229.0WashingtonNew York64
...........................
5654442022-04-0122:5507:095 hours 14 minutes2475.0Los AngelesNew York621
5654462022-04-0111:3919:495 hours 10 minutes2475.0Los AngelesNew York1100
5655112022-04-0115:4822:164 hours 28 minutes1620.0DenverNew York575
5655412022-04-0117:3623:053 hours 29 minutes1620.0DenverNew York669
5655812022-04-0120:5223:481 hours 56 minutes733.0ChicagoNew York338
\n", "

889 rows × 8 columns

\n", "
" ], "text/plain": [ " FlightDate DepTime ArrTime ActualElapsedTime Distance \n", "1369 2022-04-01 09:04 10:23 1 hours 19 minutes 229.0 \\\n", "1370 2022-04-01 11:07 12:29 1 hours 22 minutes 229.0 \n", "1380 2022-04-01 13:20 14:52 1 hours 32 minutes 229.0 \n", "1409 2022-04-01 19:03 20:23 1 hours 20 minutes 229.0 \n", "1436 2022-04-01 15:32 17:03 1 hours 31 minutes 229.0 \n", "... ... ... ... ... ... \n", "565444 2022-04-01 22:55 07:09 5 hours 14 minutes 2475.0 \n", "565446 2022-04-01 11:39 19:49 5 hours 10 minutes 2475.0 \n", "565511 2022-04-01 15:48 22:16 4 hours 28 minutes 1620.0 \n", "565541 2022-04-01 17:36 23:05 3 hours 29 minutes 1620.0 \n", "565581 2022-04-01 20:52 23:48 1 hours 56 minutes 733.0 \n", "\n", " OriginCityName DestCityName Price \n", "1369 Washington New York 105 \n", "1370 Washington New York 56 \n", "1380 Washington New York 95 \n", "1409 Washington New York 71 \n", "1436 Washington New York 64 \n", "... ... ... ... \n", "565444 Los Angeles New York 621 \n", "565446 Los Angeles New York 1100 \n", "565511 Denver New York 575 \n", "565541 Denver New York 669 \n", "565581 Chicago New York 338 \n", "\n", "[889 rows x 8 columns]" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x[x['FlightDate']=='2022-04-01']" ] }, { "cell_type": "code", "execution_count": 58, "id": "93c2a26f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Manhattan', 'Ft. Riley']\n" ] } ], "source": [ "print('Manhattan/Ft. Riley'.split('/'))" ] }, { "cell_type": "code", "execution_count": null, "id": "86b394bf", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } }, "nbformat": 4, "nbformat_minor": 5 }