{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### import" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: xmltodict in c:\\users\\user\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (0.13.0)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "[notice] A new release of pip is available: 23.2.1 -> 24.0\n", "[notice] To update, run: python.exe -m pip install --upgrade pip\n" ] } ], "source": [ "%pip install xmltodict" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Use request to download a xml file\n", "import requests\n", "import json\n", "import xmltodict" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# url = 'http://tisvcloud.freeway.gov.tw/history/motc20/ETag.xml'\n", "# response = requests.get(url)\n", "# with open('ETag.xml', 'wb') as f:\n", "# f.write(response.content)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# # Turn xml to json\n", "# filename = 'ETag'\n", "# with open(f'{filename}.xml', 'r', encoding='utf-8-sig') as f:\n", "# data_dict = xmltodict.parse(f.read())\n", "# data_json = json.dumps(data_dict, ensure_ascii=False)\n", "# # dump to Etag.json with json\n", "# json.dump(data_dict, open(f'{filename}.json', 'w'), ensure_ascii=False, indent=2)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "'pwd' ���O�����Υ~���R�O�B�i���檺�{���Χ妸�ɡC\n" ] } ], "source": [ "!pwd" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Read ETagPairLive_1615.xml.gz and decompress it\n", "import gzip\n", "import shutil\n", "import threading\n", "\n", "\n", "\n", "def crawl(url, filename):\n", " try:\n", " try:\n", " response = requests.get(url)\n", " except Exception as e:\n", " print(\"Error\",e)\n", " with open(f'{filename}.xml.gz', 'wb') as f:\n", " f.write(response.content)\n", " \n", " # 解壓縮\n", " with gzip.open(f'{filename}.xml.gz', 'rb') as f_in:\n", " with open(f'{filename}.xml', 'wb') as f_out:\n", " shutil.copyfileobj(f_in, f_out)\n", "\n", " # xml to json\n", " with open(f'{filename}.xml', 'r') as f:\n", " data_dict = xmltodict.parse(f.read())\n", " data_json = json.dumps(data_dict, ensure_ascii=False)\n", " # dump to Etag.json with json\n", " json.dump(data_dict, open(f'{filename}.json', 'w'), ensure_ascii=False, indent=2)\n", " except Exception as e:\n", " print(e)\n", " print(f'{url} failed')\n", " print(f'{url} done')\n", "\n", "threads = []\n", "day_list = ['202405%02d'%(x) for x in range(6, 13)]\n", "hour_list = ['%02d'%(i) for i in range(24)]\n", "five_minute_list = ['%02d'%(i) for i in range(0,60,5)]\n", "print(\"Lens = \", len(day_list) * len(hour_list) * len(five_minute_list))\n", "for day in day_list:\n", " for hour in hour_list:\n", " for minute in five_minute_list:\n", " i = hour + minute\n", " # filename = f'ETagPairLive_{str(i)}'\n", " # url = f'https://tisvcloud.freeway.gov.tw/history/motc20/ETag/20240522/{filename}.xml.gz'\n", " filename = f'./data/ETagPairLive_{day}_{str(i)}'\n", " url = f'https://tisvcloud.freeway.gov.tw/history/motc20/ETag/{day}/ETagPairLive_{str(i)}.xml.gz'\n", " t = threading.Thread(target=crawl, args=(url, filename))\n", " threads.append(t)\n", " t.start()\n", "\n", "# Wait for all threads to finish\n", "for t in threads:\n", " t.join()\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import matplotlib.font_manager as fm\n", "\n", "EtagPairId = '01F0339S-01F0376S'\n", "EtagStart = '01F0339S'\n", "EtagEnd = '01F0376S'\n", "EtagStartRoad = ['五股','高公局','林口(文化一路)']\n", "VehicleTypeDict = {\n", " 31: \"Car\",\n", " 32: \"Small Truck\",\n", " 41: \"Bus\", # (含計程車)\n", " 42: \"Heavy Truck\",\n", " 5: \"Trailer\"\n", "}\n", "\n", "time_series_31data = {}\n", "time_series_32data = {}\n", "time_series_41data = {}\n", "time_series_42data = {}\n", "time_series_5data = {}\n", "\n", "\n", "day_list = ['202405%02d'%(x) for x in range(6, 13)]\n", "hour_list = ['%02d'%(i) for i in range(24)]\n", "five_minute_list = ['%02d'%(i) for i in range(0,60,5)]\n", "for day in day_list:\n", " for hour in hour_list:\n", " for minute in five_minute_list:\n", " i = hour + minute\n", " filename = f'./data/ETagPairLive_{day}_{str(i)}'\n", " try:\n", " with open(f'{filename}.json', 'r') as f:\n", " data = json.load(f)\n", " EtagPairLive = data['ETagPairLiveList']['ETagPairLives']['ETagPairLive']\n", " for ETagPair in EtagPairLive:\n", " if ETagPair['ETagPairID'] == EtagPairId:\n", " # print(ETagPair['ETagPairID'])\n", " Flow = ETagPair['Flows']['Flow']\n", " for vehicle in Flow:\n", " # print(f\"VehicleType: {VehicleTypeDict[int(vehicle['VehicleType'])]}, VehicleNum: {vehicle['VehicleCount']}, Speed: {vehicle['SpaceMeanSpeed']}, Time: {vehicle['TravelTime']}\")\n", " if int(vehicle['VehicleType']) == 31:\n", " time_series_31data[f'{day}_{i}'] = int(vehicle['VehicleCount'])\n", " elif int(vehicle['VehicleType']) == 32:\n", " time_series_32data[f'{day}_{i}'] = int(vehicle['VehicleCount'])\n", " elif int(vehicle['VehicleType']) == 41:\n", " time_series_41data[f'{day}_{i}'] = int(vehicle['VehicleCount'])\n", " elif int(vehicle['VehicleType']) == 42:\n", " time_series_42data[f'{day}_{i}'] = int(vehicle['VehicleCount'])\n", " elif int(vehicle['VehicleType']) == 5:\n", " time_series_5data[f'{day}_{i}'] = int(vehicle['VehicleCount'])\n", " except:\n", " pass\n", "\n", "step = 6\n", "plt.figure(figsize=(30,10))\n", "plt.plot(list(time_series_31data.keys())[::step], list(time_series_31data.values())[::step], label=VehicleTypeDict[31])\n", "plt.plot(list(time_series_32data.keys())[::step], list(time_series_32data.values())[::step], label=VehicleTypeDict[32])\n", "plt.plot(list(time_series_41data.keys())[::step], list(time_series_41data.values())[::step], label=VehicleTypeDict[41])\n", "plt.plot(list(time_series_42data.keys())[::step], list(time_series_42data.values())[::step], label=VehicleTypeDict[42])\n", "plt.plot(list(time_series_5data.keys())[::step], list(time_series_5data.values())[::step], label=VehicleTypeDict[5])\n", "plt.xlabel('Time')\n", "plt.xticks(fontsize=14)\n", "plt.xticks(rotation=60)\n", "plt.ylabel('VehicleCount')\n", "plt.title(f'{EtagStart} to {EtagEnd}')\n", "plt.legend()\n", "plt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.7" } }, "nbformat": 4, "nbformat_minor": 2 }