Spaces:

nelson40514
/

datamining-final

Sleeping

File size: 8,937 Bytes

d737845

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### import"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: xmltodict in c:\\users\\user\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (0.13.0)\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "[notice] A new release of pip is available: 23.2.1 -> 24.0\n",
      "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
     ]
    }
   ],
   "source": [
    "%pip install xmltodict==0.13.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use request to download a xml file\n",
    "import requests\n",
    "import json\n",
    "import xmltodict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# url = 'http://tisvcloud.freeway.gov.tw/history/motc20/ETag.xml'\n",
    "# response = requests.get(url)\n",
    "# with open('ETag.xml', 'wb') as f:\n",
    "#     f.write(response.content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # Turn xml to json\n",
    "# filename = 'ETag'\n",
    "# with open(f'{filename}.xml', 'r', encoding='utf-8-sig') as f:\n",
    "#     data_dict = xmltodict.parse(f.read())\n",
    "#     data_json = json.dumps(data_dict, ensure_ascii=False)\n",
    "#     # dump to Etag.json with json\n",
    "#     json.dump(data_dict, open(f'{filename}.json', 'w'), ensure_ascii=False, indent=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "'pwd' 不是內部或外部命令、可執行的程式或批次檔。\n"
     ]
    }
   ],
   "source": [
    "%pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read ETagPairLive_1615.xml.gz and decompress it\n",
    "import gzip\n",
    "import shutil\n",
    "import threading\n",
    "\n",
    "\n",
    "\n",
    "def crawl(url, filename):\n",
    "    \"\"\"Download one gzipped ETag XML snapshot and convert it to JSON.\n",
    "\n",
    "    Three files are written side by side: `{filename}.xml.gz` (raw download),\n",
    "    `{filename}.xml` (decompressed) and `{filename}.json` (xmltodict output).\n",
    "    Failures are reported on stdout instead of being raised, so one bad\n",
    "    snapshot does not abort the rest of the crawl.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the `.xml.gz` file to fetch.\n",
    "        filename: Output path without extension.\n",
    "\n",
    "    Returns:\n",
    "        None.\n",
    "    \"\"\"\n",
    "    import os\n",
    "\n",
    "    try:\n",
    "        # The target folder (e.g. ./data) may not exist yet.\n",
    "        os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)\n",
    "\n",
    "        # The timeout keeps a stalled connection from blocking its thread forever;\n",
    "        # raise_for_status() stops an HTTP error page from being saved as a .gz file.\n",
    "        response = requests.get(url, timeout=30)\n",
    "        response.raise_for_status()\n",
    "        with open(f'{filename}.xml.gz', 'wb') as f:\n",
    "            f.write(response.content)\n",
    "\n",
    "        # Decompress\n",
    "        with gzip.open(f'{filename}.xml.gz', 'rb') as f_in:\n",
    "            with open(f'{filename}.xml', 'wb') as f_out:\n",
    "                shutil.copyfileobj(f_in, f_out)\n",
    "\n",
    "        # xml to json: pass raw bytes so the XML parser works out the document\n",
    "        # encoding itself instead of relying on the platform's locale default.\n",
    "        with open(f'{filename}.xml', 'rb') as f:\n",
    "            data_dict = xmltodict.parse(f.read())\n",
    "        with open(f'{filename}.json', 'w', encoding='utf-8') as f:\n",
    "            json.dump(data_dict, f, ensure_ascii=False, indent=2)\n",
    "    except Exception as e:\n",
    "        print(e)\n",
    "        print(f'{url} failed')\n",
    "    else:\n",
    "        # Only report success when every step above completed.\n",
    "        print(f'{url} done')\n",
    "\n",
    "from concurrent.futures import ThreadPoolExecutor\n",
    "\n",
    "# One snapshot every 5 minutes, for each day from 2024-05-06 to 2024-05-12.\n",
    "day_list = ['202405%02d'%(x) for x in range(6, 13)]\n",
    "hour_list = ['%02d'%(i) for i in range(24)]\n",
    "five_minute_list = ['%02d'%(i) for i in range(0,60,5)]\n",
    "print('Lens = ', len(day_list) * len(hour_list) * len(five_minute_list))\n",
    "\n",
    "# Upper bound on simultaneous downloads; tune as needed.\n",
    "MAX_WORKERS = 16\n",
    "\n",
    "jobs = []\n",
    "for day in day_list:\n",
    "    for hour in hour_list:\n",
    "        for minute in five_minute_list:\n",
    "            i = hour + minute\n",
    "            filename = f'./data/ETagPairLive_{day}_{str(i)}'\n",
    "            url = f'https://tisvcloud.freeway.gov.tw/history/motc20/ETag/{day}/ETagPairLive_{str(i)}.xml.gz'\n",
    "            jobs.append((url, filename))\n",
    "\n",
    "# A bounded pool avoids opening one thread and one connection per URL all at once.\n",
    "# Leaving the `with` block waits for every submitted download to finish.\n",
    "with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:\n",
    "    for url, filename in jobs:\n",
    "        pool.submit(crawl, url, filename)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.font_manager as fm\n",
    "\n",
    "EtagPairId = '01F0339S-01F0376S'\n",
    "EtagStart = '01F0339S'\n",
    "EtagEnd = '01F0376S'\n",
    "EtagStartRoad = ['五股','高公局','林口(文化一路)']\n",
    "# Vehicle-type codes used in the feed, mapped to the labels shown in the legend.\n",
    "VehicleTypeDict = {\n",
    "    31: \"Car\",\n",
    "    32: \"Small Truck\",\n",
    "    41: \"Bus\", # (including taxis)\n",
    "    42: \"Heavy Truck\",\n",
    "    5: \"Trailer\"\n",
    "}\n",
    "\n",
    "\n",
    "def _as_list(node):\n",
    "    \"\"\"Return `node` unchanged if it is a list, otherwise wrap it in one.\n",
    "\n",
    "    xmltodict produces a dict rather than a list when an element occurs only\n",
    "    once, so every repeated element is normalised before it is iterated.\n",
    "    \"\"\"\n",
    "    return node if isinstance(node, list) else [node]\n",
    "\n",
    "\n",
    "# One {'<day>_<HHMM>': count} series per vehicle type.\n",
    "time_series = {vehicle_type: {} for vehicle_type in VehicleTypeDict}\n",
    "# The per-type names are kept as aliases of the entries above.\n",
    "time_series_31data = time_series[31]\n",
    "time_series_32data = time_series[32]\n",
    "time_series_41data = time_series[41]\n",
    "time_series_42data = time_series[42]\n",
    "time_series_5data = time_series[5]\n",
    "\n",
    "missing_files = 0\n",
    "malformed_files = 0\n",
    "\n",
    "day_list = ['202405%02d'%(x) for x in range(6, 13)]\n",
    "hour_list = ['%02d'%(i) for i in range(24)]\n",
    "five_minute_list = ['%02d'%(i) for i in range(0,60,5)]\n",
    "for day in day_list:\n",
    "    for hour in hour_list:\n",
    "        for minute in five_minute_list:\n",
    "            i = hour + minute\n",
    "            filename = f'./data/ETagPairLive_{day}_{str(i)}'\n",
    "            try:\n",
    "                with open(f'{filename}.json', 'r', encoding='utf-8') as f:\n",
    "                    data = json.load(f)\n",
    "                EtagPairLive = _as_list(data['ETagPairLiveList']['ETagPairLives']['ETagPairLive'])\n",
    "                for ETagPair in EtagPairLive:\n",
    "                    if ETagPair['ETagPairID'] != EtagPairId:\n",
    "                        continue\n",
    "                    Flow = _as_list(ETagPair['Flows']['Flow'])\n",
    "                    for vehicle in Flow:\n",
    "                        vehicle_type = int(vehicle['VehicleType'])\n",
    "                        # Vehicle types outside VehicleTypeDict are ignored.\n",
    "                        if vehicle_type in time_series:\n",
    "                            time_series[vehicle_type][f'{day}_{i}'] = int(vehicle['VehicleCount'])\n",
    "            except FileNotFoundError:\n",
    "                # This snapshot was never downloaded; leave a gap in the series.\n",
    "                missing_files += 1\n",
    "            except (OSError, ValueError, KeyError, TypeError) as e:\n",
    "                # Unreadable or unexpectedly shaped snapshot: skip it, but say so\n",
    "                # instead of silently producing an empty plot.\n",
    "                malformed_files += 1\n",
    "                print(f'{filename}.json skipped: {e!r}')\n",
    "\n",
    "if missing_files or malformed_files:\n",
    "    print(f'Skipped snapshots: {missing_files} missing, {malformed_files} malformed')\n",
    "\n",
    "# Plot every 6th sample (30-minute spacing when no snapshot is missing).\n",
    "step = 6\n",
    "plt.figure(figsize=(30,10))\n",
    "for vehicle_type, series in time_series.items():\n",
    "    plt.plot(list(series.keys())[::step], list(series.values())[::step], label=VehicleTypeDict[vehicle_type])\n",
    "plt.xlabel('Time')\n",
    "plt.xticks(fontsize=14, rotation=60)\n",
    "plt.ylabel('VehicleCount')\n",
    "plt.title(f'{EtagStart} to {EtagEnd}')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}