# %%
import csv
import xml.etree.ElementTree as ET

import requests

# %%
# WMTS capabilities document whose <Layer> entries are exported below.
xml_url = "https://wayback.maptiles.arcgis.com/arcgis/rest/services/world_imagery/mapserver/wmts/1.0.0/wmtscapabilities.xml"

# %%
# Fetch and parse the XML.
response = requests.get(xml_url, timeout=60)
response.raise_for_status()  # stop on an HTTP error instead of trying to parse an error page
xml = response.text
# Parse the raw bytes so ElementTree applies the encoding declared by the
# document itself rather than the charset requests guessed for `.text`.
root = ET.fromstring(response.content)

# %%
# Namespace definitions
namespaces = {
    'ows': 'https://www.opengis.net/ows/1.1',
    '': 'https://www.opengis.net/wmts/1.0'  # Default namespace
}

# Header row of wayback.csv; `_layer_row` returns values in this order.
CSV_COLUMNS = [
    'Title',
    'Identifier',
    'LowerCorner',
    'UpperCorner',
    'Format',
    'TileMatrixSetLinks',
    'ResourceURL_Template',
]


def _find_text(parent, path):
    """Return the text of the first element matching ``path`` under ``parent``.

    Returns ``''`` when nothing matches or the matched element has no text.

    The match is compared against ``None`` explicitly: an ``Element`` without
    child elements is falsy, so a plain truth test (``if element``) discards
    leaf elements such as ``<Format>`` even when they are present.
    """
    node = parent.find(path, namespaces)
    if node is None or node.text is None:
        return ''
    return node.text


def _layer_row(layer):
    """Build one CSV row (ordered like ``CSV_COLUMNS``) from a ``Layer`` element."""
    tile_matrix_sets = [
        tms.text
        for tms in layer.findall('TileMatrixSetLink/TileMatrixSet', namespaces)
        if tms.text  # skip empty elements so join() never sees None
    ]
    resource_url = layer.find('ResourceURL', namespaces)
    return [
        _find_text(layer, 'ows:Title'),
        _find_text(layer, 'ows:Identifier'),
        _find_text(layer, 'ows:BoundingBox/ows:LowerCorner'),
        _find_text(layer, 'ows:BoundingBox/ows:UpperCorner'),
        _find_text(layer, 'Format'),
        ', '.join(tile_matrix_sets),
        resource_url.get('template', '') if resource_url is not None else '',
    ]


# %%
# Write the header plus one row per layer. The encoding is given explicitly so
# the file does not depend on the platform's default text encoding.
with open('wayback.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(CSV_COLUMNS)
    writer.writerows(
        _layer_row(layer) for layer in root.findall('.//Layer', namespaces)
    )

# %% [markdown]
# # Parse dates
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TitleIdentifierLowerCornerUpperCornerFormatTileMatrixSetLinksResourceURL_Template
0World Imagery (Wayback 2024-10-10)WB_2024_R11-2.003750722959434E7 -2.003750722959434E72.003750722959434E7 2.003750722959434E7NaNdefault028mm, GoogleMapsCompatiblehttps://wayback.maptiles.arcgis.com/arcgis/res...
1World Imagery (Wayback 2024-09-19)WB_2024_R10-2.003750722959434E7 -2.003750722959434E72.003750722959434E7 2.003750722959434E7NaNdefault028mm, GoogleMapsCompatiblehttps://wayback.maptiles.arcgis.com/arcgis/res...
\n", "
# %%
import pandas as pd

# Load the layer table from wayback.csv into a DataFrame.
df = pd.read_csv(filepath_or_buffer='wayback.csv')

# Preview the first two rows.
df.iloc[:2]
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TitleIdentifierLowerCornerUpperCornerFormatTileMatrixSetLinksResourceURL_Templatedate
0World Imagery (Wayback 2024-10-10)WB_2024_R11-2.003750722959434E7 -2.003750722959434E72.003750722959434E7 2.003750722959434E7NaNdefault028mm, GoogleMapsCompatiblehttps://wayback.maptiles.arcgis.com/arcgis/res...2024-10-10
1World Imagery (Wayback 2024-09-19)WB_2024_R10-2.003750722959434E7 -2.003750722959434E72.003750722959434E7 2.003750722959434E7NaNdefault028mm, GoogleMapsCompatiblehttps://wayback.maptiles.arcgis.com/arcgis/res...2024-09-19
\n", "
# %%
# Pull the YYYY-MM-DD date out of each layer title, e.g.
# "World Imagery (Wayback 2024-10-10)" -> 2024-10-10, and store it as a
# datetime column. `expand=False` makes str.extract return a Series directly.
title_dates = df['Title'].str.extract(r'(\d{4}-\d{2}-\d{2})', expand=False)
df['date'] = pd.to_datetime(title_dates, format='%Y-%m-%d')

# Preview the first two rows, now including the `date` column.
df.iloc[:2]

# %%
# Save the table, including the parsed dates, without the row index.
df.to_parquet(path='wayback.parquet', index=False)