{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import xml.etree.ElementTree as ET\n",
"import requests\n",
"import csv"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"xml_url = \"https://wayback.maptiles.arcgis.com/arcgis/rest/services/world_imagery/mapserver/wmts/1.0.0/wmtscapabilities.xml\""
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Download the capabilities document and parse it into an element tree.\n",
"# The timeout keeps the cell from hanging indefinitely on a stalled connection,\n",
"# and raise_for_status() surfaces HTTP errors here rather than as a confusing\n",
"# XML parse failure on an error page.\n",
"response = requests.get(xml_url, timeout=60)\n",
"response.raise_for_status()\n",
"xml = response.text\n",
"root = ET.fromstring(xml)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Namespace prefixes for the ElementTree lookups below; the '' key is the\n",
"# default namespace applied to unprefixed tags such as 'Layer'.\n",
"# NOTE(review): these URIs use 'https://'; the stored outputs show layers being\n",
"# matched, so they presumably mirror what the service declares - confirm before\n",
"# changing them.\n",
"namespaces = {\n",
"    'ows': 'https://www.opengis.net/ows/1.1',\n",
"    '': 'https://www.opengis.net/wmts/1.0'  # Default namespace\n",
"}\n",
"\n",
"\n",
"def _find_text(parent, path):\n",
"    \"\"\"Return the text of the first element matching ``path`` under ``parent``.\n",
"\n",
"    Returns '' when nothing matches or the matched element has no text.\n",
"    The match is tested with ``is not None`` on purpose: an Element without\n",
"    children is falsy, so a plain truthiness test would throw away leaf\n",
"    elements such as <Format> and leave the column empty.\n",
"    \"\"\"\n",
"    element = parent.find(path, namespaces)\n",
"    if element is None or element.text is None:\n",
"        return ''\n",
"    return element.text\n",
"\n",
"\n",
"# Write one CSV row per <Layer> element.\n",
"# utf-8 is set explicitly so the file does not depend on the platform's\n",
"# default encoding when it is read back later.\n",
"with open('wayback.csv', 'w', newline='', encoding='utf-8') as csv_file:\n",
"    writer = csv.writer(csv_file)\n",
"\n",
"    # Header row\n",
"    writer.writerow([\n",
"        'Title',\n",
"        'Identifier',\n",
"        'LowerCorner',\n",
"        'UpperCorner',\n",
"        'Format',\n",
"        'TileMatrixSetLinks',\n",
"        'ResourceURL_Template'\n",
"    ])\n",
"\n",
"    for layer in root.findall('.//Layer', namespaces):\n",
"        # A layer may reference several tile matrix sets; keep them all in\n",
"        # one comma-separated cell.\n",
"        tile_matrix_set_links = ', '.join(\n",
"            tms.text or ''\n",
"            for tms in layer.findall('TileMatrixSetLink/TileMatrixSet', namespaces)\n",
"        )\n",
"\n",
"        # The URL template is stored as an attribute, not as element text.\n",
"        resource_url = layer.find('ResourceURL', namespaces)\n",
"        resource_url_template = resource_url.get('template', '') if resource_url is not None else ''\n",
"\n",
"        writer.writerow([\n",
"            _find_text(layer, 'ows:Title'),\n",
"            _find_text(layer, 'ows:Identifier'),\n",
"            _find_text(layer, 'ows:BoundingBox/ows:LowerCorner'),\n",
"            _find_text(layer, 'ows:BoundingBox/ows:UpperCorner'),\n",
"            _find_text(layer, 'Format'),\n",
"            tile_matrix_set_links,\n",
"            resource_url_template\n",
"        ])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Parse dates"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
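"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>Title</th>\n",
"      <th>Identifier</th>\n",
"      <th>LowerCorner</th>\n",
"      <th>UpperCorner</th>\n",
"      <th>Format</th>\n",
"      <th>TileMatrixSetLinks</th>\n",
"      <th>ResourceURL_Template</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>World Imagery (Wayback 2024-10-10)</td>\n",
"      <td>WB_2024_R11</td>\n",
"      <td>-2.003750722959434E7 -2.003750722959434E7</td>\n",
"      <td>2.003750722959434E7 2.003750722959434E7</td>\n",
"      <td>NaN</td>\n",
"      <td>default028mm, GoogleMapsCompatible</td>\n",
"      <td>https://wayback.maptiles.arcgis.com/arcgis/res...</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>World Imagery (Wayback 2024-09-19)</td>\n",
"      <td>WB_2024_R10</td>\n",
"      <td>-2.003750722959434E7 -2.003750722959434E7</td>\n",
"      <td>2.003750722959434E7 2.003750722959434E7</td>\n",
"      <td>NaN</td>\n",
"      <td>default028mm, GoogleMapsCompatible</td>\n",
"      <td>https://wayback.maptiles.arcgis.com/arcgis/res...</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"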
],
"text/plain": [
" Title Identifier \\\n",
"0 World Imagery (Wayback 2024-10-10) WB_2024_R11 \n",
"1 World Imagery (Wayback 2024-09-19) WB_2024_R10 \n",
"\n",
" LowerCorner \\\n",
"0 -2.003750722959434E7 -2.003750722959434E7 \n",
"1 -2.003750722959434E7 -2.003750722959434E7 \n",
"\n",
" UpperCorner Format \\\n",
"0 2.003750722959434E7 2.003750722959434E7 NaN \n",
"1 2.003750722959434E7 2.003750722959434E7 NaN \n",
"\n",
" TileMatrixSetLinks \\\n",
"0 default028mm, GoogleMapsCompatible \n",
"1 default028mm, GoogleMapsCompatible \n",
"\n",
" ResourceURL_Template \n",
"0 https://wayback.maptiles.arcgis.com/arcgis/res... \n",
"1 https://wayback.maptiles.arcgis.com/arcgis/res... "
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Load the layer table exported to CSV and preview its first two rows.\n",
"df = pd.read_csv(filepath_or_buffer='wayback.csv')\n",
"df.head(n=2)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
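"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>Title</th>\n",
"      <th>Identifier</th>\n",
"      <th>LowerCorner</th>\n",
"      <th>UpperCorner</th>\n",
"      <th>Format</th>\n",
"      <th>TileMatrixSetLinks</th>\n",
"      <th>ResourceURL_Template</th>\n",
"      <th>date</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>World Imagery (Wayback 2024-10-10)</td>\n",
"      <td>WB_2024_R11</td>\n",
"      <td>-2.003750722959434E7 -2.003750722959434E7</td>\n",
"      <td>2.003750722959434E7 2.003750722959434E7</td>\n",
"      <td>NaN</td>\n",
"      <td>default028mm, GoogleMapsCompatible</td>\n",
"      <td>https://wayback.maptiles.arcgis.com/arcgis/res...</td>\n",
"      <td>2024-10-10</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>World Imagery (Wayback 2024-09-19)</td>\n",
"      <td>WB_2024_R10</td>\n",
"      <td>-2.003750722959434E7 -2.003750722959434E7</td>\n",
"      <td>2.003750722959434E7 2.003750722959434E7</td>\n",
"      <td>NaN</td>\n",
"      <td>default028mm, GoogleMapsCompatible</td>\n",
"      <td>https://wayback.maptiles.arcgis.com/arcgis/res...</td>\n",
"      <td>2024-09-19</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"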
],
"text/plain": [
" Title Identifier \\\n",
"0 World Imagery (Wayback 2024-10-10) WB_2024_R11 \n",
"1 World Imagery (Wayback 2024-09-19) WB_2024_R10 \n",
"\n",
" LowerCorner \\\n",
"0 -2.003750722959434E7 -2.003750722959434E7 \n",
"1 -2.003750722959434E7 -2.003750722959434E7 \n",
"\n",
" UpperCorner Format \\\n",
"0 2.003750722959434E7 2.003750722959434E7 NaN \n",
"1 2.003750722959434E7 2.003750722959434E7 NaN \n",
"\n",
" TileMatrixSetLinks \\\n",
"0 default028mm, GoogleMapsCompatible \n",
"1 default028mm, GoogleMapsCompatible \n",
"\n",
" ResourceURL_Template date \n",
"0 https://wayback.maptiles.arcgis.com/arcgis/res... 2024-10-10 \n",
"1 https://wayback.maptiles.arcgis.com/arcgis/res... 2024-09-19 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pull the ISO date (YYYY-MM-DD) out of each layer title and store it as a\n",
"# datetime column named 'date'.\n",
"date_text = df['Title'].str.extract(r'(\\d{4}-\\d{2}-\\d{2})', expand=False)\n",
"df['date'] = pd.to_datetime(date_text, format='%Y-%m-%d')\n",
"df.head(n=2)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Persist the table, including the parsed 'date' column, without the row index.\n",
"df.to_parquet(path='wayback.parquet', index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "zeel_py310",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
}
},
"nbformat": 4,
"nbformat_minor": 2
}