from fastapi import FastAPI
import uvicorn
import pandas as pd
import numpy as np
import pickle
import rasterio
import h5py
from skimage.morphology import disk
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim
import pyproj

app = FastAPI()


# Endpoints
# Root endpoint
@app.get("/")
def root():
    return {"API": "APCP HRRR"}


def lat_lon_to_row_col(lat, lon):
    # Project WGS84 lat/lon into the HRRR grid CRS, then map the projected
    # coordinates to an array row/column via the grid's affine transform.
    crs_dic = pickle.load(open('hrrr_crs.pkl', 'rb'))
    lon_hrrr, lat_hrrr = crs_dic['proj_4326'].transform(lon, lat)
    row, col = rasterio.transform.rowcol(crs_dic['affine'], lon_hrrr, lat_hrrr)
    return int(row), int(col)
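
# A minimal sketch of how 'hrrr_crs.pkl' could have been built. The Lambert
# conformal conic parameters are the standard HRRR projection; the affine
# origin/cell-size values are illustrative assumptions, not taken from this
# repo:
#
#     from affine import Affine
#     from pyproj import CRS, Transformer
#
#     hrrr_crs = CRS.from_proj4(
#         '+proj=lcc +lat_0=38.5 +lon_0=-97.5 +lat_1=38.5 +lat_2=38.5 '
#         '+R=6371229 +units=m +no_defs')
#     crs_dic = {
#         # always_xy=True so .transform() takes (lon, lat) order, matching
#         # the call in lat_lon_to_row_col above
#         'proj_4326': Transformer.from_crs('EPSG:4326', hrrr_crs,
#                                           always_xy=True),
#         # 3 km cells; the upper-left corner coordinates are placeholders
#         'affine': Affine(3000.0, 0.0, -2699020.0,
#                          0.0, -3000.0, 1588193.0),
#     }
#     with open('hrrr_crs.pkl', 'wb') as f:
#         pickle.dump(crs_dic, f)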


def geocode_address(address):
    # Try the US Census geocoder first; fall back to Nominatim on any failure.
    try:
        address2 = address.replace(' ', '+').replace(',', '%2C')
        df = pd.read_json(
            f'https://geocoding.geo.census.gov/geocoder/locations/onelineaddress?address={address2}&benchmark=2020&format=json')
        results = df.iloc[:1, 0][0][0]['coordinates']
        lat, lon = results['y'], results['x']
    except Exception:
        geolocator = Nominatim(user_agent='GTA Lookup')
        geocode = RateLimiter(geolocator.geocode, min_delay_seconds=2)
        location = geocode(address)
        lat, lon = location.latitude, location.longitude
    return lat, lon
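
# Example (hypothetical address; either geocoder may resolve it):
#     lat, lon = geocode_address('1600 Pennsylvania Ave NW, Washington, DC')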


def get_data(address, start_date, end_date, radius_miles, get_max):
    start_date = pd.Timestamp(str(start_date)).strftime('%Y%m%d%H')
    end_date = pd.Timestamp(str(end_date)).strftime('%Y%m%d%H')

    date_years = pd.date_range(start=start_date[:-2], end=end_date[:-2], freq='M')
    date_range_days = pd.date_range(start_date[:-2], end_date[:-2], freq='H')

    # freq='M' yields month-end stamps, so a range shorter than a month comes
    # back empty; fall back to the start date's year in that case.
    years = list(set([d.year for d in date_years]))
    if len(years) == 0:
        years = [pd.Timestamp(start_date).year]

    # Geocode address
    try:
        lat, lon = geocode_address(address)
    except Exception:
        lat, lon = None, None

    # Convert lat/lon to row & col on the HRRR grid
    try:
        row, col = lat_lon_to_row_col(lat, lon)
    except Exception:
        row = col = None

    files = [
        # 'Data/APCP_2024_hrrr_v2.h5',
        'Data/APCP_2020_hrrr_v3.h5',
        'Data/APCP_2021_hrrr_3.h5',
        'Data/APCP_2022_hrrr_v2.h5',
        # 'Data/APCP_2023_hrrr_v2c.h5'
    ]
    files_chosen = [f for f in files if any(str(y) in f for y in years)]

    # # Query and collect H5 data (currently disabled; only the geocoding
    # # and grid-lookup results are returned below)
    # all_data = []
    # all_dates = []
    # for file in files_chosen:
    #     with h5py.File(file, 'r') as f:
    #         # Get dates from H5
    #         dates = f['date_time_hr'][:]
    #         date_idx = np.where((dates >= int(start_date))
    #                             & (dates <= int(end_date)))[0]
    #         # Select data by date and radius
    #         dates = dates[date_idx]
    #         data = f['APCP'][date_idx, row-radius_miles:row +
    #                          radius_miles+1, col-radius_miles:col+radius_miles+1]
    #     all_data.append(data)
    #     all_dates.append(dates)
    # data_all = np.vstack(all_data)
    # dates_all = np.concatenate(all_dates)

    # # Convert to inches (clip negative values to 0 first)
    # data_mat = np.where(data_all < 0, 0, data_all) * 0.0393701

    # # Keep only cells inside the circular radius; mark the rest with -1
    # disk_mask = np.where(disk(radius_miles) == 1, True, False)
    # data_mat = np.where(disk_mask, data_mat, -1).round(3)

    # # Process to DataFrame
    # if get_max:
    #     # Find max of data over the spatial window per timestep
    #     data_max = np.max(data_mat, axis=(1, 2))
    #     df_data = pd.DataFrame({'Date': dates_all,
    #                             'APCP_max': data_max})
    # else:
    #     # Get all data (full 2-D window per timestep)
    #     data_all = list(data_mat)
    #     df_data = pd.DataFrame({'Date': dates_all,
    #                             'APCP_all': data_all})

    # df_data['Date'] = pd.to_datetime(df_data['Date'], format='%Y%m%d%H')
    # df_data = df_data.set_index('Date')
    # df_data = df_data.reindex(date_range_days, fill_value=0).reset_index().rename(
    #     columns={'index': 'Date'})
    # df_data['Date'] = df_data['Date'].dt.strftime('%Y-%m-%d:%H')
    # return df_data

    return lat, lon, row, col


@app.get('/APCP_Docker_Data')
async def predict(address: str, start_date: str, end_date: str, radius_miles: int, get_max: bool):
    try:
        results = get_data(address, start_date,
                           end_date, radius_miles, get_max)
    except Exception:
        # Fall back to a JSON-serializable error payload (a raw DataFrame
        # would fail FastAPI's response encoding)
        results = {'Date': 'error', 'APCP_max': 'error'}
    # return results.to_json()
    return results
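

# Launch with uvicorn (imported above) when the module is run directly.
# Port 7860 is the Hugging Face Spaces convention; the actual serving command
# may live in the Dockerfile instead, so treat this as an assumed default.
if __name__ == '__main__':
    uvicorn.run(app, host='0.0.0.0', port=7860)

# Example request (hypothetical address and dates):
#     curl 'http://localhost:7860/APCP_Docker_Data?address=1600+Pennsylvania+Ave+NW+Washington+DC&start_date=2021-06-01&end_date=2021-06-02&radius_miles=3&get_max=true'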