File size: 4,474 Bytes
89faf7b
 
 
 
 
 
 
 
 
 
 
18a0912
560c525
89faf7b
 
 
 
 
 
 
bccf49f
3e625c8
 
 
2a2a3af
 
 
 
89faf7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
809b57b
 
89faf7b
3d1bfb4
 
 
 
 
 
 
 
89faf7b
b52bc3a
6c8879c
 
 
 
1359579
809b57b
89faf7b
b0d4da1
3e625c8
b0d4da1
 
3e625c8
b0d4da1
3d1bfb4
 
 
 
 
 
 
b0d4da1
3d1bfb4
b0d4da1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c8879c
89faf7b
b6f337f
89faf7b
 
 
809b57b
89faf7b
 
809b57b
89faf7b
69b948e
 
89faf7b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
from fastapi import FastAPI
import uvicorn

import pandas as pd
import numpy as np
import pickle
import rasterio
import h5py
from skimage.morphology import disk
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim
import pyproj

# ASGI application object; the `@app.get(...)` decorators in this file
# register their handlers on it.
app = FastAPI()


# Endpoints
# Root endpoint
@app.get("/")
def root():
    """Return a small static payload identifying this API."""
    payload = {"API": "ACPC HRRR"}
    return payload


def lat_lon_to_row_col(lat, lon, crs_path='hrrr_crs.pkl'):
    """Convert a latitude/longitude pair to a (row, col) index on the grid.

    The pickled dict at *crs_path* must provide two entries:
    ``'proj_4326'`` (an object with a ``transform(lon, lat)`` method --
    presumably a pyproj transformer from EPSG:4326 into the grid's CRS,
    TODO confirm) and ``'affine'`` (the transform handed to
    ``rasterio.transform.rowcol``).

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        crs_path: Path of the pickled CRS dict. Defaults to the file name
            that was previously hard-coded.

    Returns:
        Tuple ``(row, col)`` of Python ints.
    """
    # NOTE(security): unpickling executes arbitrary code. Only ever point
    # crs_path at a trusted, locally produced file.
    # The context manager closes the handle; the original leaked it.
    with open(crs_path, 'rb') as crs_file:
        crs_dic = pickle.load(crs_file)

    # The transformer is called in (x, y) == (lon, lat) order.
    lon_hrrr, lat_hrrr = crs_dic['proj_4326'].transform(lon, lat)
    row, col = rasterio.transform.rowcol(crs_dic['affine'], lon_hrrr, lat_hrrr)
    return int(row), int(col)
    
def geocode_address(address):
    """Geocode a one-line address to a ``(lat, lon)`` pair.

    The US Census Bureau geocoder is tried first. If that lookup fails for
    any reason (network error, no match, unexpected payload) the address is
    retried through Nominatim.

    Args:
        address: Free-form one-line address string.

    Returns:
        Tuple ``(lat, lon)`` taken from the Census ``'y'``/``'x'``
        coordinates, or from the Nominatim location's
        ``latitude``/``longitude``.

    Raises:
        ValueError: If the Census lookup failed and Nominatim returned no
            location either.
    """
    # Imported here so the block does not depend on the file's import list.
    from urllib.parse import quote_plus

    try:
        # quote_plus yields '+' for spaces and '%2C' for commas (what the
        # hand-rolled replace() chain produced) and additionally escapes
        # characters such as '&', '#' and '?' that would otherwise corrupt
        # the query string.
        address2 = quote_plus(address)
        df = pd.read_json(
            f'https://geocoding.geo.census.gov/geocoder/locations/onelineaddress?address={address2}&benchmark=2020&format=json')
        # First cell of the frame, then the first match in it. Pure
        # positional access replaces the deprecated `Series[0]` fallback.
        results = df.iloc[0, 0][0]['coordinates']
        lat, lon = results['y'], results['x']
    except Exception:
        # Deliberate best-effort fallback: any Census failure lands here.
        geolocator = Nominatim(user_agent='GTA Lookup')
        geocode = RateLimiter(geolocator.geocode, min_delay_seconds=2)
        # Go through the rate limiter; the original built it but then called
        # geolocator.geocode directly, leaving the limiter unused.
        location = geocode(address)
        if location is None:
            # No match (or, with RateLimiter's default settings, a swallowed
            # service error). Fail with a clear message instead of an
            # AttributeError on `None.latitude`.
            raise ValueError(f'Could not geocode address: {address!r}')
        lat, lon = location.latitude, location.longitude

    return lat, lon


def get_data(address, start_date, end_date, radius_miles, get_max):
    """Geocode *address* and locate it on the HRRR grid.

    The HDF5 query that would turn this into a precipitation table is
    currently disabled (kept below as commented-out code), so the function
    only returns the geocoded point and its grid index.

    Args:
        address: One-line address passed to ``geocode_address``.
        start_date: Start of the requested period; anything
            ``pd.Timestamp(str(start_date))`` can parse.
        end_date: End of the requested period, same rules as *start_date*.
        radius_miles: Window half-width; referenced only by the disabled
            pipeline.
        get_max: Max-vs-all switch; referenced only by the disabled
            pipeline.

    Returns:
        Tuple ``(lat, lon, row, col)``. ``lat``/``lon`` are ``None`` when
        geocoding fails; ``row``/``col`` are ``None`` when geocoding or the
        grid conversion fails.

    Raises:
        ValueError: If pandas cannot parse *start_date* or *end_date*.
    """
    start_ts = pd.Timestamp(str(start_date))
    end_ts = pd.Timestamp(str(end_date))

    # Compact 'YYYYMMDDHH' strings: the disabled HDF5 query compares
    # int(start_date) / int(end_date) against the stored date values.
    start_date = start_ts.strftime('%Y%m%d%H')
    end_date = end_ts.strftime('%Y%m%d%H')

    # Hourly index used by the disabled pipeline to re-index its output.
    # Lower-case 'h' is accepted by old and new pandas; 'H' is deprecated.
    date_range_days = pd.date_range(start_date[:-2], end_date[:-2], freq='h')

    # Every calendar year the range touches. The original derived the years
    # from month-end dates, which dropped the last year of ranges such as
    # 2020-12-15..2021-01-10, and its empty-range fallback re-parsed the
    # compact string above instead of using the parsed timestamp.
    years = list(range(start_ts.year, end_ts.year + 1))
    if not years:
        # end_date earlier than start_date: keep the old fallback.
        years = [start_ts.year]

    # Geocode Address (best effort: a failure degrades to None values)
    try:
        lat, lon = geocode_address(address)
    except Exception:
        lat, lon = None, None

    # Convert Lat Lon to row & col on Array
    if lat is None or lon is None:
        row = col = None
    else:
        try:
            row, col = lat_lon_to_row_col(lat, lon)
        except Exception:
            row = col = None

    files = [
        # 'Data/APCP_2024_hrrr_v2.h5',
        'Data/APCP_2020_hrrr_v3.h5',
        'Data/APCP_2021_hrrr_3.h5',
        'Data/APCP_2022_hrrr_v2.h5',
        # 'Data/APCP_2023_hrrr_v2c.h5'
    ]

    # Keep the files whose path mentions one of the requested years.
    files_choosen = [path for path in files
                     if any(str(year) in path for year in years)]

    # NOTE: the pipeline below is disabled. `date_range_days`,
    # `files_choosen`, `radius_miles` and `get_max` are only consumed by it.

    # # Query and Collect H5 Data
    # all_data = []
    # all_dates = []
    # for file in files_choosen:
    #     with h5py.File(file, 'r') as f:
    #         # Get Dates from H5
    #         dates = f['date_time_hr'][:]
    #         date_idx = np.where((dates >= int(start_date))
    #                             & (dates <= int(end_date)))[0]

    #         # Select Data by Date and Radius
    #         dates = dates[date_idx]
    #         data = f['APCP'][date_idx, row-radius_miles:row +
    #                          radius_miles+1, col-radius_miles:col+radius_miles+1]

    #         all_data.append(data)
    #         all_dates.append(dates)

    # data_all = np.vstack(all_data)
    # dates_all = np.concatenate(all_dates)

    # # Convert to Inches
    # data_mat = np.where(data_all < 0, 0, data_all)*0.0393701

    # # Get Radius of Data
    # disk_mask = np.where(disk(radius_miles) == 1, True, False)
    # data_mat = np.where(disk_mask, data_mat, -1).round(3)

    # # Process to DataFrame
    # # Find Max of Data
    # if get_max == True:
    #     data_max = np.max(data_mat, axis=(1, 2))
    #     df_data = pd.DataFrame({'Date': dates_all,
    #                            'APCP_max': data_max})
    # # Get all Data
    # else:
    #     data_all = list(data_mat)
    #     df_data = pd.DataFrame({'Date': dates_all,
    #                            'APCP_all': data_all})

    # df_data['Date'] = pd.to_datetime(df_data['Date'], format='%Y%m%d%H')
    # df_data = df_data.set_index('Date')

    # df_data = df_data.reindex(date_range_days, fill_value=0).reset_index().rename(
    #     columns={'index': 'Date'})
    # df_data['Date'] = df_data['Date'].dt.strftime('%Y-%m-%d:%H')

    # return df_data
    return lat, lon, row, col

@app.get('/APCP_Docker_Data')
async def predict(address: str, start_date: str, end_date: str, radius_miles: int, get_max: bool):
    """Endpoint wrapper around ``get_data``.

    Args:
        address: One-line address to look up.
        start_date: Start of the requested period.
        end_date: End of the requested period.
        radius_miles: Forwarded to ``get_data``.
        get_max: Forwarded to ``get_data``.

    Returns:
        Whatever ``get_data`` returns on success. On any failure, a dict
        with ``'Date'`` and ``'APCP_max'`` entries set to ``'error'``.
    """
    # Imported here so the block does not depend on the file's import list.
    from fastapi.concurrency import run_in_threadpool

    try:
        # get_data does blocking work (HTTP geocoding, file reads). Running
        # it in the thread pool keeps this coroutine from stalling the
        # event loop for other requests.
        results = await run_in_threadpool(
            get_data, address, start_date, end_date, radius_miles, get_max)
    except Exception:
        # Same error marker as before, but converted to a plain dict so the
        # response no longer relies on the encoder's fallback handling of a
        # raw DataFrame.
        results = pd.DataFrame(
            {'Date': ['error'], 'APCP_max': ['error']}).to_dict()

    # return results.to_json()
    return results