mattritchey committed on
Commit
89faf7b
·
verified ·
1 Parent(s): 0f7eabd

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +21 -0
  2. main.py +126 -0
  3. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Image for the hail-lookup FastAPI service defined in main.py.
FROM python:3.9

WORKDIR /code

# The dependency manifest is copied on its own, ahead of the application
# source, so the pip layer below can be reused when only source files change.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Create an unprivileged account (uid 1000); everything after USER runs as it.
RUN useradd -m -u 1000 user

USER user

# Point HOME at the new account and put its per-user bin directory on PATH.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Application source (owned by the unprivileged user) lands in the workdir,
# which is where main.py resolves its relative Data/ paths from.
COPY --chown=user . $HOME/app

# Serve the `app` object from main.py on all interfaces, port 7860.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ import uvicorn
3
+
4
+ import pandas as pd
5
+ import numpy as np
6
+ import pickle
7
+ import rasterio
8
+ import h5py
9
+ from skimage.morphology import disk
10
+ from geopy.extra.rate_limiter import RateLimiter
11
+ from geopy.geocoders import Nominatim
12
+
# The single FastAPI application; the route decorators in this file register
# their handlers on it.
app = FastAPI()


# Endpoints
# Root endpoint
@app.get("/")
def root():
    """Return a fixed payload that identifies this API."""
    banner = {"API": "Hail API 2020"}
    return banner
21
+
def geocode_address(address):
    """Resolve a one-line address to a ``(lat, lon)`` pair.

    The US Census Bureau one-line geocoder is queried first. If that request
    raises for any reason (network failure, no match, unexpected payload),
    the lookup falls back to Nominatim through geopy.

    Args:
        address: Free-form address string.

    Returns:
        Tuple ``(lat, lon)`` taken from whichever geocoder answered.

    Raises:
        ValueError: If the Census lookup fails and Nominatim has no result
            for the address either.
    """
    # Local import: keeps this function self-contained without touching the
    # file-level import block.
    from urllib.parse import quote_plus

    try:
        # quote_plus produces the same '+' / '%2C' escapes the service was
        # already receiving, and additionally escapes characters such as
        # '#' and '&' that would otherwise cut the query string short.
        query = quote_plus(address)
        df = pd.read_json(
            'https://geocoding.geo.census.gov/geocoder/locations/'
            f'onelineaddress?address={query}&benchmark=2020&format=json')
        results = df.iloc[:1, 0][0][0]['coordinates']
        lat, lon = results['y'], results['x']
    except Exception:
        # Deliberately broad: any failure of the primary geocoder should
        # trigger the fallback. `Exception` (rather than a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        geolocator = Nominatim(user_agent='GTA Lookup')
        geocode = RateLimiter(geolocator.geocode, min_delay_seconds=2)
        # Call the rate-limited wrapper, not the raw geocoder method.
        # NOTE(review): the limiter is rebuilt on every call, so it mainly
        # contributes its retry behaviour here; share one instance at module
        # level if cross-request throttling is required.
        location = geocode(address)
        if location is None:
            # No match (or the wrapper gave up after its retries): fail with
            # a clear message instead of an AttributeError on `.latitude`.
            raise ValueError(f'Unable to geocode address: {address!r}')
        lat, lon = location.latitude, location.longitude

    return lat, lon
37
+
def get_hail_data(address, start_date, end_date, radius_miles, get_max):
    """Collect daily hail values around an address for a date range.

    The address is geocoded, converted to a row/column on the stored grid,
    and a square window of ``radius_miles`` cells on each side is read from
    the yearly HDF5 files. Cells outside the inscribed disk are set to -1.

    Args:
        address: Address string passed to ``geocode_address``.
        start_date: First day of the range; anything ``pd.Timestamp`` can
            parse once converted with ``str``.
        end_date: Last day of the range (inclusive), same format rules.
        radius_miles: Half-width of the window, applied as a number of grid
            cells. NOTE(review): this equals miles only if one grid cell is
            about one mile wide - confirm against the source grid.
        get_max: If truthy, return one maximum per day (``Hail_max``);
            otherwise return the masked window per day (``Hail_all``).

    Returns:
        DataFrame with a ``Date`` column (``YYYY-MM-DD`` strings) covering
        every day of the range plus ``Hail_max`` or ``Hail_all``. Days with
        no stored record are filled with 0.

    Raises:
        ValueError: If no data file covers the requested years, or the
            window around the address extends past the edge of the grid.
    """
    start_ts = pd.Timestamp(str(start_date))
    end_ts = pd.Timestamp(str(end_date))
    start_date = start_ts.strftime('%Y%m%d')
    end_date = end_ts.strftime('%Y%m%d')
    date_range_days = pd.date_range(start_date, end_date)

    # Every calendar year touched by the range. Deriving years from
    # month-end timestamps missed the final year whenever the range ended
    # before that year's first month end (e.g. 2021-12-15 .. 2022-01-10).
    years = list(range(start_ts.year, end_ts.year + 1))
    if not years:
        # Reversed range: keep the previous fallback to the start year.
        years = [start_ts.year]

    files = [
        'Data/2023_hail.h5',
        'Data/2022_hail.h5',
        'Data/2021_hail.h5',
        'Data/2020_hail.h5',
    ]
    files_chosen = [
        path for path in files
        if any(str(year) in path for year in years)
    ]
    if not files_chosen:
        # Fail with a clear message instead of an opaque np.vstack error.
        raise ValueError(
            f'No hail data available for years {years[0]}-{years[-1]}')

    # Geocode Address
    lat, lon = geocode_address(address)

    # Convert Lat Lon to row & col on Array.
    # The pickle is a file shipped alongside the application; never point
    # this at untrusted input.
    with open('Data/transform_mrms.pkl', 'rb') as fh:
        transform = pickle.load(fh)

    row, col = rasterio.transform.rowcol(transform, lon, lat)

    row_lo, row_hi = row - radius_miles, row + radius_miles + 1
    col_lo, col_hi = col - radius_miles, col + radius_miles + 1

    # Query and Collect H5 Data
    all_data = []
    all_dates = []
    for file in files_chosen:
        with h5py.File(file, 'r') as f:
            hail = f['hail']
            _, n_rows, n_cols = hail.shape
            if row_lo < 0 or col_lo < 0 or row_hi > n_rows or col_hi > n_cols:
                # A negative start would wrap around and a clipped window
                # would not match the disk mask; reject both explicitly.
                raise ValueError(
                    'Requested radius extends beyond the hail grid '
                    f'for address: {address!r}')

            # Get Dates from H5 (compared as YYYYMMDD integers)
            dates = f['dates'][:]
            date_idx = np.where((dates >= int(start_date))
                                & (dates <= int(end_date)))[0]

            # Select Data by Date and Radius
            dates = dates[date_idx]
            data = hail[date_idx, row_lo:row_hi, col_lo:col_hi]

            all_data.append(data)
            all_dates.append(dates)

    data_all = np.vstack(all_data)
    dates_all = np.concatenate(all_dates)

    # Convert to Inches: negatives are zeroed first, then scaled by
    # 0.0393701 (presumably millimetres to inches - confirm the stored unit).
    data_mat = np.where(data_all < 0, 0, data_all) * 0.0393701

    # Get Radius of Data: keep cells inside the disk, flag the rest with -1
    disk_mask = disk(radius_miles) == 1
    data_mat = np.where(disk_mask, data_mat, -1).round(3)

    # Process to DataFrame
    if get_max:
        # Find Max of Data (one value per day)
        data_max = np.max(data_mat, axis=(1, 2))
        df_data = pd.DataFrame({'Date': dates_all,
                                'Hail_max': data_max})
    else:
        # Get all Data (one 2-D window per day)
        data_all = list(data_mat)
        df_data = pd.DataFrame({'Date': dates_all,
                                'Hail_all': data_all})

    df_data['Date'] = pd.to_datetime(df_data['Date'], format='%Y%m%d')
    df_data = df_data.set_index('Date')

    # Expand to every day in the requested range; days without a record get 0.
    df_data = df_data.reindex(date_range_days, fill_value=0).reset_index().rename(
        columns={'index': 'Date'})
    df_data['Date'] = df_data['Date'].dt.strftime('%Y-%m-%d')

    return df_data
114
+
115
+
@app.get('/Hail_Docker_Data')
async def predict(address: str, start_date: str, end_date: str, radius_miles: int, get_max: bool):
    """Serve hail data for an address and date range as a JSON string.

    Args:
        address: Address to look up.
        start_date: First day of the range.
        end_date: Last day of the range.
        radius_miles: Window half-width forwarded to ``get_hail_data``.
        get_max: Whether to return daily maxima instead of full windows.

    Returns:
        ``DataFrame.to_json()`` output of the result. If the lookup fails,
        the JSON of a one-row frame whose ``Date`` and ``Hail_max`` values
        are both the string ``'error'``.
    """
    # Local imports: keeps this handler self-contained without touching the
    # file-level import block.
    import asyncio
    import logging

    try:
        # get_hail_data does blocking network and file I/O; run it in the
        # default executor so this coroutine does not stall the event loop
        # for every other in-flight request.
        loop = asyncio.get_running_loop()
        results = await loop.run_in_executor(
            None, get_hail_data, address, start_date,
            end_date, radius_miles, get_max)
    except Exception:
        # Best-effort endpoint: keep returning the sentinel frame, but record
        # the failure instead of discarding it silently.
        logging.getLogger(__name__).exception(
            'Hail lookup failed for address %r', address)
        results = pd.DataFrame({'Date': ['error'], 'Hail_max': ['error']})

    return results.to_json()
126
+
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi[all]
2
+ uvicorn[standard]
3
+ beautifulsoup4
4
+ numpy
5
+ pandas
6
+ rasterio
7
+ h5py
8
+ scikit-image
9
+ geopy