willengler-uc committed on
Commit
bfbadb9
1 Parent(s): d90d38c

Upload 10 files

Browse files
ASR_model/RandomForestRegressor.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af0a9462517c64501ea1ad7370d25aab33e0f53e4e599f57416b99a22f91e22f
3
+ size 5875427
ASR_model/StandardScaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f522e9ec6b3d3b3574cd12986e902c4ab24f3c16594079f892872abf5cc888d9
3
+ size 4085
ASR_model/X_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
Barrier_model/RandomForestRegressor.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:595508e8f0f5df559fdee81cf46101a113e39dba95e420d5bd4e7e413f8ec9e1
3
+ size 5843347
Barrier_model/StandardScaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5f28a8dbe994a0c61007a152b4749d4d33ea310bfc955e945aff5ebe0adeb9d
3
+ size 4133
Barrier_model/X_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
Stability_model/RandomForestRegressor.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:369399b4692dce213170d16e1d244b877a5d466ea4677a3eb2d90189e60a076f
3
+ size 55151315
Stability_model/StandardScaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df4eed3d243c922177f640ca7c8c8150165e311dbd854716997beba8d2b8a801
3
+ size 2821
Stability_model/X_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
model_predict_df.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
+ from mastml.feature_generators import ElementalFeatureGenerator, OneHotGroupGenerator
6
+ from pymatgen.analysis.cost import CostAnalyzer, CostDBElements
7
+
8
def get_cost(comp_list):
    """Return the per-kg cost of every composition in ``comp_list``.

    One ``CostAnalyzer`` backed by ``CostDBElements`` is created and its
    ``get_cost_per_kg`` method is called once per composition.

    Args:
        comp_list: iterable of compositions accepted by
            ``CostAnalyzer.get_cost_per_kg``.

    Returns:
        list with one cost value per composition, in input order.
    """
    analyzer = CostAnalyzer(costdb=CostDBElements())
    return [analyzer.get_cost_per_kg(comp=composition) for composition in comp_list]
15
+
16
def get_stability(df_test):
    """Predict stability values for the rows of ``df_test``.

    The scaler, the model and the training feature table are read from the
    relative directory ``ASR_model/Stability_model``, so the current working
    directory must contain it. Only the columns named in the header of
    ``X_train.csv`` are taken from ``df_test``, in that order.

    Args:
        df_test: DataFrame containing at least every column of ``X_train.csv``.

    Returns:
        The output of ``model.predict`` on the scaled feature columns.
    """
    model_dir = 'ASR_model/Stability_model'
    fitted_scaler = joblib.load(os.path.join(model_dir, 'StandardScaler.pkl'))
    fitted_model = joblib.load(os.path.join(model_dir, 'RandomForestRegressor.pkl'))

    # The training table is used only for its header: it fixes which columns
    # the model expects and in what order.
    feature_names = pd.read_csv(os.path.join(model_dir, 'X_train.csv')).columns.tolist()

    scaled = fitted_scaler.transform(df_test[feature_names])
    return fitted_model.predict(scaled)
28
+
29
def get_barrier(df_test):
    """Predict barrier values for the rows of ``df_test``.

    Artifacts are loaded from the relative directory
    ``ASR_model/Barrier_model`` (resolved against the current working
    directory): a pickled scaler, a pickled model, and ``X_train.csv``, whose
    header defines the feature columns to use.

    Args:
        df_test: DataFrame containing at least every column of ``X_train.csv``.

    Returns:
        The output of ``model.predict`` on the scaled feature columns.
    """
    artifact_dir = 'ASR_model/Barrier_model'

    def artifact(filename):
        return os.path.join(artifact_dir, filename)

    barrier_scaler = joblib.load(artifact('StandardScaler.pkl'))
    barrier_model = joblib.load(artifact('RandomForestRegressor.pkl'))
    train_table = pd.read_csv(artifact('X_train.csv'))

    # Select the training columns in training order, then scale and predict.
    selected = df_test[train_table.columns.tolist()]
    return barrier_model.predict(barrier_scaler.transform(selected))
41
+
42
def get_asr(df_test):
    """Predict ASR values and recalibrated error bars for ``df_test``.

    The scaler, the model and the training feature table are read from the
    relative directory ``ASR_model/ASR_model`` (resolved against the current
    working directory). The columns named in the header of ``X_train.csv``
    are taken from ``df_test``, scaled, and passed to the model.

    The error bar of each row is the standard deviation of the individual
    ensemble members' predictions for that row, rescaled linearly as
    ``a * std + b`` with the hard-coded constants below.

    Args:
        df_test: DataFrame containing at least every column of ``X_train.csv``.

    Returns:
        Tuple ``(asrs, asr_ebars)``: the model predictions and a NumPy array
        of recalibrated error bars, one entry per row of ``df_test``.
    """
    d = 'ASR_model/ASR_model'
    scaler = joblib.load(os.path.join(d, 'StandardScaler.pkl'))
    model = joblib.load(os.path.join(d, 'RandomForestRegressor.pkl'))
    df_features = pd.read_csv(os.path.join(d, 'X_train.csv'))

    features = df_features.columns.tolist()
    X_ASR = scaler.transform(df_test[features])

    asrs = model.predict(X_ASR)

    # Linear recalibration constants for the raw ensemble spread.
    # NOTE(review): their origin is not visible here -- presumably fitted
    # offline; confirm before changing.
    a = 0.42824232546669644
    b = 0.36341790743237223

    # The pickled model is used as a wrapper exposing the fitted ensemble as
    # ``.model``; fall back to the object itself so a bare ensemble works too.
    ensemble = getattr(model, 'model', model)

    # Convert once to a plain array: this works whether the scaler returned a
    # DataFrame or an ndarray (an ndarray has no ``iterrows``), and lets each
    # estimator predict all rows in one call instead of one call per row.
    X_values = np.asarray(X_ASR)
    member_preds = np.array([est.predict(X_values) for est in ensemble.estimators_])

    # Axis 0 indexes ensemble members, so this is the per-row spread.
    errs = np.std(member_preds, axis=0)
    asr_ebars = a * errs + b

    return asrs, asr_ebars
67
+
68
def process_data(comp_list, elec_list):
    """Build the model input table for the given compositions and electrolytes.

    Elemental features are generated for every composition with mastml's
    ``ElementalFeatureGenerator`` and appended column-wise to a frame holding
    the compositions. Four one-hot columns ``Electrolyte class_0`` ..
    ``Electrolyte class_3`` are then added for the electrolyte of each row.

    Args:
        comp_list: list of material compositions, one per row.
        elec_list: list of electrolyte names, parallel to ``comp_list``. Each
            must be one of "ceria", "mixed", "perovskite", "zirconia".

    Returns:
        DataFrame with the 'Material composition' column, the generated
        elemental features and the four electrolyte one-hot columns.

    Raises:
        ValueError: if ``elec_list`` contains an unsupported electrolyte name.
    """
    # Placeholder frames: the generator's ``evaluate`` takes X and y inputs.
    X = pd.DataFrame(np.empty((len(comp_list),)))
    y = pd.DataFrame(np.empty((len(comp_list),)))

    df_test = pd.DataFrame({'Material composition': comp_list})

    generator_kwargs = {
        'feature_types': ['composition_avg', 'arithmetic_avg', 'max', 'min', 'difference'],
        'remove_constant_columns': False,
    }
    # The keyword naming the composition column differs between mastml
    # versions. An unknown keyword raises TypeError, so only that is caught;
    # any other failure is a real error and must not be masked by the retry.
    try:
        generator = ElementalFeatureGenerator(composition_df=df_test['Material composition'],
                                              **generator_kwargs)
    except TypeError:
        generator = ElementalFeatureGenerator(featurize_df=df_test['Material composition'],
                                              **generator_kwargs)
    X, y = generator.evaluate(X=X, y=y, savepath=os.getcwd(), make_new_dir=False)

    df_test = pd.concat([df_test, X], axis=1)

    # The position in this tuple is the one-hot class index:
    # 0 = ceria, 1 = mixed, 2 = perovskite, 3 = zirconia.
    electrolyte_classes = ('ceria', 'mixed', 'perovskite', 'zirconia')
    for elec in elec_list:
        if elec not in electrolyte_classes:
            raise ValueError('Invalid electrolyte choice detected. Valid choices are "ceria", "mixed", "perovskite", "zirconia"')

    for class_index, class_name in enumerate(electrolyte_classes):
        df_test['Electrolyte class_{}'.format(class_index)] = [
            1 if elec == class_name else 0 for elec in elec_list
        ]

    return df_test
120
+
121
def make_predictions(comp_list, elec_list):
    """Run the full prediction pipeline and return the results as a table.

    Steps, in order: featurize the inputs, compute material costs, predict
    stability, predict the ASR barrier, add the predicted barrier to the
    feature table as 'ML pred ASR barrier (eV)', then predict ASR together
    with its error bars.

    Args:
        comp_list: list of material compositions.
        elec_list: list of electrolyte names, same length as ``comp_list``.

    Returns:
        DataFrame with one row per composition and the columns
        'Compositions', 'Electrolytes', 'Cost ($/kg)',
        'Stability @ 500C (meV/atom)', 'ASR barrier (eV)',
        'log ASR at 500C (Ohm-cm2)' and 'log ASR error (Ohm-cm2)'.

    Raises:
        AssertionError: if the two input lists differ in length.
    """
    # The inputs are parallel lists, so their lengths have to agree.
    assert len(comp_list) == len(elec_list)

    featurized = process_data(comp_list, elec_list)
    material_costs = get_cost(comp_list)
    predicted_stabilities = get_stability(featurized)
    predicted_barriers = get_barrier(featurized)

    # The predicted barrier is added to the table before ASR prediction.
    featurized['ML pred ASR barrier (eV)'] = predicted_barriers
    predicted_asrs, asr_error_bars = get_asr(featurized)

    return pd.DataFrame({
        'Compositions': comp_list,
        'Electrolytes': elec_list,
        'Cost ($/kg)': material_costs,
        'Stability @ 500C (meV/atom)': predicted_stabilities,
        'ASR barrier (eV)': predicted_barriers,
        'log ASR at 500C (Ohm-cm2)': predicted_asrs,
        'log ASR error (Ohm-cm2)': asr_error_bars,
    })
150
+