Commit be11b4e
Parent(s): 86bd91c
first draft of comparison across models and prompts

app.py CHANGED
```diff
@@ -13,24 +13,33 @@ st.title("LLM assessments: Microsoft's Phi-2 and Google's Gemma-7b")
 import urllib.request
 import os
 
-
-    'Select the model:',
-    [
+llm_options = [
     "Microsoft's Phi-2",
     "Google's Gemma-7b"
-    ]
+]
 
-
-
-
-
+model_select_options = st.multiselect(
+    'Select one or more models:',
+    llm_options,
+    [llm_options[0]])
 
-else:
-    prefix_post_processing = os.environ["POST_PROCESSING_JSON_GEMMA"]
-    st.write("""
-Google Gemma-7b (https://huggingface.co/google/gemma-7b) is a Large Language Model (LLM) with 8.54 billion parameters. According to the technical report (https://storage.googleapis.com/deepmind-media/gemma/gemma-report.pdf), Gemma-7b performs well on language understanding, reasoning, and safety tasks. It is one of the state-of-the-art open models built on technologies similar to those used to create Google's Gemini models.""")
 
 
+prefix_post_processing_dict = {}
+st.markdown("---")
+for t_opt in model_select_options:
+    st.write(t_opt)
+    if t_opt == "Microsoft's Phi-2":
+        prefix_post_processing_dict[t_opt] = os.environ["POST_PROCESSING_JSON"]
+        st.write("""
+Microsoft's Phi-2 (https://huggingface.co/microsoft/phi-2) is a Transformer model with 2.7 billion parameters. Its performance on benchmarks for common sense, language understanding, and logical reasoning is nearly state-of-the-art among models with fewer than 13 billion parameters. Unlike typical Large Language Models (LLMs), Phi-2 has not been fine-tuned through reinforcement learning from human feedback.""")
+
+    else:
+        prefix_post_processing_dict[t_opt] = os.environ["POST_PROCESSING_JSON_GEMMA"]
+        st.write("""
+Google Gemma-7b (https://huggingface.co/google/gemma-7b) is a Large Language Model (LLM) with 8.54 billion parameters. According to the technical report (https://storage.googleapis.com/deepmind-media/gemma/gemma-report.pdf), Gemma-7b performs well on language understanding, reasoning, and safety tasks. It is one of the state-of-the-art open models built on technologies similar to those used to create Google's Gemini models.""")
+    st.markdown("---")
+
 
 
 # prefix_post_processing = os.environ["POST_PROCESSING_JSON"]
```
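This hunk replaces a single-model `st.selectbox` with `st.multiselect` and keys the post-processing URL prefix by model. Below is a minimal, self-contained sketch of that pattern; the environment variable names come from the diff, while `selected_models` and `prefix_by_model` are illustrative names, not the app's own.

```python
# Minimal sketch of the multiselect pattern introduced above (assumes
# streamlit is installed and both POST_PROCESSING_* variables are set).
import os

import streamlit as st

llm_options = [
    "Microsoft's Phi-2",
    "Google's Gemma-7b",
]

# st.multiselect(label, options, default) returns the selected subset;
# defaulting to the first model means the page always renders something.
selected_models = st.multiselect(
    'Select one or more models:',
    llm_options,
    [llm_options[0]])

# Map each selected model to its post-processing URL prefix, mirroring
# the prefix_post_processing_dict built in the diff.
prefix_by_model = {}
for model in selected_models:
    env_var = "POST_PROCESSING_JSON" if model == "Microsoft's Phi-2" else "POST_PROCESSING_JSON_GEMMA"
    prefix_by_model[model] = os.environ[env_var]
```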
```diff
@@ -76,30 +85,33 @@ for t_opt in options:
 result_processor_obj_dict = {}
 result_file_dict = {}
 data_dicts_dict = {}
-
-
+main_options = []
+for model_option in model_select_options:
+    for t_result_file in options:
+        result_file = prefix_post_processing_dict[model_option] + prompt_options_dict[t_result_file]
 
-
+        prompt_option = int(prompt_options_dict[t_result_file].split('_')[-1].split('.')[0])
 
-
-
+        with urllib.request.urlopen(result_file) as url:
+            data_dict = json.load(url)
 
-
+
 
-
-
-
-
-
-
-
+        result_processor_obj_dict[model_option + ' with ' + t_result_file] = ResultsProcessor(
+            prompt_option=prompt_option,
+            result_file=result_file,
+            data_dict=data_dict
+        )
+        data_dicts_dict[model_option + ' with ' + t_result_file] = data_dict
+        result_file_dict[model_option + ' with ' + t_result_file] = result_file
 
+        main_options += [model_option + ' with ' + t_result_file]
 
 print(result_processor_obj_dict)
 
 
 
-
+options = main_options
 
 # t_result_file = st.selectbox(
 # 'Select the prompt:',
```
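The second hunk expands the processing loop to iterate over every selected model crossed with every prompt, keying each `ResultsProcessor` by a combined "model with prompt" string, and recovers the prompt index from the result filename. A small worked example of that parsing and fetch follows; the filename and URL are hypothetical, since the diff only shows the prefix plus a per-prompt suffix.

```python
import json
import urllib.request

# Hypothetical per-prompt filename; the diff takes the last
# underscore-separated token before the extension as the prompt index.
file_name = 'post_processing_prompt_3.json'
prompt_option = int(file_name.split('_')[-1].split('.')[0])
assert prompt_option == 3

# Combined dictionary key, following the diff's "model with prompt" scheme.
key = "Microsoft's Phi-2" + ' with ' + 'prompt 3'

# Fetching the result JSON over HTTP, as the diff does; the URL here is
# illustrative, while the app builds it from an environment-variable prefix.
result_file = 'https://example.com/post_processing_prompt_3.json'
with urllib.request.urlopen(result_file) as url:
    data_dict = json.load(url)
```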
```diff
@@ -349,7 +361,7 @@ temp_options_2 = st.multiselect(
 
 st.markdown("---")
 for t_opt in temp_options_2:
-    results_pert_rob_dict = result_processor_obj_dict[
+    results_pert_rob_dict = result_processor_obj_dict[t_opt].get_performance_robustness(option)
     merged_dfs_list = results_pert_rob_dict['merged_dfs_list']
     t_pert_df_global_temps_list = results_pert_rob_dict['t_pert_df_global_temps_list']
     family_names_list = results_pert_rob_dict['family_names_list']
```
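The last hunk indexes `result_processor_obj_dict` by the combined option string and calls `get_performance_robustness`, whose implementation is not part of this diff. The stub below is a hypothetical illustration of the return contract the display loop relies on, nothing more; only the three dictionary keys are taken from the diff.

```python
# Hypothetical stub: the real ResultsProcessor lives elsewhere in the
# repo; this only documents the return shape the display loop unpacks.
class ResultsProcessor:
    def __init__(self, prompt_option, result_file, data_dict):
        self.prompt_option = prompt_option
        self.result_file = result_file
        self.data_dict = data_dict

    def get_performance_robustness(self, option):
        # Keys match those unpacked in the loop above; the list
        # contents here are placeholders.
        return {
            'merged_dfs_list': [],
            't_pert_df_global_temps_list': [],
            'family_names_list': [],
        }
```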