zhaorui-nb commited on
Commit
2f22782
1 Parent(s): db33c1e
Files changed (7) hide show
  1. .gitattributes +35 -35
  2. .gitignore +12 -12
  3. README.md +49 -49
  4. app.py +224 -224
  5. batch_eval_script.py +94 -94
  6. utils/Evaluation_answer_txt.py +179 -179
  7. utils/upload_hub.py +56 -56
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1,12 +1,12 @@
1
-
2
- secrets.toml
3
- __pycache__
4
-
5
- # *.txt
6
- *.tsv
7
- *.csv
8
- *.json
9
- *.txt
10
-
11
-
12
-
 
1
+
2
+ secrets.toml
3
+ __pycache__
4
+
5
+ # *.txt
6
+ *.tsv
7
+ *.csv
8
+ *.json
9
+ *.txt
10
+
11
+
12
+
README.md CHANGED
@@ -1,49 +1,49 @@
1
- ---
2
- title: De Identification Leaderboard
3
- emoji: 🏃
4
- colorFrom: pink
5
- colorTo: yellow
6
- sdk: streamlit
7
- sdk_version: 1.35.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
-
15
-
16
-
17
-
18
- # de-identification-leaderboard
19
-
20
- ## leaderboard data
21
- score wil save to huggingface dataset
22
- [zhaorui-nb/leaderboard-score](https://huggingface.co/datasets/zhaorui-nb/leaderboard-score)
23
-
24
-
25
- ## submit
26
- ### filename format
27
- replace '/' to '@'
28
- ```
29
- [{Organization@Model}][{Dataaset}][{Method}]{Filename}.txt"
30
- ```
31
-
32
- ### line in answer txt (tsv)
33
- ```
34
- {file_name}\t{label_type}\t{label_start}\t{label_end}\t{label_text}\n
35
- ```
36
-
37
- ## Support dataset
38
- ```
39
- Setting1
40
- Setting2
41
- Setting3
42
- ```
43
-
44
-
45
- # cli batch eval tool
46
- ```
47
- python .\batch_eval_script.py ..\deid_resaut
48
- ```
49
-
 
1
+ ---
2
+ title: De Identification Leaderboard
3
+ emoji: 🏃
4
+ colorFrom: pink
5
+ colorTo: yellow
6
+ sdk: streamlit
7
+ sdk_version: 1.35.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
+
15
+
16
+
17
+
18
+ # de-identification-leaderboard
19
+
20
+ ## leaderboard data
21
+ score wil save to huggingface dataset
22
+ [zhaorui-nb/leaderboard-score](https://huggingface.co/datasets/zhaorui-nb/leaderboard-score)
23
+
24
+
25
+ ## submit
26
+ ### filename format
27
+ replace '/' to '@'
28
+ ```
29
+ [{Organization@Model}][{Dataaset}][{Method}]{Filename}.txt"
30
+ ```
31
+
32
+ ### line in answer txt (tsv)
33
+ ```
34
+ {file_name}\t{label_type}\t{label_start}\t{label_end}\t{label_text}\n
35
+ ```
36
+
37
+ ## Support dataset
38
+ ```
39
+ Setting1
40
+ Setting2
41
+ Setting3
42
+ ```
43
+
44
+
45
+ # cli batch eval tool
46
+ ```
47
+ python .\batch_eval_script.py ..\deid_resaut
48
+ ```
49
+
app.py CHANGED
@@ -1,224 +1,224 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import os
4
- from utils.Evaluation_answer_txt import Evaluation_answer_txt
5
- from utils.upload_hub import upload_scores_to_hub, file_name_decode
6
- import time
7
- import json
8
- import datasets
9
- from huggingface_hub import HfApi
10
- from huggingface_hub import hf_hub_download
11
- # st.set_page_config(layout="wide")
12
- st.set_page_config(layout="centered")
13
- st.markdown(
14
- f"""
15
- <style>
16
- .appview-container .main .block-container{{
17
- max-width: 80%;
18
- padding: 50px;
19
- }}
20
- </style>
21
- """,
22
- unsafe_allow_html=True
23
- )
24
-
25
- @st.cache_data
26
- def download_gold_answer(repo, filename, token, force_download=False):
27
- ret = hf_hub_download(repo_id=repo, repo_type='dataset', filename=filename, token=token, force_download=force_download)
28
- return ret
29
-
30
-
31
- HUB_TOKEN = st.secrets['hf']
32
- HUB_API = HfApi(token=HUB_TOKEN)
33
-
34
- LEADERBOARD_DATASET_REPO = 'zhaorui-nb/leaderboard-score'
35
- # Setting1 Setting2 Setting3
36
-
37
- ANSWER_REPO = 'zhaorui-nb/leaderboard-answer'
38
- GET_GOLD_ANSWER_PATH = {
39
- 'Setting1': download_gold_answer(ANSWER_REPO, 'dataset/Setting1_test_answer.txt', HUB_TOKEN),
40
- 'Setting2': download_gold_answer(ANSWER_REPO, 'dataset/Setting2_test_answer.txt', HUB_TOKEN),
41
- 'Setting3': download_gold_answer(ANSWER_REPO, 'dataset/Setting3_test_answer.txt', HUB_TOKEN)
42
- }
43
-
44
-
45
- # cache the dataset in the session state
46
- def get_leaderboard_df():
47
- with st.spinner('Loading leaderboard data...'):
48
- if st.session_state.get('leaderboard_df') is None:
49
- dataset = datasets.load_dataset(LEADERBOARD_DATASET_REPO)
50
- df = pd.DataFrame(dataset['train'])
51
- st.session_state['leaderboard_df'] = df
52
- return df
53
- else:
54
- return st.session_state['leaderboard_df']
55
-
56
-
57
- st.title('De-identification Model Leaderboard')
58
-
59
- try:
60
- with st.container():
61
- # columns
62
- # ['model name', 'dataset', 'method', 'file name', 'submitter',
63
- # 'MICRO precision', 'MICRO recall', 'MICRO f1', 'MACRO precision',
64
- # 'MACRO recall', 'MACRO f1', 'detail result']
65
-
66
- df = get_leaderboard_df()
67
- # replace model name column @ to /
68
- df['model name'] = df['model name'].str.replace('@', '/')
69
-
70
- # remove the detail result column
71
- default_columns = [c for c in df.columns if c not in ['detail result']]
72
- selected_columns = st.multiselect('Select columns to display', df.columns, default=default_columns)
73
-
74
- leaderboard_df = st.dataframe(df[selected_columns], selection_mode='multi-row', on_select='rerun', key='leaderboard')
75
-
76
- st.subheader("Detail Result")
77
- det_ind = st.session_state.leaderboard['selection']['rows']
78
- if len(det_ind) == 0:
79
- st.write(f'Please check the boxes to view the detailed results.')
80
- else:
81
- col_detial = st.columns(len(det_ind))
82
- for i, dind in enumerate(det_ind):
83
- with col_detial[i]:
84
- dis = f"{df.iloc[dind]['model name']}___{df.iloc[dind]['dataset']}___{df.iloc[dind]['method']}"
85
- color = [st.success, st.info, st.warning, st.error]
86
- color[i % 4](dis)
87
-
88
- dic = json.loads(df.iloc[dind]['detail result'])
89
- dt_df = pd.DataFrame(dic).T
90
- st.dataframe(dt_df)
91
-
92
- except Exception as e:
93
- st.error(f"Error: {e}")
94
-
95
- st.markdown("---")
96
-
97
- # ############################################################################################################
98
- # ############################################### Evaluation_answer_txt
99
- # ############################################################################################################
100
-
101
- model_name_input = ''
102
- dataset_input = ''
103
- method_input = ''
104
- file_name = ''
105
- submitter_input = ''
106
-
107
- if 'score_json' not in st.session_state:
108
- st.session_state['score_json'] = None
109
-
110
- @st.cache_data()
111
- def get_file_info(uploaded_file):
112
- filename_info = file_name_decode(uploaded_file.name)
113
- return filename_info
114
-
115
- @st.cache_data()
116
- def eval_answer_txt(set_name, uploaded_file):
117
- print(f"eval_answer_txt: {time.time()}" , set_name)
118
-
119
- if set_name not in GET_GOLD_ANSWER_PATH:
120
- return None
121
- gold_answer_txt = GET_GOLD_ANSWER_PATH[set_name]
122
- eval = Evaluation_answer_txt(gold_answer_txt, uploaded_file)
123
- score_json = eval.eval()
124
- return score_json
125
-
126
- def clear_score_json():
127
- st.session_state['score_json'] = None
128
-
129
- st.title("Model Evaluation")
130
- st.write("Support file naming: [{Organization@Model}][{Dataaset}][{Method}]{Filename}.txt")
131
-
132
- col_upload = st.columns([3,1])
133
- with col_upload[0]:
134
- uploaded_file = st.file_uploader("Please upload the answer.txt file", type=["txt"], key="uploaded_file", on_change=clear_score_json)
135
- with col_upload[1]:
136
- if not uploaded_file:
137
- st.warning("please upload file")
138
- st.session_state['score_json'] = None
139
- else:
140
- st.success("file uploaded successfully")
141
-
142
- filename_info = get_file_info(uploaded_file)
143
- if filename_info:
144
- model_name_input = filename_info['model_name']
145
- dataset_input = filename_info['dataset']
146
- method_input = filename_info['method']
147
- file_name = filename_info['file_name']
148
-
149
- col_score = st.columns([7,5])
150
- if uploaded_file:
151
- with col_score[1], st.container(border=True):
152
- model_name_input = st.text_input("model name", model_name_input)
153
- dataset_input = st.text_input("dataset", dataset_input)
154
- method_input = st.text_input("method", method_input)
155
- file_name = st.text_input("file name", file_name)
156
- submitter_input = st.text_input("submitter", submitter_input)
157
- check_all_fill_in = model_name_input and dataset_input and method_input and file_name and submitter_input
158
-
159
- col_sumit_and_recalculate = st.columns(2)
160
- with col_sumit_and_recalculate[0]:
161
- calculate_btn = st.button("calculate", type='secondary', use_container_width=True)
162
- with col_sumit_and_recalculate[1]:
163
- submit_btn = st.button("SUBMIT", type='primary', use_container_width=True , disabled=not check_all_fill_in)
164
-
165
- if calculate_btn or st.session_state['score_json'] is None:
166
- set_name = dataset_input
167
- st.session_state['score_json'] = eval_answer_txt(set_name, uploaded_file)
168
- if st.session_state['score_json']:
169
- st.success("evaluation success")
170
- else:
171
- st.error("evaluation failed, please check the file content or set the correct dataset name.")
172
-
173
- if st.session_state['score_json']:
174
- with col_score[0], st.container(border=True):
175
- df = pd.DataFrame(st.session_state['score_json']).T
176
- # split the column MICRO_AVERAGE and MACRO_AVERAGE into another dataframe
177
- tag_df = df.drop(["MICRO_AVERAGE", "MACRO_AVERAGE"], axis=0)
178
- avg_df = df.loc[["MICRO_AVERAGE", "MACRO_AVERAGE"]]
179
-
180
- col_sort_func = st.columns(2)
181
-
182
- with col_sort_func[0]:
183
- sorted_column = st.selectbox("选择排序列", df.columns)
184
-
185
- with col_sort_func[1]:
186
- ascending = st.radio("Sort Order", ["Ascending", "Descending"])
187
-
188
- tag_df = tag_df.sort_values(by=sorted_column, ascending=ascending=="Ascending")
189
-
190
- st.dataframe(pd.concat([tag_df, avg_df]), use_container_width=True)
191
-
192
-
193
- if not check_all_fill_in:
194
- st.warning("Please fill in the complete information.")
195
-
196
- if submit_btn:
197
- if st.session_state['score_json']:
198
- score_json = st.session_state['score_json']
199
-
200
- leaderboard_dict = {
201
- "model name": model_name_input,
202
- "dataset": dataset_input,
203
- "method": method_input,
204
- "file name": file_name,
205
- "submitter": submitter_input,
206
-
207
- "MICRO precision": score_json["MICRO_AVERAGE"]["precision"],
208
- "MICRO recall": score_json["MICRO_AVERAGE"]["recall"],
209
- "MICRO f1": score_json["MICRO_AVERAGE"]["f1"],
210
- "MACRO precision": score_json["MACRO_AVERAGE"]["precision"],
211
- "MACRO recall": score_json["MACRO_AVERAGE"]["recall"],
212
- "MACRO f1": score_json["MACRO_AVERAGE"]["f1"],
213
- "detail result": json.dumps(score_json,indent=4) #score_json
214
- }
215
-
216
- repo_file_path = f'data/train-[{model_name_input}][{dataset_input}][{method_input}][{file_name}].json'
217
- upload_res = upload_scores_to_hub(HUB_API, leaderboard_dict, repo_file_path, hub_repo=LEADERBOARD_DATASET_REPO)
218
- if upload_res:
219
- st.success(f"submit success")
220
- st.success(f"your score at here: {upload_res}")
221
- else:
222
- st.error("submit failed")
223
-
224
-
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ from utils.Evaluation_answer_txt import Evaluation_answer_txt
5
+ from utils.upload_hub import upload_scores_to_hub, file_name_decode
6
+ import time
7
+ import json
8
+ import datasets
9
+ from huggingface_hub import HfApi
10
+ from huggingface_hub import hf_hub_download
11
+ # st.set_page_config(layout="wide")
12
+ st.set_page_config(layout="centered")
13
+ st.markdown(
14
+ f"""
15
+ <style>
16
+ .appview-container .main .block-container{{
17
+ max-width: 80%;
18
+ padding: 50px;
19
+ }}
20
+ </style>
21
+ """,
22
+ unsafe_allow_html=True
23
+ )
24
+
25
+ @st.cache_data
26
+ def download_gold_answer(repo, filename, token, force_download=False):
27
+ ret = hf_hub_download(repo_id=repo, repo_type='dataset', filename=filename, token=token, force_download=force_download)
28
+ return ret
29
+
30
+
31
+ HUB_TOKEN = st.secrets['hf']
32
+ HUB_API = HfApi(token=HUB_TOKEN)
33
+
34
+ LEADERBOARD_DATASET_REPO = 'zhaorui-nb/leaderboard-score'
35
+ # Setting1 Setting2 Setting3
36
+
37
+ ANSWER_REPO = 'zhaorui-nb/leaderboard-answer'
38
+ GET_GOLD_ANSWER_PATH = {
39
+ 'Setting1': download_gold_answer(ANSWER_REPO, 'dataset/Setting1_test_answer.txt', HUB_TOKEN),
40
+ 'Setting2': download_gold_answer(ANSWER_REPO, 'dataset/Setting2_test_answer.txt', HUB_TOKEN),
41
+ 'Setting3': download_gold_answer(ANSWER_REPO, 'dataset/Setting3_test_answer.txt', HUB_TOKEN)
42
+ }
43
+
44
+
45
+ # cache the dataset in the session state
46
+ def get_leaderboard_df():
47
+ with st.spinner('Loading leaderboard data...'):
48
+ if st.session_state.get('leaderboard_df') is None:
49
+ dataset = datasets.load_dataset(LEADERBOARD_DATASET_REPO)
50
+ df = pd.DataFrame(dataset['train'])
51
+ st.session_state['leaderboard_df'] = df
52
+ return df
53
+ else:
54
+ return st.session_state['leaderboard_df']
55
+
56
+
57
+ st.title('De-identification Model Leaderboard')
58
+
59
+ try:
60
+ with st.container():
61
+ # columns
62
+ # ['model name', 'dataset', 'method', 'file name', 'submitter',
63
+ # 'MICRO precision', 'MICRO recall', 'MICRO f1', 'MACRO precision',
64
+ # 'MACRO recall', 'MACRO f1', 'detail result']
65
+
66
+ df = get_leaderboard_df()
67
+ # replace model name column @ to /
68
+ df['model name'] = df['model name'].str.replace('@', '/')
69
+
70
+ # remove the detail result column
71
+ default_columns = [c for c in df.columns if c not in ['detail result']]
72
+ selected_columns = st.multiselect('Select columns to display', df.columns, default=default_columns)
73
+
74
+ leaderboard_df = st.dataframe(df[selected_columns], selection_mode='multi-row', on_select='rerun', key='leaderboard')
75
+
76
+ st.subheader("Detail Result")
77
+ det_ind = st.session_state.leaderboard['selection']['rows']
78
+ if len(det_ind) == 0:
79
+ st.write(f'Please check the boxes to view the detailed results.')
80
+ else:
81
+ col_detial = st.columns(len(det_ind))
82
+ for i, dind in enumerate(det_ind):
83
+ with col_detial[i]:
84
+ dis = f"{df.iloc[dind]['model name']}___{df.iloc[dind]['dataset']}___{df.iloc[dind]['method']}"
85
+ color = [st.success, st.info, st.warning, st.error]
86
+ color[i % 4](dis)
87
+
88
+ dic = json.loads(df.iloc[dind]['detail result'])
89
+ dt_df = pd.DataFrame(dic).T
90
+ st.dataframe(dt_df)
91
+
92
+ except Exception as e:
93
+ st.error(f"Error: {e}")
94
+
95
+ st.markdown("---")
96
+
97
+ # ############################################################################################################
98
+ # ############################################### Evaluation_answer_txt
99
+ # ############################################################################################################
100
+
101
+ model_name_input = ''
102
+ dataset_input = ''
103
+ method_input = ''
104
+ file_name = ''
105
+ submitter_input = ''
106
+
107
+ if 'score_json' not in st.session_state:
108
+ st.session_state['score_json'] = None
109
+
110
+ @st.cache_data()
111
+ def get_file_info(uploaded_file):
112
+ filename_info = file_name_decode(uploaded_file.name)
113
+ return filename_info
114
+
115
+ @st.cache_data()
116
+ def eval_answer_txt(set_name, uploaded_file):
117
+ print(f"eval_answer_txt: {time.time()}" , set_name)
118
+
119
+ if set_name not in GET_GOLD_ANSWER_PATH:
120
+ return None
121
+ gold_answer_txt = GET_GOLD_ANSWER_PATH[set_name]
122
+ eval = Evaluation_answer_txt(gold_answer_txt, uploaded_file)
123
+ score_json = eval.eval()
124
+ return score_json
125
+
126
+ def clear_score_json():
127
+ st.session_state['score_json'] = None
128
+
129
+ st.title("Model Evaluation")
130
+ st.write("Support file naming: [{Organization@Model}][{Dataaset}][{Method}]{Filename}.txt")
131
+
132
+ col_upload = st.columns([3,1])
133
+ with col_upload[0]:
134
+ uploaded_file = st.file_uploader("Please upload the answer.txt file", type=["txt"], key="uploaded_file", on_change=clear_score_json)
135
+ with col_upload[1]:
136
+ if not uploaded_file:
137
+ st.warning("please upload file")
138
+ st.session_state['score_json'] = None
139
+ else:
140
+ st.success("file uploaded successfully")
141
+
142
+ filename_info = get_file_info(uploaded_file)
143
+ if filename_info:
144
+ model_name_input = filename_info['model_name']
145
+ dataset_input = filename_info['dataset']
146
+ method_input = filename_info['method']
147
+ file_name = filename_info['file_name']
148
+
149
+ col_score = st.columns([7,5])
150
+ if uploaded_file:
151
+ with col_score[1], st.container(border=True):
152
+ model_name_input = st.text_input("model name", model_name_input)
153
+ dataset_input = st.text_input("dataset", dataset_input)
154
+ method_input = st.text_input("method", method_input)
155
+ file_name = st.text_input("file name", file_name)
156
+ submitter_input = st.text_input("submitter", submitter_input)
157
+ check_all_fill_in = model_name_input and dataset_input and method_input and file_name and submitter_input
158
+
159
+ col_sumit_and_recalculate = st.columns(2)
160
+ with col_sumit_and_recalculate[0]:
161
+ calculate_btn = st.button("calculate", type='secondary', use_container_width=True)
162
+ with col_sumit_and_recalculate[1]:
163
+ submit_btn = st.button("SUBMIT", type='primary', use_container_width=True , disabled=not check_all_fill_in)
164
+
165
+ if calculate_btn or st.session_state['score_json'] is None:
166
+ set_name = dataset_input
167
+ st.session_state['score_json'] = eval_answer_txt(set_name, uploaded_file)
168
+ if st.session_state['score_json']:
169
+ st.success("evaluation success")
170
+ else:
171
+ st.error("evaluation failed, please check the file content or set the correct dataset name.")
172
+
173
+ if st.session_state['score_json']:
174
+ with col_score[0], st.container(border=True):
175
+ df = pd.DataFrame(st.session_state['score_json']).T
176
+ # split the column MICRO_AVERAGE and MACRO_AVERAGE into another dataframe
177
+ tag_df = df.drop(["MICRO_AVERAGE", "MACRO_AVERAGE"], axis=0)
178
+ avg_df = df.loc[["MICRO_AVERAGE", "MACRO_AVERAGE"]]
179
+
180
+ col_sort_func = st.columns(2)
181
+
182
+ with col_sort_func[0]:
183
+ sorted_column = st.selectbox("选择排序列", df.columns)
184
+
185
+ with col_sort_func[1]:
186
+ ascending = st.radio("Sort Order", ["Ascending", "Descending"])
187
+
188
+ tag_df = tag_df.sort_values(by=sorted_column, ascending=ascending=="Ascending")
189
+
190
+ st.dataframe(pd.concat([tag_df, avg_df]), use_container_width=True)
191
+
192
+
193
+ if not check_all_fill_in:
194
+ st.warning("Please fill in the complete information.")
195
+
196
+ if submit_btn:
197
+ if st.session_state['score_json']:
198
+ score_json = st.session_state['score_json']
199
+
200
+ leaderboard_dict = {
201
+ "model name": model_name_input,
202
+ "dataset": dataset_input,
203
+ "method": method_input,
204
+ "file name": file_name,
205
+ "submitter": submitter_input,
206
+
207
+ "MICRO precision": score_json["MICRO_AVERAGE"]["precision"],
208
+ "MICRO recall": score_json["MICRO_AVERAGE"]["recall"],
209
+ "MICRO f1": score_json["MICRO_AVERAGE"]["f1"],
210
+ "MACRO precision": score_json["MACRO_AVERAGE"]["precision"],
211
+ "MACRO recall": score_json["MACRO_AVERAGE"]["recall"],
212
+ "MACRO f1": score_json["MACRO_AVERAGE"]["f1"],
213
+ "detail result": json.dumps(score_json,indent=4) #score_json
214
+ }
215
+
216
+ repo_file_path = f'data/train-[{model_name_input}][{dataset_input}][{method_input}][{file_name}].json'
217
+ upload_res = upload_scores_to_hub(HUB_API, leaderboard_dict, repo_file_path, hub_repo=LEADERBOARD_DATASET_REPO)
218
+ if upload_res:
219
+ st.success(f"submit success")
220
+ st.success(f"your score at here: {upload_res}")
221
+ else:
222
+ st.error("submit failed")
223
+
224
+
batch_eval_script.py CHANGED
@@ -1,94 +1,94 @@
1
- # a argparse script it can set eval dir
2
- # and run the eval script in the dir then save the reasult json file in the dir
3
- # usage: python .\batch_eval_script.py ..\deid_resaut
4
-
5
- import os
6
- import json
7
- import argparse
8
- import streamlit as st
9
-
10
- from huggingface_hub import hf_hub_download
11
-
12
- from utils.Evaluation_answer_txt import Evaluation_answer_txt
13
- from utils.upload_hub import file_name_decode
14
-
15
- # Function to download gold answer based on dataset name
16
- def download_gold_answer(repo, filename, token, force_download=False):
17
- ret = hf_hub_download(repo_id=repo, repo_type='dataset', filename=filename, token=token, force_download=force_download)
18
- return ret
19
-
20
- HUB_TOKEN = st.secrets['hf']
21
- ANSWER_REPO = 'zhaorui-nb/leaderboard-answer'
22
- GET_GOLD_ANSWER_PATH = {
23
- 'Setting1': download_gold_answer(ANSWER_REPO, 'dataset/Setting1_test_answer.txt', HUB_TOKEN),
24
- 'Setting2': download_gold_answer(ANSWER_REPO, 'dataset/Setting2_test_answer.txt', HUB_TOKEN),
25
- 'Setting3': download_gold_answer(ANSWER_REPO, 'dataset/Setting3_test_answer.txt', HUB_TOKEN)
26
- }
27
-
28
- # Function to evaluate answer text
29
- def eval_answer_txt(set_name, uploaded_file_path):
30
- if set_name not in GET_GOLD_ANSWER_PATH:
31
- return None
32
- gold_answer_txt = GET_GOLD_ANSWER_PATH[set_name]
33
- eval = Evaluation_answer_txt(gold_answer_txt, uploaded_file_path)
34
- score_json = eval.eval()
35
- return score_json
36
-
37
- # Function to traverse directory and evaluate files
38
- def evaluate_directory(input_dir, output_dir='./.output'):
39
- os.makedirs(output_dir, exist_ok=True)
40
- for root, _, files in os.walk(input_dir):
41
- for file in files:
42
- filename_info = file_name_decode(file)
43
- if filename_info:
44
- model_name_input = filename_info['model_name']
45
- dataset_input = filename_info['dataset']
46
- method_input = filename_info['method']
47
- file_name = filename_info['file_name']
48
-
49
- file_path = os.path.join(root, file)
50
- # get full path of the file
51
- file_path = os.path.abspath(file_path)
52
- score_json = eval_answer_txt(dataset_input, file_path)
53
- # print(f"sss" , GET_GOLD_ANSWER_PATH[dataset_input], file_path)
54
- if score_json:
55
- leaderboard_dict = {
56
- "model name": model_name_input,
57
- "dataset": dataset_input,
58
- "method": method_input,
59
- "file name": file_name,
60
- "submitter": 'zhaorui',
61
-
62
- "MICRO precision": score_json["MICRO_AVERAGE"]["precision"],
63
- "MICRO recall": score_json["MICRO_AVERAGE"]["recall"],
64
- "MICRO f1": score_json["MICRO_AVERAGE"]["f1"],
65
- "MACRO precision": score_json["MACRO_AVERAGE"]["precision"],
66
- "MACRO recall": score_json["MACRO_AVERAGE"]["recall"],
67
- "MACRO f1": score_json["MACRO_AVERAGE"]["f1"],
68
- "detail result": json.dumps(score_json,indent=4) #score_json
69
- }
70
-
71
- # train-[01-ai@Yi-1.5-6B-Chat][Setting1][icl][answer.txt].json
72
- repo_file_name = f'train-[{model_name_input}][{dataset_input}][{method_input}][{file_name}].json'
73
- output_path = os.path.join(output_dir, repo_file_name)
74
- with open(output_path, 'w') as f:
75
- json.dump(leaderboard_dict, f, indent=4)
76
- else:
77
- print(f"Failed to evaluate {file_path}")
78
-
79
-
80
-
81
- # Main function to handle argparse
82
- def main():
83
- parser = argparse.ArgumentParser(description="Evaluate all text files in the given directory.")
84
- parser.add_argument('input_dir', type=str, help='Path to the directory containing text files.')
85
- parser.add_argument('--output_dir', type=str, default='./.output', help='Path to the directory to save the output json files.')
86
-
87
- args = parser.parse_args()
88
-
89
- evaluate_directory(args.input_dir, args.output_dir)
90
-
91
- print(f"Evaluation completed. Results saved to evaluation_results.json")
92
-
93
- if __name__ == "__main__":
94
- main()
 
1
+ # a argparse script it can set eval dir
2
+ # and run the eval script in the dir then save the reasult json file in the dir
3
+ # usage: python .\batch_eval_script.py ..\deid_resaut
4
+
5
+ import os
6
+ import json
7
+ import argparse
8
+ import streamlit as st
9
+
10
+ from huggingface_hub import hf_hub_download
11
+
12
+ from utils.Evaluation_answer_txt import Evaluation_answer_txt
13
+ from utils.upload_hub import file_name_decode
14
+
15
+ # Function to download gold answer based on dataset name
16
+ def download_gold_answer(repo, filename, token, force_download=False):
17
+ ret = hf_hub_download(repo_id=repo, repo_type='dataset', filename=filename, token=token, force_download=force_download)
18
+ return ret
19
+
20
+ HUB_TOKEN = st.secrets['hf']
21
+ ANSWER_REPO = 'zhaorui-nb/leaderboard-answer'
22
+ GET_GOLD_ANSWER_PATH = {
23
+ 'Setting1': download_gold_answer(ANSWER_REPO, 'dataset/Setting1_test_answer.txt', HUB_TOKEN),
24
+ 'Setting2': download_gold_answer(ANSWER_REPO, 'dataset/Setting2_test_answer.txt', HUB_TOKEN),
25
+ 'Setting3': download_gold_answer(ANSWER_REPO, 'dataset/Setting3_test_answer.txt', HUB_TOKEN)
26
+ }
27
+
28
+ # Function to evaluate answer text
29
+ def eval_answer_txt(set_name, uploaded_file_path):
30
+ if set_name not in GET_GOLD_ANSWER_PATH:
31
+ return None
32
+ gold_answer_txt = GET_GOLD_ANSWER_PATH[set_name]
33
+ eval = Evaluation_answer_txt(gold_answer_txt, uploaded_file_path)
34
+ score_json = eval.eval()
35
+ return score_json
36
+
37
+ # Function to traverse directory and evaluate files
38
+ def evaluate_directory(input_dir, output_dir='./.output'):
39
+ os.makedirs(output_dir, exist_ok=True)
40
+ for root, _, files in os.walk(input_dir):
41
+ for file in files:
42
+ filename_info = file_name_decode(file)
43
+ if filename_info:
44
+ model_name_input = filename_info['model_name']
45
+ dataset_input = filename_info['dataset']
46
+ method_input = filename_info['method']
47
+ file_name = filename_info['file_name']
48
+
49
+ file_path = os.path.join(root, file)
50
+ # get full path of the file
51
+ file_path = os.path.abspath(file_path)
52
+ score_json = eval_answer_txt(dataset_input, file_path)
53
+ # print(f"sss" , GET_GOLD_ANSWER_PATH[dataset_input], file_path)
54
+ if score_json:
55
+ leaderboard_dict = {
56
+ "model name": model_name_input,
57
+ "dataset": dataset_input,
58
+ "method": method_input,
59
+ "file name": file_name,
60
+ "submitter": 'zhaorui',
61
+
62
+ "MICRO precision": score_json["MICRO_AVERAGE"]["precision"],
63
+ "MICRO recall": score_json["MICRO_AVERAGE"]["recall"],
64
+ "MICRO f1": score_json["MICRO_AVERAGE"]["f1"],
65
+ "MACRO precision": score_json["MACRO_AVERAGE"]["precision"],
66
+ "MACRO recall": score_json["MACRO_AVERAGE"]["recall"],
67
+ "MACRO f1": score_json["MACRO_AVERAGE"]["f1"],
68
+ "detail result": json.dumps(score_json,indent=4) #score_json
69
+ }
70
+
71
+ # train-[01-ai@Yi-1.5-6B-Chat][Setting1][icl][answer.txt].json
72
+ repo_file_name = f'train-[{model_name_input}][{dataset_input}][{method_input}][{file_name}].json'
73
+ output_path = os.path.join(output_dir, repo_file_name)
74
+ with open(output_path, 'w') as f:
75
+ json.dump(leaderboard_dict, f, indent=4)
76
+ else:
77
+ print(f"Failed to evaluate {file_path}")
78
+
79
+
80
+
81
+ # Main function to handle argparse
82
+ def main():
83
+ parser = argparse.ArgumentParser(description="Evaluate all text files in the given directory.")
84
+ parser.add_argument('input_dir', type=str, help='Path to the directory containing text files.')
85
+ parser.add_argument('--output_dir', type=str, default='./.output', help='Path to the directory to save the output json files.')
86
+
87
+ args = parser.parse_args()
88
+
89
+ evaluate_directory(args.input_dir, args.output_dir)
90
+
91
+ print(f"Evaluation completed. Results saved to evaluation_results.json")
92
+
93
+ if __name__ == "__main__":
94
+ main()
utils/Evaluation_answer_txt.py CHANGED
@@ -1,180 +1,180 @@
1
- import re
2
- import os
3
- from collections import Counter
4
- import json
5
-
6
-
7
- class Tag:
8
- def __init__(self, txt_line:str):
9
- # | file_name | label_type | label_start | label_end | label_text |
10
- # match = re.match(r'(.+)\t(\w+)\t(\d+)\t(\d+)\t(.+)', txt_line)
11
- try:
12
- sep = txt_line.strip().split('\t')
13
- self.file_id = sep[0]
14
- self.type = sep[1]
15
- self.start = sep[2] # int(sep[2])
16
- self.end = sep[3] # int(sep[3])
17
- self.text = sep[4]
18
- except:
19
- raise ValueError('The format of the input line is not correct. Please check the input line format.')
20
-
21
- def get_type(self):
22
- return self.type
23
-
24
- def get_file_id(self):
25
- return self.file_id
26
-
27
- def __eq__(self, other: 'Tag'):
28
- # if all file_id, type, start, end, are the same, return True
29
- # text is not considered for the comparison
30
- ck_file_id = self.file_id == other.file_id
31
- ck_type = self.type == other.type
32
- ck_start = self.start == other.start
33
- ck_end = self.end == other.end
34
- # ck_text = self.text == other.text
35
- if ck_file_id and ck_type and ck_start and ck_end:
36
- return True
37
- else:
38
- return False
39
- def __repr__(self):
40
- return f'<{self.__class__.__name__} {self.file_id:10} {self.type:10} s:{self.start:5} e:{self.end:5} {self.text}>\n'
41
-
42
- def __hash__(self):
43
- return hash((self.file_id, self.type, self.start, self.end))
44
-
45
- class Evaluation_answer_txt:
46
- def __init__(self, gold_answer, pred_answer):
47
- self.gold_answer = gold_answer
48
- self.pred_answer = pred_answer
49
-
50
- self.gold_set = set() # set of Tag
51
- self.pred_set = set() # set of Tag
52
-
53
- self.type_set = set() # set of label type str
54
- self.gold_label_counter = Counter() # Counter of gold label type
55
-
56
- self.resault_score = {}
57
-
58
- def _lines_to_tag_set(self, lines, set_type): # set_type: 'gold' or 'pred'
59
- tags = []
60
- for i in range(len(lines)):
61
- try:
62
- tag = Tag(lines[i])
63
- tags.append(tag)
64
- except:
65
- print(f'Error at {set_type} answer line: {i+1}, {lines[i]}')
66
- return set(tags)
67
-
68
- def _set_filter(self, tag_set, type):
69
- # tag set filter by type
70
- return {tag for tag in tag_set if tag.get_type() == type}
71
-
72
- def _division(self, a, b):
73
- try:
74
- return a / b
75
- except:
76
- return 0.0
77
-
78
- def _f1_score(self, TP=None, FP=None, FN=None):
79
- if TP is None or FP is None or FN is None:
80
- raise ValueError('TP, FP, FN should be given.')
81
-
82
- precision = self._division(TP, TP + FP)
83
- recall = self._division(TP, TP + FN)
84
- f1 = self._division(2 * precision * recall, precision + recall)
85
-
86
- return {'precision': precision, 'recall': recall, 'f1': f1}
87
-
88
-
89
- def eval(self, ignore_no_gold_tag_file=True):
90
- with open(self.gold_answer, 'r') as f:
91
- gold_line = f.readlines()
92
- # with open(self.pred_answer, 'r') as f:
93
- # pred_line = f.readlines()
94
- ########## add to support the input is a file object ##########
95
- if isinstance(self.pred_answer, str):
96
- with open(self.pred_answer, 'r') as f:
97
- pred_line = f.readlines()
98
-
99
-
100
- else:
101
- pred_line = self.pred_answer.readlines()
102
- #pred_line is bytes, need to decode
103
- pred_line = [line.decode('utf-8') for line in pred_line]
104
-
105
- self.gold_set = self._lines_to_tag_set(gold_line, 'gold')
106
- self.pred_set = self._lines_to_tag_set(pred_line, 'pred')
107
-
108
- # in islab aicup program, it will ignore the files that have no gold tags
109
- # that program only consider the files that write in gold answer.txt
110
- if ignore_no_gold_tag_file:
111
- # filter the files that have no gold tags
112
- gold_files = {tag.get_file_id() for tag in self.gold_set}
113
- self.pred_set = {tag for tag in self.pred_set if tag.get_file_id() in gold_files}
114
-
115
- # statistics tags and types
116
- for tag in self.gold_set:
117
- self.type_set.add(tag.get_type())
118
- self.gold_label_counter[tag.get_type()] += 1
119
- for tag in self.pred_set:
120
- self.type_set.add(tag.get_type())
121
-
122
- TP_set = self.gold_set & self.pred_set
123
- FP_set = self.pred_set - self.gold_set
124
- FN_set = self.gold_set - self.pred_set
125
-
126
- # count each type of label
127
- for label in self.type_set:
128
- filter_TP = self._set_filter(TP_set, label)
129
- filter_FP = self._set_filter(FP_set, label)
130
- filter_FN = self._set_filter(FN_set, label)
131
- score = self._f1_score(len(filter_TP), len(filter_FP), len(filter_FN))
132
- self.resault_score[label] = score
133
-
134
- # MICRO_AVERAGE
135
- self.resault_score['MICRO_AVERAGE'] = self._f1_score(len(TP_set), len(FP_set), len(FN_set))
136
-
137
- # MACRO_AVERAGE
138
- precision_sum = 0
139
- recall_sum = 0
140
- # f1_sum = 0 # at aicup, calc by MACRO_AVERAGE precision and recall
141
- for label in self.type_set:
142
- precision_sum += self.resault_score[label]['precision']
143
- recall_sum += self.resault_score[label]['recall']
144
- # f1_sum += self.resault_score[label]['f1']
145
-
146
- precision = self._division(precision_sum, len(self.type_set))
147
- recall = self._division(recall_sum, len(self.type_set))
148
- # f1 = 2 * precision * recall / (precision + recall)
149
- f1 = self._division(2 * precision * recall , (precision + recall))
150
-
151
- self.resault_score['MACRO_AVERAGE'] = {'precision': precision, 'recall': recall, 'f1': f1}
152
-
153
- # add Support to each type of label
154
- for label in self.type_set:
155
- self.resault_score[label]['support'] = self.gold_label_counter[label]
156
- self.resault_score['MICRO_AVERAGE']['support'] = len(self.gold_set)
157
- self.resault_score['MACRO_AVERAGE']['support'] = len(self.gold_set)
158
-
159
- # return json.dumps(self.resault_score, indent=4)
160
- return self.resault_score
161
-
162
-
163
- if __name__=="__main__":
164
- # with open('.output/[meta-llama@Llama-2-7b-hf][Setting3][icl]answer.txt', 'r', encoding='utf-8') as f:
165
- # lines = [line.strip() for line in f.readlines() if line.strip() != '']
166
-
167
- # gold_path = 'dataset/Setting3_test_answer.txt'
168
- # pred_path = '.output/EleutherAI-pythia-1b-Setting3_answer.txt'
169
-
170
-
171
- # gold_path = './.output/test_eval/gold_answer.txt'
172
- # pred_path = './.output/test_eval/pred_answer.txt'
173
-
174
- gold_path = 'dataset/Setting3_test_answer.txt'
175
- pred_path = '.output/[meta-llama@Llama-2-7b-hf][Setting3][icl]answer.txt'
176
-
177
-
178
- eval = Evaluation_answer_txt(gold_path, pred_path)
179
- res = eval.eval()
180
  print(res)
 
1
+ import re
2
+ import os
3
+ from collections import Counter
4
+ import json
5
+
6
+
7
class Tag:
    """One annotation row of an answer.txt file.

    Each line is tab separated:
    | file_name | label_type | label_start | label_end | label_text |
    Offsets are deliberately kept as strings so comparisons match the
    raw file content verbatim.
    """

    def __init__(self, txt_line: str):
        try:
            sep = txt_line.strip().split('\t')
            self.file_id = sep[0]
            self.type = sep[1]
            self.start = sep[2]  # kept as str on purpose (compared verbatim)
            self.end = sep[3]
            self.text = sep[4]
        except IndexError as err:
            # Fewer than five tab-separated fields: the line is malformed.
            # Chain the cause instead of swallowing it with a bare except.
            raise ValueError('The format of the input line is not correct. Please check the input line format.') from err

    def get_type(self):
        return self.type

    def get_file_id(self):
        return self.file_id

    def __eq__(self, other: 'Tag'):
        # Two tags are equal when file id, type and span all agree;
        # the surface text is deliberately NOT considered.
        return (self.file_id == other.file_id
                and self.type == other.type
                and self.start == other.start
                and self.end == other.end)

    def __repr__(self):
        return f'<{self.__class__.__name__} {self.file_id:10} {self.type:10} s:{self.start:5} e:{self.end:5} {self.text}>\n'

    def __hash__(self):
        # Consistent with __eq__: text is excluded from the hash as well.
        return hash((self.file_id, self.type, self.start, self.end))
44
+
45
class Evaluation_answer_txt:
    """Score a predicted answer.txt against a gold answer.txt.

    Lines are parsed into ``Tag`` objects and compared as sets, so a
    prediction counts as a true positive only when file id, label type
    and span all match a gold tag.  ``eval()`` returns per-label
    precision/recall/f1 plus MICRO_AVERAGE and MACRO_AVERAGE entries.
    """

    def __init__(self, gold_answer, pred_answer):
        # gold_answer: path to the gold answer.txt
        # pred_answer: path (str) or a binary file-like object of predictions
        self.gold_answer = gold_answer
        self.pred_answer = pred_answer

        self.gold_set = set()  # set of Tag
        self.pred_set = set()  # set of Tag

        self.type_set = set()                # set of label type str
        self.gold_label_counter = Counter()  # gold tag count per label type

        # NOTE(review): attribute keeps the historical misspelling
        # ("resault") because external callers may already read it.
        self.resault_score = {}

    def _lines_to_tag_set(self, lines, set_type):  # set_type: 'gold' or 'pred'
        """Parse raw lines into a set of Tag, skipping malformed lines."""
        tags = []
        for i in range(len(lines)):
            try:
                tags.append(Tag(lines[i]))
            except ValueError:
                # Malformed line (Tag raises ValueError): report 1-based
                # line number and keep going instead of aborting the run.
                print(f'Error at {set_type} answer line: {i+1}, {lines[i]}')
        return set(tags)

    def _set_filter(self, tag_set, type):
        # tag set filtered by label type
        return {tag for tag in tag_set if tag.get_type() == type}

    def _division(self, a, b):
        """Divide, returning 0.0 for a zero denominator."""
        try:
            return a / b
        except ZeroDivisionError:
            return 0.0

    def _f1_score(self, TP=None, FP=None, FN=None):
        """Return precision/recall/f1 computed from raw TP/FP/FN counts."""
        if TP is None or FP is None or FN is None:
            raise ValueError('TP, FP, FN should be given.')

        precision = self._division(TP, TP + FP)
        recall = self._division(TP, TP + FN)
        f1 = self._division(2 * precision * recall, precision + recall)

        return {'precision': precision, 'recall': recall, 'f1': f1}

    def eval(self, ignore_no_gold_tag_file=True):
        """Run the evaluation and return the score dict.

        When ignore_no_gold_tag_file is True, predicted tags for files
        that never appear in the gold answer are dropped first (this
        mirrors the islab aicup scoring program, which only considers
        files written in the gold answer.txt).
        """
        # explicit utf-8, matching the decode() used for file objects below
        with open(self.gold_answer, 'r', encoding='utf-8') as f:
            gold_line = f.readlines()

        # pred_answer may be a path or an already-open binary file object
        # (e.g. an upload handle), so support both.
        if isinstance(self.pred_answer, str):
            with open(self.pred_answer, 'r', encoding='utf-8') as f:
                pred_line = f.readlines()
        else:
            pred_line = self.pred_answer.readlines()
            # file objects yield bytes, so decode before parsing
            pred_line = [line.decode('utf-8') for line in pred_line]

        self.gold_set = self._lines_to_tag_set(gold_line, 'gold')
        self.pred_set = self._lines_to_tag_set(pred_line, 'pred')

        if ignore_no_gold_tag_file:
            # only keep predictions for files that have at least one gold tag
            gold_files = {tag.get_file_id() for tag in self.gold_set}
            self.pred_set = {tag for tag in self.pred_set if tag.get_file_id() in gold_files}

        # collect label types and gold support counts
        for tag in self.gold_set:
            self.type_set.add(tag.get_type())
            self.gold_label_counter[tag.get_type()] += 1
        for tag in self.pred_set:
            self.type_set.add(tag.get_type())

        TP_set = self.gold_set & self.pred_set
        FP_set = self.pred_set - self.gold_set
        FN_set = self.gold_set - self.pred_set

        # per-label scores
        for label in self.type_set:
            filter_TP = self._set_filter(TP_set, label)
            filter_FP = self._set_filter(FP_set, label)
            filter_FN = self._set_filter(FN_set, label)
            self.resault_score[label] = self._f1_score(len(filter_TP), len(filter_FP), len(filter_FN))

        # MICRO_AVERAGE: pooled counts over all labels
        self.resault_score['MICRO_AVERAGE'] = self._f1_score(len(TP_set), len(FP_set), len(FN_set))

        # MACRO_AVERAGE: mean precision/recall over labels; f1 is computed
        # from the averaged precision and recall (aicup convention), NOT
        # as the mean of per-label f1 values.
        precision_sum = 0
        recall_sum = 0
        for label in self.type_set:
            precision_sum += self.resault_score[label]['precision']
            recall_sum += self.resault_score[label]['recall']

        precision = self._division(precision_sum, len(self.type_set))
        recall = self._division(recall_sum, len(self.type_set))
        f1 = self._division(2 * precision * recall, (precision + recall))

        self.resault_score['MACRO_AVERAGE'] = {'precision': precision, 'recall': recall, 'f1': f1}

        # attach gold support counts
        for label in self.type_set:
            self.resault_score[label]['support'] = self.gold_label_counter[label]
        self.resault_score['MICRO_AVERAGE']['support'] = len(self.gold_set)
        self.resault_score['MACRO_AVERAGE']['support'] = len(self.gold_set)

        return self.resault_score
161
+
162
+
163
+ if __name__=="__main__":
164
+ # with open('.output/[meta-llama@Llama-2-7b-hf][Setting3][icl]answer.txt', 'r', encoding='utf-8') as f:
165
+ # lines = [line.strip() for line in f.readlines() if line.strip() != '']
166
+
167
+ # gold_path = 'dataset/Setting3_test_answer.txt'
168
+ # pred_path = '.output/EleutherAI-pythia-1b-Setting3_answer.txt'
169
+
170
+
171
+ # gold_path = './.output/test_eval/gold_answer.txt'
172
+ # pred_path = './.output/test_eval/pred_answer.txt'
173
+
174
+ gold_path = 'dataset/Setting3_test_answer.txt'
175
+ pred_path = '.output/[meta-llama@Llama-2-7b-hf][Setting3][icl]answer.txt'
176
+
177
+
178
+ eval = Evaluation_answer_txt(gold_path, pred_path)
179
+ res = eval.eval()
180
  print(res)
utils/upload_hub.py CHANGED
@@ -1,56 +1,56 @@
1
- import json
2
- import uuid
3
- import os
4
- import re
5
- from huggingface_hub import HfApi
6
- from huggingface_hub import hf_hub_download
7
-
8
-
9
-
10
- def file_name_decode(file_name):
11
- # model_name,dataset,method,answer.txt
12
- # input file name example: [mistralai@Mistral-7B-Instruct-v0.3][Setting3][icl]answer.txt
13
-
14
- match = re.match(rf'\[([^\[^\]]+)\]\[([^\[^\]]+)\]\[([^\[^\]]+)\]([^\[^\]]+)', file_name)
15
-
16
- if match:
17
- model_name, dataset, method, file_name = match.groups()
18
- ret_dict = {
19
- 'model_name': model_name,
20
- 'dataset': dataset,
21
- 'method': method,
22
- 'file_name': file_name
23
- }
24
- return ret_dict
25
- return None
26
-
27
- def upload_scores_to_hub(api, scores_dict, path_in_repo,hub_repo='zhaorui-nb/test_json'):
28
- # id = str(uuid.uuid4())
29
- save_json_path = f'.output/upload.json'
30
- os.makedirs(os.path.dirname(save_json_path), exist_ok=True)
31
- with open(save_json_path, 'w') as f:
32
- json.dump(scores_dict, f , indent=4)
33
-
34
- # SAVE JSON TO HUB
35
- res = api.upload_file(
36
- path_or_fileobj=save_json_path,
37
- path_in_repo=path_in_repo, #f'data/train,{os.path.basename(save_json_path)}',
38
- repo_id=hub_repo,
39
- repo_type="dataset",
40
- )
41
-
42
- return res
43
-
44
-
45
-
46
-
47
-
48
- if __name__ == "__main__":
49
-
50
- pass
51
-
52
-
53
-
54
-
55
-
56
-
 
1
+ import json
2
+ import uuid
3
+ import os
4
+ import re
5
+ from huggingface_hub import HfApi
6
+ from huggingface_hub import hf_hub_download
7
+
8
+
9
+
10
def file_name_decode(file_name):
    """Split an answer file name into its metadata parts.

    Expected form: ``[model_name][dataset][method]rest_of_name``, e.g.
    ``[mistralai@Mistral-7B-Instruct-v0.3][Setting3][icl]answer.txt``.

    Returns a dict with keys model_name/dataset/method/file_name, or
    None when the name does not match the pattern.
    """
    # The previous pattern used the class [^\[^\]], whose stray inner '^'
    # also (unintentionally) rejected literal carets inside a segment;
    # [^\[\]] is the intended "anything except brackets".
    match = re.match(r'\[([^\[\]]+)\]\[([^\[\]]+)\]\[([^\[\]]+)\]([^\[\]]+)', file_name)

    if match:
        model_name, dataset, method, rest = match.groups()
        return {
            'model_name': model_name,
            'dataset': dataset,
            'method': method,
            'file_name': rest,
        }
    return None
26
+
27
def upload_scores_to_hub(api, scores_dict, path_in_repo, hub_repo='zhaorui-nb/test_json'):
    """Serialize scores_dict to a local json file and upload it to the hub.

    api:          an HfApi-like object exposing upload_file(...).
    scores_dict:  json-serializable score payload.
    path_in_repo: destination path inside the dataset repo.
    hub_repo:     target dataset repo id.
    Returns whatever api.upload_file returns.
    """
    # scratch file, overwritten on every upload
    save_json_path = '.output/upload.json'
    os.makedirs(os.path.dirname(save_json_path), exist_ok=True)
    with open(save_json_path, 'w', encoding='utf-8') as f:
        json.dump(scores_dict, f, indent=4)

    # push the json file to the dataset repo
    res = api.upload_file(
        path_or_fileobj=save_json_path,
        path_in_repo=path_in_repo,
        repo_id=hub_repo,
        repo_type="dataset",
    )

    return res
43
+
44
+
45
+
46
+
47
+
48
+ if __name__ == "__main__":
49
+
50
+ pass
51
+
52
+
53
+
54
+
55
+
56
+