File size: 8,219 Bytes
71b0571 fbd9133 71b0571 d40ab7e 71b0571 d40ab7e 71b0571 d40ab7e 71b0571 d40ab7e 71b0571 d40ab7e 71b0571 66ba79f 71b0571 66ba79f 71b0571 fbd9133 71b0571 fbd9133 71b0571 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
import streamlit as st
import pandas as pd
import json
import requests
import os
glm2b = pd.read_csv('glm2b-nk.csv')
glm10b = pd.read_csv('glm10b-nk.csv')
gptj = pd.read_csv('gptj-nk.csv')
gptjt = pd.read_csv('gptjt-nk.csv')
glm2b_orig = glm2b[['persona', 'knowledge', 'context', 'prompted text', 'ground truth', 'origin']].rename(columns={'origin': 'inference'})
glm2b_para = glm2b[['persona', 'knowledge', 'context', 'prompted text', 'ground truth', 'parallel']].rename(columns={'parallel': 'inference'})
glm10b_orig = glm10b[['persona', 'knowledge', 'context', 'prompted text', 'ground truth', 'origin']].rename(columns={'origin': 'inference'})
glm10b_para = glm10b[['persona', 'knowledge', 'context', 'prompted text', 'ground truth', 'parallel']].rename(columns={'parallel': 'inference'})
gptj_orig = gptj[['persona', 'knowledge', 'context', 'prompted text', 'ground truth', 'origin']].rename(columns={'origin': 'inference'})
gptj_para = gptj[['persona', 'knowledge', 'context', 'prompted text', 'ground truth', 'parallel']].rename(columns={'parallel': 'inference'})
gptjt_orig = gptjt[['persona', 'knowledge', 'context', 'prompted text', 'ground truth', 'origin']].rename(columns={'origin': 'inference'})
gptjt_para = gptjt[['persona', 'knowledge', 'context', 'prompted text', 'ground truth', 'parallel']].rename(columns={'parallel': 'inference'})
csv_map = [glm2b_orig, glm2b_para, glm10b_orig, glm10b_para, gptj_orig, gptj_para, gptjt_orig, gptjt_para]
truth = glm2b['ground truth'].to_list()
users = json.load(open('data/users.json'))
def new_data(user_data):
process_bar.progress((user_data['all_process']-user_data['start_process'])/(user_data['stop_process']-user_data['start_process']), text='进度')
if user_data['all_process'] == user_data['stop_process']:
return 'finish'
csv_idx = user_data['model_list'][user_data['all_process']]
sample = csv_map[csv_idx].iloc[user_data['data_idx'][csv_idx][user_data['process'][csv_idx]]]
persona = sample.persona.split('\n')
new_p = []
for pi in persona:
new_p += [pi[i:i+60] for i in range(0, len(pi), 60)]
new_p = '\n'.join(new_p)
knowledge = sample.knowledge.split('\n')
new_k = []
for ki in knowledge:
new_k += [ki[i:i+60] for i in range(0, len(ki), 60)]
new_k = '\n'.join(new_k)
context = sample.context.split('\n')
context.remove(context[-1])
new_c = []
for ci in context:
new_c += [ci[i:i+120] for i in range(0, len(ci), 120)]
new_c = '\n'.join(new_c)
gtruth = sample['ground truth'].split('\n')
new_g = []
for gi in gtruth:
new_g += [gi[i:i+60] for i in range(0, len(gi), 60)]
new_g = '\n'.join(new_g)
inf = sample.inference.split('\n')
new_i = []
for ii in inf:
new_i += [ii[i:i+60] for i in range(0, len(ii), 60)]
new_i = '\n'.join(new_i)
p.text(new_p)
k.text(new_k)
c.text(new_c)
g.text(new_g)
infer.text(new_i)
return 'not finish'
st.set_page_config(layout="wide")
st.title('FoCus Annotation')
t1, t2 = st.columns(2)
with t1:
username = st.text_input("请输入用户名")
with t2:
password = st.text_input("请输入密码", type="password")
login_btn = st.button('登录')
col1, col2 = st.columns(2)
with col1:
with st.expander("人设"):
p = st.empty()
with col2:
with st.expander("知识"):
k = st.empty()
with st.expander('对话上下文'):
c = st.empty()
a1, a2 = st.columns(2)
with a1:
st.markdown("**真实值**")
g = st.empty()
with a2:
st.markdown("**待标注样本**")
infer = st.empty()
cc, kc, pc, hc, fc = st.columns(5)
with st.container():
with cc:
cs = st.selectbox("对话一致性", [0,1,2], key='cs')
with kc:
ks = st.selectbox("知识一致性", [0,1,2], key='ks')
with pc:
ps = st.selectbox("人设一致性", [0,1,2], key='ps')
with hc:
hs = st.selectbox("精炼度", [0,1,2], key='hs')
with fc:
fs = st.selectbox("流畅度", [0,1,2], key='fs')
process_bar = st.progress(0.0, text='进度')
col3, col4 = st.columns(2)
with st.container():
with col3:
prev = st.button('上一个')
with col4:
succ = st.button('下一个')
if username in users and users[username] == password:
data = {'FocusUser': username}
user_data=requests.post(os.environ.get("URL"), data=json.dumps(data, ensure_ascii=False).encode('utf-8')).content
user_data = json.loads(str(user_data, encoding="utf-8"))
# user_data = json.load(open(f'data/{username}.json'))
result = new_data(user_data)
else:
p.text("登录后开始标注")
c.text("登录后开始标注")
g.text("登录后开始标注")
k.text("登录后开始标注")
infer.text("登录后开始标注")
if login_btn:
if username in users and users[username] == password:
st.success('登录成功')
data = {'FocusUser': username}
user_data=requests.post(os.environ.get("URL"), data=json.dumps(data, ensure_ascii=False).encode('utf-8')).content
user_data = json.loads(str(user_data, encoding="utf-8"))
# user_data = json.load(open(f'data/{username}.json'))
result = new_data(user_data)
if result == 'finish':
st.success('您已完成标注')
else:
username = ''
password = ''
st.error('用户名或密码错误,请先注册。若已有账号,但忘记密码,请联系管理员修改密码')
if succ:
if username in users and users[username] == password:
data = {'FocusUser': username}
user_data=requests.post(os.environ.get("URL"), data=json.dumps(data, ensure_ascii=False).encode('utf-8')).content
user_data = json.loads(str(user_data, encoding="utf-8"))
# user_data = json.load(open(f'data/{username}.json'))
data_idx, process, all_process, model_list = user_data['data_idx'], user_data['process'], user_data['all_process'], user_data['model_list']
if all_process == user_data['stop_process']:
st.success('您已完成标注')
else:
csv_idx = model_list[all_process]
sample = csv_map[csv_idx].iloc[data_idx[csv_idx][process[csv_idx]]]
user_data['context_relevance'][csv_idx][process[csv_idx]%100] = cs
user_data['knowledge_relevance'][csv_idx][process[csv_idx]%100] = ks
user_data['persona_consistency'][csv_idx][process[csv_idx]%100] = ps
user_data['hallucination'][csv_idx][process[csv_idx]%100] = hs
user_data['fluency'][csv_idx][process[csv_idx]%100] = fs
user_data['process'][csv_idx] += 1
user_data['all_process'] += 1
data = {'Focus': user_data, 'username': username}
requests.post(os.environ.get("URL"), data=json.dumps(data, ensure_ascii=False).encode('utf-8'))
# json.dump(user_data, open(f'data/{username}.json', 'w'), ensure_ascii=False, indent=2)
result = new_data(user_data)
if result == 'finish':
st.success('您已完成标注')
else:
st.error('请先登录')
if prev:
if username in users and users[username] == password:
data = {'FocusUser': username}
user_data=requests.post(os.environ.get("URL"), data=json.dumps(data, ensure_ascii=False).encode('utf-8')).content
user_data = json.loads(str(user_data, encoding="utf-8"))
# user_data = json.load(open(f'data/{username}.json'))
model_list = user_data['model_list']
if user_data['all_process'] == user_data['start_process']:
result = new_data(user_data)
st.error('已是首个数据')
else:
user_data['all_process'] -= 1
csv_idx = model_list[user_data['all_process']]
user_data['process'][csv_idx] -= 1
result = new_data(user_data)
data = {'Focus': user_data, 'username': username}
requests.post(os.environ.get("URL"), data=json.dumps(data, ensure_ascii=False).encode('utf-8'))
# json.dump(user_data, open(f'data/{username}.json', 'w'), ensure_ascii=False, indent=2)
else:
st.error('请先登录')
|