Spaces:
Sleeping
Sleeping
File size: 8,288 Bytes
5401755 9700fe3 aed7d5e b001ab7 8144c2e b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 aed7d5e 78829de aed7d5e 446384d 1992834 aed7d5e b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 5401755 b001ab7 5401755 b001ab7 9700fe3 b001ab7 5401755 aed7d5e 9700fe3 b001ab7 5401755 b001ab7 5401755 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 5401755 9700fe3 b001ab7 5401755 9700fe3 5401755 b001ab7 5401755 b001ab7 5401755 b001ab7 5401755 9700fe3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 |
import gradio as gr
from datasets import load_dataset, Dataset
from collections import defaultdict
import random
import requests
import os
from langdetect import detect
import pandas as pd
from utils import *
# Load the source dataset and gather every distinct string found in its
# "query", "positive" and "negative" columns. The list order is arbitrary
# because the values pass through a set.
source_dataset = load_dataset("vietdata/eng_echo", split="train")
eng_texts = list(
    set(
        source_dataset["query"]
        + source_dataset["positive"]
        + source_dataset["negative"]
    )
)
vi_texts = []

# Shared in-memory state for the running app.
envi_translations = []
vien_translations = []
trans2score = {}

# Each package is a list:
#   [user_id, source_text, translation, score, counter, counter]
# The second-to-last counter is tested by get_next_package() before a package
# is handed out; the last counter is decremented by submit_translation().
# Index 0 is a placeholder entry (user_id 0, infinite counters) that
# get_next_package() returns when no other package qualifies.
packages = [[0, "None", "None", 0, float("inf"), float("inf")]]

# Number of packages written per save_to_translated_echo() call, and the
# completed-package threshold that triggers it in get_next_package().
num = 1000
def authenticate(user_id, timeout=10):
    """Check a user token against the subnet authentication endpoint.

    Args:
        user_id: Token string supplied by the user; sent as ``"token"``.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the calling
            worker indefinitely.

    Returns:
        True if the endpoint answers with HTTP 200, False for any other
        status code.

    Raises:
        KeyError: If the ``ADMIN`` environment variable is not set.
        requests.RequestException: On connection failure or timeout.
    """
    url = "https://intern-api.imtaedu.com/api/subnets/1/authenticate"
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
        # The API key is read from the environment on every call.
        "X-Public-Api-Key": os.environ["ADMIN"],
    }
    payload = {"token": user_id}
    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    return response.status_code == 200
def send_score(user_id, score, max_retries=10, timeout=10):
    """Report a grade for a user token, retrying on failure.

    Args:
        user_id: Token of the user being graded; sent as ``"token"``.
        score: Grade value; sent as ``"grade"``.
        max_retries: Maximum number of POST attempts.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        True as soon as one attempt is answered with HTTP 200, False when
        every attempt failed.

    Raises:
        KeyError: If the ``ADMIN`` environment variable is not set.
    """
    # The request is identical for every attempt, so build it once.
    url = "https://intern-api.imtaedu.com/api/subnets/1/grade"
    payload = {
        "token": user_id,
        "comment": "Good job!",
        "grade": score,
        # NOTE(review): both timestamps are fixed literals rather than the
        # real submission time - confirm whether the API needs real values.
        "submitted_at": "2021-01-01 00:00:00",
        "graded_at": "2021-01-01 00:00:00",
    }
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
        "X-Public-Api-Key": os.environ["ADMIN"],
    }
    for _ in range(max_retries):
        try:
            response = requests.post(
                url, json=payload, headers=headers, timeout=timeout
            )
        except requests.RequestException as exc:
            # A transient network error should consume one retry instead of
            # aborting the whole retry loop.
            print(exc)
            continue
        if response.status_code == 200:
            return True
        print(response)
    return False
# Helper function to get the next text for translation
def get_next_en_text(user_id):
    """Pick a random entry of ``eng_texts`` as the next text to translate.

    Args:
        user_id: Accepted for interface symmetry; not used by the selection.

    Returns:
        One element of the module-level ``eng_texts`` list.
    """
    return random.choice(eng_texts)
def get_next_package(user_id):
    """Select the next package whose translation should be reviewed.

    Scans ``packages`` from index 1 onward (index 0 is the placeholder
    entry). The first package whose second-to-last counter is still positive
    is handed out and that counter is decremented. Packages whose last two
    counters are both 0 are counted as completed; once ``num`` completed
    packages have been seen, ``save_to_translated_echo()`` is called and the
    placeholder is returned.

    Args:
        user_id: Requesting user. Currently unused: the check that skipped a
            user's own packages is disabled.

    Returns:
        A package list from ``packages``; the placeholder ``packages[0]``
        when nothing can be handed out; None if ``packages`` is empty.
    """
    if not packages:
        return None
    completed = 0
    # Iterate over a copy so the scan is unaffected by list mutation.
    for entry in packages[1:]:
        if completed >= num:
            save_to_translated_echo()
            return packages[0]
        if entry[-2] > 0:  # disabled extra condition: entry[0] != user_id
            # Decrement the counter of the package being handed out. The
            # previous code decremented packages[0][-2], which is infinite,
            # so no package could ever become completed.
            entry[-2] -= 1
            return entry
        if entry[-2] == 0 and entry[-1] == 0:
            completed += 1
    return packages[0]
# Function to handle translation submission
def submit_translation(user_id, package, vi_translation, en_text, en_translation, vi_text):
    """Validate, score and record one submission.

    The user's Vietnamese translation of ``en_text`` is scored and appended
    to ``packages`` as a new package. The user's English back-translation of
    the current package is scored too, and that score is credited to the
    package's author.

    Args:
        user_id: Token of the submitting user.
        package: One-element list holding the package under review.
        vi_translation: Vietnamese translation of ``en_text``; required.
        en_text: English source text shown to the user.
        en_translation: English translation of the package's Vietnamese
            text; its language check is skipped when empty.
        vi_text: Vietnamese text shown to the user; not used here.

    Raises:
        ValueError: If ``vi_translation`` is empty or either translation is
            not detected as the expected language.
        RuntimeError: If a score could not be delivered by ``send_score``.
    """
    # Explicit raises instead of ``assert``: assertions are stripped when
    # Python runs with -O, which would disable all validation.
    if vi_translation == "":
        raise ValueError("Vietnamese translation must not be empty")
    if detect(vi_translation) != "vi":
        gr.Warning("Bản dịch không phải tiếng Việt", duration=5)
        raise ValueError("translation was not detected as Vietnamese")
    if en_translation != "":
        # Detect once so the logged language is the one that was tested.
        detected_lang = detect(en_translation)
        if detected_lang != "en":
            print(en_translation, detected_lang)
            gr.Warning("Bản dịch không phải tiếng Anh", duration=5)
            raise ValueError("translation was not detected as English")

    current = package[0]
    # gg_score and miner_score are not defined in this file (presumably they
    # come from the ``utils`` star import).
    first_score = gg_score(en_text, vi_translation, target="vi")
    second_score = miner_score(current[1], en_translation)
    ref_score = gg_score(current[2], en_translation, target="en")
    # Relative disagreement between the two scores of the back-translation;
    # the denominator is floored at 0.1 to avoid dividing by ~0.
    trust_score = 1 - abs(second_score - ref_score) / max((second_score + ref_score) / 2, 0.1)

    own_reward = first_score * trust_score * 0.5
    author_reward = second_score * trust_score * 0.05

    packages.append([user_id, en_text, vi_translation, own_reward, 10, 10])
    current[3] += author_reward
    current[-1] -= 1

    if not send_score(user_id, own_reward):
        raise RuntimeError("could not deliver the submitter's score")
    # user_id 0 marks the placeholder package, which has no author to reward.
    if current[0] != 0:
        if not send_score(current[0], author_reward):
            raise RuntimeError("could not deliver the package author's score")
# Function to save completed translations to 'translated_echo'
def save_to_translated_echo():
    """Append the oldest ``num`` packages to the hub dataset and drop them.

    Loads ``vietdata/translated_echo`` (starting from an empty frame if it
    cannot be loaded), appends the first four fields of ``packages[1:num+1]``
    as rows, pushes the result back to the hub, and removes those packages
    from the in-memory list. The placeholder at index 0 is neither saved nor
    removed.
    """
    import gc

    columns = ["user_id", "source", "target", "score"]
    try:
        old_dataset = load_dataset("vietdata/translated_echo", split="train")
        old_dataset = old_dataset.to_pandas()
    except Exception as exc:
        # NOTE(review): any load failure falls back to an empty frame, so a
        # transient error would make the push below replace existing data.
        print(exc)
        old_dataset = pd.DataFrame([], columns=columns)

    # Save exactly the rows that are removed below. The previous slice
    # ``packages[:num]`` wrote the placeholder row and skipped the last one.
    batch = packages[1:num + 1]
    new_dataset = pd.DataFrame([entry[:4] for entry in batch], columns=columns)
    # ignore_index keeps a plain RangeIndex so that Dataset.from_pandas does
    # not store the index as an extra column.
    new_dataset = pd.concat([old_dataset, new_dataset], ignore_index=True)

    translated_dataset = Dataset.from_pandas(new_dataset)
    translated_dataset.push_to_hub("vietdata/translated_echo", split="train")

    # Release the frames before continuing to keep peak memory down.
    del new_dataset
    del old_dataset
    del translated_dataset
    gc.collect()

    del packages[1:num + 1]
# Initial value of the read-only source textboxes in the UI (nothing is shown
# until a login fills them).
english_text = None

# Logged-in users keyed by username; logout() removes entries from it.
user_sessions = dict()
def login(username, state, package):
    """Authenticate a token and, on success, load the first job.

    Args:
        username: Token entered in the login textbox.
        state: One-element session list; ``state[0]`` receives the token.
        package: One-element session list; ``package[0]`` receives the
            package returned by ``get_next_package``.

    Returns:
        A 5-tuple for the UI outputs: status message, update for the login
        section, update for the translation section, English text to
        translate, and the Vietnamese text of the current package.
    """
    # Authenticate first so that a rejected token neither changes the
    # session state nor takes a package out of the queue.
    if not authenticate(username):
        return (
            "Invalid username or password.",
            gr.update(visible=True),
            gr.update(visible=False),
            "",
            "",
        )

    state[0] = username
    package[0] = get_next_package(user_id=username)
    return (
        f"Welcome, {username}!",
        gr.update(visible=False),
        gr.update(visible=True),
        get_next_en_text(username),
        package[0][2],
    )
def logout(username):
    """Forget the user's session entry and switch the UI back to login.

    Args:
        username: Key to remove from ``user_sessions``; a missing key is
            ignored.

    Returns:
        A 3-tuple: status message, update showing the login section, update
        hiding the translation section.
    """
    # Log out user and reset session
    user_sessions.pop(username, None)
    show_login = gr.update(visible=True)
    hide_translation = gr.update(visible=False)
    return "Logged out. Please log in again.", show_login, hide_translation
def press_submit_translation(state, package, vi_translation, en_input, en_translation, vi_input):
    """Handle the Submit button: record the submission, then load a new job.

    Args:
        state: One-element session list holding the user token.
        package: One-element session list holding the current package.
        vi_translation: Text of the Vietnamese translation input.
        en_input: English source text currently displayed.
        en_translation: Text of the English translation input.
        vi_input: Vietnamese source text currently displayed.

    Returns:
        A 5-tuple for the UI outputs: status message, English source text,
        Vietnamese source text, Vietnamese translation input, English
        translation input.
    """
    import traceback

    try:
        submit_translation(state[0], package, vi_translation, en_input, en_translation, vi_input)
        # Save the translation and provide feedback
        gr.Info("Submitted Succesfully")
    except Exception as e:
        print(traceback.format_exc())
        print(e)
        # Keep what the user typed: the message asks them to submit again,
        # so the inputs must not be cleared.
        return "Error please try submit again!", en_input, vi_input, vi_translation, en_translation

    try:
        package[0] = get_next_package(user_id=state[0])
        return f"""Submitted Succesfully""", get_next_en_text(state[0]), package[0][2], "", ""
    except Exception:
        # The submission itself succeeded; log why the next job failed to
        # load instead of discarding the error silently.
        print(traceback.format_exc())
        return "Failed to load new job, please reload page!", en_input, vi_input, "", ""
# Define the Gradio interface
# NOTE(review): the nesting of the layout below was reconstructed from the
# statement order; confirm it matches the intended layout.
with gr.Blocks() as demo:
    # Per-session holders: state[0] is the user token, package[0] the
    # package currently under review.
    state = gr.State([None])
    package = gr.State([None])

    # Login section
    with gr.Column(visible=True) as login_section:
        username_input = gr.Textbox(placeholder="Enter your token", label="Token ID")
        login_button = gr.Button("Login")
        login_output = gr.Textbox(label="Login Status", interactive=False)

    # Translation section (initially hidden)
    with gr.Column(visible=False) as translation_section:
        # English -> Vietnamese task
        with gr.Column() as en2vi:
            gr.Markdown("### Dịch từ tiếng Anh sang tiếng Việt")
            en_input = gr.Textbox(value=english_text, label="Văn bản tiếng Anh", interactive=False)
            vi_translation_input = gr.Textbox(placeholder="Nhập bản dịch", label="Nhập bản dịch tiếng Việt")

        # Vietnamese -> English task. Bound to its own name; it previously
        # rebound ``en2vi`` and shadowed the column above.
        with gr.Column() as vi2en:
            gr.Markdown("### Dịch từ tiếng Việt sang tiếng Anh")
            vi_input = gr.Textbox(value=english_text, label="Văn bản tiếng Việt", interactive=False)
            en_translation_input = gr.Textbox(placeholder="Nhập bản dịch", label="Nhập bản dịch tiếng Anh")

        # NOTE: a third task (classify a text as human-written or
        # machine-translated via a textbox and a radio choice) was sketched
        # here and is currently disabled.

        submit_button = gr.Button("Submit")
        translation_output = gr.Textbox(label="Submission Status", interactive=False)
        logout_button = gr.Button("Logout")

    # Button functions
    login_button.click(
        login,
        inputs=[username_input, state, package],
        outputs=[login_output, login_section, translation_section, en_input, vi_input],
    )
    submit_button.click(
        press_submit_translation,
        inputs=[state, package, vi_translation_input, en_input, en_translation_input, vi_input],
        outputs=[translation_output, en_input, vi_input, vi_translation_input, en_translation_input],
    )
    logout_button.click(
        logout,
        inputs=[username_input],
        outputs=[login_output, login_section, translation_section],
    )

demo.launch(debug=True)
|