Spaces:
Sleeping
Sleeping
File size: 8,288 Bytes
5401755 9700fe3 aed7d5e b001ab7 8144c2e b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 aed7d5e 78829de aed7d5e 446384d 1992834 aed7d5e b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 5401755 b001ab7 5401755 b001ab7 9700fe3 b001ab7 5401755 aed7d5e 9700fe3 b001ab7 5401755 b001ab7 5401755 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 9700fe3 b001ab7 5401755 9700fe3 b001ab7 5401755 9700fe3 5401755 b001ab7 5401755 b001ab7 5401755 b001ab7 5401755 9700fe3 |
|
import gradio as gr
from datasets import load_dataset, Dataset
from collections import defaultdict
import random
import requests
import os
from langdetect import detect
import pandas as pd
from utils import *
# Load the source dataset and pool every distinct string found in its
# "query", "positive" and "negative" columns; get_next_en_text() samples
# from this pool.
source_dataset = load_dataset("vietdata/eng_echo", split="train")
eng_texts = list(set(source_dataset["query"] + source_dataset["positive"] + source_dataset["negative"]))
# NOTE(review): vi_texts, envi_translations, vien_translations and
# trans2score are not referenced anywhere in the visible code — confirm
# whether another module relies on them before removing.
vi_texts = []
# Initialize variables
envi_translations = []
vien_translations = []
trans2score = dict()
# In-memory queue of submitted translations. Each row is
#   [user_id, english_source, vietnamese_translation, score, counter, counter]
# where the second-to-last counter is tested by get_next_package() to decide
# whether the row can still be handed out, and the last counter is
# decremented by submit_translation() for every back-translation received.
# Row 0 is a placeholder with infinite counters that get_next_package()
# returns when no real row is available.
packages = [[0, "None", "None", 0, float('inf'), float("inf")]]
# Number of finished rows that triggers save_to_translated_echo(), and the
# batch size that function writes and removes.
num = 1000
def authenticate(user_id):
    """Check a user token against the subnet authentication endpoint.

    Args:
        user_id: Token entered by the user; sent as the ``token`` field of
            the JSON payload.

    Returns:
        True when the API answers with HTTP 200. False for any other status
        code, or when the request could not be completed (timeout,
        connection error, ...).

    Raises:
        KeyError: If the ``ADMIN`` environment variable is not set.
    """
    url = "https://intern-api.imtaedu.com/api/subnets/1/authenticate"
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
        "X-Public-Api-Key": os.environ['ADMIN'],
    }
    payload = {"token": user_id}
    try:
        # requests never times out by default; without a limit a stalled
        # server would block the login handler indefinitely.
        response = requests.post(url, json=payload, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # A transport failure means the token could not be verified, so it
        # is reported as "not authenticated" instead of crashing the handler.
        print(f"Authentication request failed: {exc}")
        return False
    return response.status_code == 200
def send_score(user_id, score):
    """Report a grade for a user to the subnet grading endpoint.

    The request is attempted up to ten times; an attempt counts as failed
    when the server answers with a status other than 200 or when the request
    itself raises (timeout, connection error, ...).

    Args:
        user_id: Token of the user being graded; sent as ``token``.
        score: Grade value; sent as ``grade``.

    Returns:
        True as soon as one attempt gets HTTP 200, False when every attempt
        failed.

    Raises:
        KeyError: If the ``ADMIN`` environment variable is not set.
    """
    # Everything about the request is identical across attempts, so it is
    # built once instead of on every retry.
    url = "https://intern-api.imtaedu.com/api/subnets/1/grade"
    payload = {
        "token": user_id,
        "comment": "Good job!",
        "grade": score,
        # NOTE(review): both timestamps are fixed placeholder values —
        # confirm whether the API expects the real submission/grading time.
        "submitted_at": "2021-01-01 00:00:00",
        "graded_at": "2021-01-01 00:00:00",
    }
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
        "X-Public-Api-Key": os.environ['ADMIN'],
    }
    max_retries = 10
    for _ in range(max_retries):
        try:
            # The timeout keeps a stalled server from hanging the submit
            # handler; a transport error just uses up one attempt.
            response = requests.post(url, json=payload, headers=headers, timeout=10)
        except requests.RequestException as exc:
            print(exc)
            continue
        if response.status_code == 200:
            return True
        print(response)
    return False
# Helper that picks the next English text a user should translate
def get_next_en_text(user_id):
    """Return one entry of ``eng_texts`` chosen at random.

    ``user_id`` is accepted but not used by the selection.
    """
    return random.choice(eng_texts)
def get_next_package(user_id):
    """Pick the stored translation that should be back-translated next.

    Rows of ``packages`` after the placeholder at index 0 are scanned in
    order. The first row whose hand-out counter (second-to-last field) is
    still positive is returned and that counter is decremented. Rows whose
    last two counters are both zero are counted as finished; once ``num``
    finished rows have been seen, ``save_to_translated_echo()`` is called
    and the placeholder row is returned for this call.

    Args:
        user_id: Token of the requesting user. Currently unused; a filter
            that skipped the user's own rows is disabled.

    Returns:
        The selected row of ``packages``, the placeholder row
        ``packages[0]`` when nothing is available, or None when
        ``packages`` is empty.
    """
    if not packages:
        return None
    finished = 0
    # Iterate over a snapshot: the only mutation of ``packages`` (the save)
    # is followed immediately by a return.
    for candidate in packages[1:]:
        if finished >= num:
            save_to_translated_echo()
            return packages[0]
        if candidate[-2] > 0:  # and candidate[0] != user_id
            # Consume one hand-out slot of the row being returned. The
            # previous code decremented the placeholder's infinite counter
            # instead, so rows never ran out and never counted as finished.
            candidate[-2] -= 1
            return candidate
        if candidate[-2] == 0 and candidate[-1] == 0:
            finished += 1
    return packages[0]
# Function to handle translation submission
def submit_translation(user_id, package, vi_translation, en_text, en_translation, vi_text):
    """Validate, score and record one submission, then report the grades.

    The Vietnamese translation of ``en_text`` is scored with ``gg_score``.
    The English back-translation is scored against the package currently
    assigned to the user (``package[0]``) with ``miner_score`` and with
    ``gg_score`` as a reference. The relative gap between those two scores
    produces a trust factor that scales both grades. ``gg_score`` and
    ``miner_score`` are not defined in this file (presumably provided by the
    ``utils`` star import).

    Args:
        user_id: Token of the submitting user.
        package: One-element list holding the ``packages`` row assigned to
            the user; its score and last counter are updated in place.
        vi_translation: User's Vietnamese translation of ``en_text``.
        en_text: English source text shown to the user.
        en_translation: User's English translation of the package's
            Vietnamese text; may be empty.
        vi_text: Vietnamese text shown to the user (unused here).

    Raises:
        ValueError: If ``vi_translation`` is empty or not detected as
            Vietnamese, or ``en_translation`` is non-empty and not detected
            as English.
        RuntimeError: If a grade could not be delivered by ``send_score``.
    """
    # Explicit raises instead of ``assert``: assertions are stripped under
    # ``python -O``, which would also have removed the send_score() calls.
    if vi_translation == "":
        raise ValueError("Vietnamese translation must not be empty")
    if detect(vi_translation) != "vi":
        gr.Warning("Bản dịch không phải tiếng Việt", duration=5)
        raise ValueError("Vietnamese translation was not detected as Vietnamese")
    if en_translation != "":
        detected_language = detect(en_translation)
        if detected_language != "en":
            print(en_translation, detected_language)
            gr.Warning("Bản dịch không phải tiếng Anh", duration=5)
            raise ValueError("English translation was not detected as English")

    first_score = gg_score(en_text, vi_translation, target="vi")
    second_score = miner_score(package[0][1], en_translation)
    ref_score = gg_score(package[0][2], en_translation, target="en")
    # The floor of 0.1 on the mean avoids dividing by (nearly) zero.
    # NOTE(review): the factor goes negative when the two scores differ by
    # more than their floored mean — confirm that is acceptable.
    trust_score = 1 - abs(second_score - ref_score)/max((second_score+ref_score)/2, 0.1)

    # Queue the new translation with both counters starting at 10.
    packages.append([user_id, en_text, vi_translation, first_score*trust_score*0.5, 10, 10])
    # Credit the author of the package that was back-translated and record
    # that one more back-translation for it has been received.
    package[0][3] += second_score*trust_score*0.05
    package[0][-1] -= 1

    if not send_score(user_id, first_score*trust_score*0.5):
        raise RuntimeError("Failed to send the translator's score")
    # user_id 0 marks the placeholder row, which has no author to grade.
    if package[0][0] != 0:
        if not send_score(package[0][0], second_score*trust_score*0.05):
            raise RuntimeError("Failed to send the package author's score")
# Function to save completed translations to 'translated_echo'
def save_to_translated_echo():
    """Append the oldest batch of packages to the hub dataset and drop it.

    Up to ``num`` rows following the placeholder at ``packages[0]`` are
    written (first four fields only: user_id, source, target, score) to
    ``vietdata/translated_echo`` together with whatever that dataset already
    holds, and are then removed from ``packages``. If the upload raises, the
    rows stay in ``packages``.
    """
    columns = ["user_id", "source", "target", "score"]
    try:
        old_dataset = load_dataset("vietdata/translated_echo", split="train")
        old_dataset = old_dataset.to_pandas()
    except Exception:
        # Treated as "dataset does not exist yet".
        # NOTE(review): a transient load failure also lands here and the
        # push below would then replace the split with only the new batch.
        old_dataset = pd.DataFrame([], columns=columns)

    # Skip index 0: it is the placeholder row, not a real translation. The
    # same slice is deleted below, so exactly the saved rows are removed.
    batch = packages[1:num + 1]
    new_dataset = pd.DataFrame([row[:4] for row in batch], columns=columns)
    # ignore_index/preserve_index keep the concatenated (duplicated) pandas
    # index from being stored as an extra column in the dataset.
    new_dataset = pd.concat([old_dataset, new_dataset], ignore_index=True)
    translated_dataset = Dataset.from_pandas(new_dataset, preserve_index=False)
    translated_dataset.push_to_hub("vietdata/translated_echo", split="train")

    # Release the dataframes before continuing to keep peak memory down.
    del new_dataset
    del old_dataset
    del translated_dataset
    import gc
    gc.collect()

    # Slice deletion is safe even when fewer than ``num`` rows are queued.
    del packages[1:num + 1]
# Initial value given to the two read-only source textboxes when the UI is
# built; the real texts are filled in by login().
english_text = None
# User session dictionary to store logged-in status.
# NOTE(review): nothing in the visible code adds entries (the write in
# login() is commented out); logout() only removes them.
user_sessions = {}
def login(username, state, package):
    """Authenticate a token and, on success, hand out the first job.

    Args:
        username: Token typed into the login box.
        state: One-element list (Gradio state) that receives the token.
        package: One-element list (Gradio state) that receives the
            ``packages`` row assigned to the user.

    Returns:
        A 5-tuple matching the click outputs: status message, update for
        the login section, update for the translation section, English
        source text, Vietnamese source text.
    """
    # Authenticate before touching any state so that an invalid token is
    # neither stored nor able to draw a package from the queue.
    if not authenticate(username):
        return "Invalid username or password.", gr.update(visible=True), gr.update(visible=False), "", ""

    state[0] = username
    package[0] = get_next_package(user_id=username)
    return (
        f"Welcome, {username}!",
        gr.update(visible=False),
        gr.update(visible=True),
        get_next_en_text(username),
        package[0][2],
    )
def logout(username):
    """Forget the user's session entry and switch the UI back to login.

    Returns a 3-tuple matching the click outputs: status message, update
    that shows the login section, update that hides the translation section.
    """
    # Remove the entry when present; an unknown user is simply ignored.
    user_sessions.pop(username, None)
    show_login = gr.update(visible=True)
    hide_translation = gr.update(visible=False)
    return "Logged out. Please log in again.", show_login, hide_translation
def press_submit_translation( state, package, vi_translation, en_input, en_translation, vi_input):
    """Handle the Submit button: record the submission, then load a new job.

    Args:
        state: One-element list holding the logged-in user's token.
        package: One-element list holding the ``packages`` row assigned to
            the user; replaced with the next row on success.
        vi_translation: Text of the Vietnamese translation box.
        en_input: English source text currently shown.
        en_translation: Text of the English translation box.
        vi_input: Vietnamese source text currently shown.

    Returns:
        A 5-tuple matching the click outputs: status message, English source
        text, Vietnamese source text, and the new contents of the two
        translation boxes (always empty strings).
    """
    import traceback

    try:
        submit_translation(state[0], package, vi_translation, en_input, en_translation, vi_input)
        # Save the translation and provide feedback
        gr.Info("Submitted Succesfully")
    except Exception as e:
        print(traceback.format_exc())
        print(e)
        # NOTE(review): this clears both translation boxes even though the
        # user is asked to retry — confirm that is intended.
        return "Error please try submit again!", en_input, vi_input, "", ""

    try:
        package[0] = get_next_package(user_id=state[0])
        return f"""Submitted Succesfully""", get_next_en_text(state[0]), package[0][2], "", ""
    except Exception:
        # ``Exception`` instead of a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate; the traceback is logged so the cause
        # of the failure is not lost.
        print(traceback.format_exc())
        return "Failed to load new job, please reload page!", en_input, vi_input, "", ""
# Build the Gradio UI: a login column and a translation column that is
# revealed after a successful login.
with gr.Blocks() as demo:
    # Per-session holders: the user's token and the package row assigned to them.
    state = gr.State([None])
    package = gr.State([None])

    # Login section
    with gr.Column(visible=True) as login_section:
        username_input = gr.Textbox(
            placeholder="Enter your token",
            label="Token ID",
        )
        login_button = gr.Button("Login")
        login_output = gr.Textbox(label="Login Status", interactive=False)

    # Translation section (initially hidden)
    with gr.Column(visible=False) as translation_section:
        # English -> Vietnamese task
        with gr.Column() as en2vi:
            gr.Markdown("### Dịch từ tiếng Anh sang tiếng Việt")
            en_input = gr.Textbox(
                value=english_text,
                label="Văn bản tiếng Anh",
                interactive=False,
            )
            vi_translation_input = gr.Textbox(
                placeholder="Nhập bản dịch",
                label="Nhập bản dịch tiếng Việt",
            )

        # Vietnamese -> English task (the column rebinds the name ``en2vi``)
        with gr.Column() as en2vi:
            gr.Markdown("### Dịch từ tiếng Việt sang tiếng Anh")
            vi_input = gr.Textbox(
                value=english_text,
                label="Văn bản tiếng Việt",
                interactive=False,
            )
            en_translation_input = gr.Textbox(
                placeholder="Nhập bản dịch",
                label="Nhập bản dịch tiếng Anh",
            )

        # Disabled widget for classifying a text as human-written or
        # machine-translated, kept verbatim for reference:
        # gr.Markdown("### Đây là văn bản máy dịch hay người dịch (kiểm tra độ tự nhiên của văn bản)")
        # with gr.Row():
        # eval_document = gr.Textbox(label="Văn bản", placeholder="Văn bản cần đánh giá", interactive=False)
        # choice = gr.Radio(["Human-Written", "Machine-Translated"], label="How would you classify this response?")
        submit_button = gr.Button("Submit")
        translation_output = gr.Textbox(label="Submission Status", interactive=False)
        logout_button = gr.Button("Logout")

    # Wire the buttons to their handlers
    login_button.click(
        fn=login,
        inputs=[username_input, state, package],
        outputs=[
            login_output,
            login_section,
            translation_section,
            en_input,
            vi_input,
        ],
    )
    submit_button.click(
        fn=press_submit_translation,
        inputs=[
            state,
            package,
            vi_translation_input,
            en_input,
            en_translation_input,
            vi_input,
        ],
        outputs=[
            translation_output,
            en_input,
            vi_input,
            vi_translation_input,
            en_translation_input,
        ],
    )
    logout_button.click(
        fn=logout,
        inputs=[username_input],
        outputs=[login_output, login_section, translation_section],
    )

demo.launch(debug=True)
|