# Hugging Face Spaces page header (Space status: Sleeping)
# AUTOGENERATED! DO NOT EDIT! File to edit: enro-app.ipynb. | |
# %% auto 0 | |
# Names exported by `from <module> import *`.
__all__ = [
    'model_name',
    'tokenizer',
    'model',
    'dataset',
    'dialogs',
    'encoded_key',
    'decoded_bytes',
    'firebase_creds',
    'get_translations',
    'clean_sentence',
    'get_random_sentence',
    'save_option_to_repo',
    'update_prompt',
]
# %% enro-app.ipynb 1 | |
from transformers import MarianMTModel, MarianTokenizer | |
import gradio as gr | |
# %% enro-app.ipynb 2 | |
# Checkpoint identifier passed to both from_pretrained() calls below, so the
# tokenizer and the model always come from the same checkpoint.
model_name = "Helsinki-NLP/opus-mt-tc-big-en-ro"
tokenizer, model = (
    MarianTokenizer.from_pretrained(model_name),
    MarianMTModel.from_pretrained(model_name),
)
# %% enro-app.ipynb 3 | |
import random | |
def get_translations(input_text):
    """Translate ``input_text`` and return two randomly chosen candidates.

    The model is asked for four candidate translations; two of them are then
    sampled without replacement and returned in random order.

    Args:
        input_text: Source text to translate.

    Returns:
        A ``(translation_1, translation_2)`` tuple of decoded strings. Both
        are empty strings when ``input_text`` is empty or only whitespace.
    """
    # Nothing to translate: skip the model call instead of decoding whatever
    # the model would generate for an empty prompt.
    if not input_text or not input_text.strip():
        return "", ""

    # truncation=True keeps over-long input within the tokenizer's maximum
    # length instead of feeding an oversized sequence to the model.
    tokenized_text = tokenizer(input_text, return_tensors="pt", truncation=True)

    # num_return_sequences is the number of candidate translations generated.
    translated = model.generate(**tokenized_text, num_return_sequences=4)
    translations = [
        tokenizer.decode(candidate, skip_special_tokens=True)
        for candidate in translated
    ]

    first, second = random.sample(translations, 2)
    return first, second
# %% enro-app.ipynb 5 | |
from datasets import load_dataset

# Load the "daily_dialog" dataset; the result is indexed by split name below.
dataset = load_dataset("daily_dialog")
# %% enro-app.ipynb 6
import re

# Random example sentences are drawn from the "train" split only.
dialogs = dataset["train"]
# Function to clean extra spaces around punctuation marks | |
def clean_sentence(sentence):
    """Tidy the whitespace around punctuation in ``sentence``.

    Whitespace directly before any of ``? . ! , " ' -`` is dropped, whitespace
    directly after one of them is collapsed to a single space, and the result
    is stripped at both ends.
    """
    no_gap_before = re.sub(r'\s+([?.!,"\'-])', r'\1', sentence)
    single_gap_after = re.sub(r'([?.!,"\'-])\s+', r'\1 ', no_gap_before)
    return single_gap_after.strip()
# Assuming dialogues is a list of lists, where each inner list contains sentences of a dialogue | |
# Example: dialogues = [["Hello, how are you?", "I'm fine, thank you!"], ["What's your name?", "My name is John."]] | |
# Function to randomly select one sentence from the dataset | |
def get_random_sentence():
    """Return one randomly chosen, cleaned sentence from ``dialogs``.

    A random dialogue is picked first, then a random sentence within that
    dialogue, and the sentence is passed through ``clean_sentence``.
    """
    # Index a single random row rather than reading dialogs['dialog']: column
    # access can materialise every dialogue in the split on each call
    # (depending on the installed `datasets` version) just to pick one.
    random_dialogue = dialogs[random.randrange(len(dialogs))]['dialog']
    # Pick one sentence out of the chosen dialogue.
    random_sentence = random.choice(random_dialogue)
    return clean_sentence(random_sentence)
# %% enro-app.ipynb 7 | |
import os | |
import firebase_admin | |
from firebase_admin import credentials | |
from firebase_admin import db | |
import json | |
import base64 | |
# The service-account credentials are read from the FIREBASE_KEY environment
# variable, which is base64-decoded and then parsed as UTF-8 JSON.
encoded_key = os.getenv('FIREBASE_KEY')
if not encoded_key:
    # Fail with an actionable message instead of the opaque TypeError that
    # base64.b64decode(None) would raise.
    raise RuntimeError(
        "FIREBASE_KEY environment variable is not set; it must contain the "
        "base64-encoded Firebase service-account JSON."
    )
decoded_bytes = base64.b64decode(encoded_key)
firebase_creds = json.loads(decoded_bytes.decode('utf-8'))

# Initialise the Firebase app only if none is registered yet, so executing
# this code again in the same process does not attempt a second
# initialisation.
if not firebase_admin._apps:
    cred = credentials.Certificate(firebase_creds)
    firebase_admin.initialize_app(cred, {
        'databaseURL': 'https://ro-en-llm-default-rtdb.firebaseio.com/',
    })
# %% enro-app.ipynb 8 | |
def save_option_to_repo(trans_prompt, translation1, translation2, button):
    """Store the user's preferred translation under ``dpo_feedback``.

    Args:
        trans_prompt: The source text that was translated.
        translation1: Text of the first translation.
        translation2: Text of the second translation.
        button: Value identifying the user's choice; compared against
            ``"Translation 1"`` and ``"Translation 2"``.

    Returns:
        None. A record with ``prompt``/``chosen``/``rejected`` keys is pushed
        only when ``button`` names one of the two translations and the prompt
        and both translations are non-empty. Any other ``button`` value
        (e.g. ``"Equal"``) stores nothing.
    """
    if button == "Translation 1":
        chosen, rejected = translation1, translation2
    elif button == "Translation 2":
        chosen, rejected = translation2, translation1
    else:
        # No preference expressed, so there is no chosen/rejected pair to save.
        return

    # A click before any translation has been produced would otherwise push
    # an empty, unusable record.
    if not (trans_prompt and translation1 and translation2):
        return

    # Push new data to the database
    db.reference('dpo_feedback').push({
        'prompt': trans_prompt,
        'chosen': chosen,
        'rejected': rejected,
    })
def update_prompt():
    """Return a fresh random sentence obtained from ``get_random_sentence``."""
    new_prompt = get_random_sentence()
    return new_prompt
# Gradio UI: a prompt box, a translate button, two translation outputs and
# three buttons for recording which translation the user prefers.
with gr.Blocks() as demo:
    # Prompt text box next to the button that fills it with a random sentence.
    with gr.Row():
        prompt = gr.components.Textbox(scale=4)
        example_sentence = gr.Button("Get Random Sentence", scale=1)

    with gr.Row():
        generate = gr.Button("Translate")

    # The two candidate translations, shown side by side.
    with gr.Row(equal_height=True):
        translations = [
            gr.components.Text(label="Translation 1"),
            gr.components.Text(label="Translation 2"),
        ]

    # Preference buttons.
    with gr.Row():
        option_buttons = [
            gr.Button(value="Translation 1"),
            gr.Button(value="Equal"),
            gr.Button(value="Translation 2"),
        ]

    generate.click(get_translations, inputs=prompt, outputs=translations)
    example_sentence.click(update_prompt, outputs=prompt)

    # Each preference button is passed as its own last input so the handler
    # can tell which one was clicked.
    for choice_button in option_buttons:
        choice_button.click(
            save_option_to_repo,
            inputs=[prompt, translations[0], translations[1], choice_button],
        )

demo.launch()