Spaces:
Sleeping
Sleeping
coroianpetruta
committed on
Commit
•
44f2a0a
1
Parent(s):
9bb6eef
Changed random sentence dataset
Browse files
app.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
# AUTOGENERATED! DO NOT EDIT! File to edit: enro-app.ipynb.
|
2 |
|
3 |
# %% auto 0
|
4 |
-
__all__ = ['model_name', 'tokenizer', 'model', '
|
5 |
-
'
|
6 |
|
7 |
# %% enro-app.ipynb 1
|
8 |
from transformers import MarianMTModel, MarianTokenizer
|
@@ -28,60 +28,39 @@ def get_translations(input_text):
|
|
28 |
return random_translations[0], random_translations[1]
|
29 |
|
30 |
# %% enro-app.ipynb 5
|
31 |
-
|
32 |
-
|
33 |
-
"The mouse ran away.",
|
34 |
-
"The mouse squeaked loudly.",
|
35 |
-
"The mouse hid quickly.",
|
36 |
-
"The mouse nibbled cheese.",
|
37 |
-
"The mouse dashed off.",
|
38 |
-
"The mouse found crumbs.",
|
39 |
-
"The mouse searched intently.",
|
40 |
-
"The mouse peered out.",
|
41 |
-
"The mouse sniffed carefully.",
|
42 |
-
"The mouse climbed up.",
|
43 |
-
"The mouse scurried along.",
|
44 |
-
"The mouse escaped swiftly.",
|
45 |
-
"The mouse burrowed deeply.",
|
46 |
-
"The mouse explored curiously.",
|
47 |
-
"That mouse squeaked.",
|
48 |
-
"That mouse dashed away.",
|
49 |
-
"That mouse looked around.",
|
50 |
-
"That mouse gnawed wood.",
|
51 |
-
"That mouse hid cleverly.",
|
52 |
-
"That mouse chewed through.",
|
53 |
-
"That mouse fled quickly.",
|
54 |
-
"That mouse found food.",
|
55 |
-
"That mouse evaded capture.",
|
56 |
-
"That mouse peeked out.",
|
57 |
-
"That mouse twitched nervously.",
|
58 |
-
"That mouse made noise.",
|
59 |
-
"That mouse crawled away.",
|
60 |
-
"That mouse gnawed plastic.",
|
61 |
-
"That mouse explored boldly.",
|
62 |
-
"Mice are fast.",
|
63 |
-
"Mice are clever.",
|
64 |
-
"Mice are quick.",
|
65 |
-
"Mice are tiny.",
|
66 |
-
"Mice are playful.",
|
67 |
-
"Mice are resourceful.",
|
68 |
-
"Mice are shy.",
|
69 |
-
"Mice are cute.",
|
70 |
-
"Mice live together.",
|
71 |
-
"Mice hide well.",
|
72 |
-
"Mice avoid predators.",
|
73 |
-
"Mice chew everything.",
|
74 |
-
"Mice burrow underground.",
|
75 |
-
"Mice love food.",
|
76 |
-
"Mice squeak often.",
|
77 |
-
"Mice find crumbs.",
|
78 |
-
"Mice travel far.",
|
79 |
-
"Mice reproduce rapidly.",
|
80 |
-
"Mice build nests.",
|
81 |
-
"Mice hide indoors."
|
82 |
-
]
|
83 |
|
84 |
# %% enro-app.ipynb 6
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
import os
|
86 |
import firebase_admin
|
87 |
from firebase_admin import credentials
|
@@ -98,7 +77,7 @@ if not firebase_admin._apps:
|
|
98 |
'databaseURL': 'https://ro-en-llm-default-rtdb.firebaseio.com/'
|
99 |
})
|
100 |
|
101 |
-
# %% enro-app.ipynb
|
102 |
def save_option_to_repo(trans_prompt, translation1, translation2, button):
|
103 |
ref = db.reference('dpo_feedback')
|
104 |
# Push new data to the database
|
@@ -115,8 +94,7 @@ def save_option_to_repo(trans_prompt, translation1, translation2, button):
|
|
115 |
'rejected': translation1
|
116 |
})
|
117 |
|
118 |
-
|
119 |
-
return random.sample(example_translations, 1)[0]
|
120 |
|
121 |
|
122 |
def update_prompt():
|
@@ -145,7 +123,7 @@ with gr.Blocks() as demo:
|
|
145 |
option_buttons.append(gr.Button(value="Translation 2"))
|
146 |
|
147 |
generate.click(get_translations, inputs=prompt, outputs=translations)
|
148 |
-
example_sentence.click(get_random_sentence, outputs=prompt)
|
149 |
for i in range(0,3):
|
150 |
option_buttons[i].click(save_option_to_repo, inputs=[prompt, translations[0], translations[1], option_buttons[i]])
|
151 |
|
|
|
1 |
# AUTOGENERATED! DO NOT EDIT! File to edit: enro-app.ipynb.
|
2 |
|
3 |
# %% auto 0
|
4 |
+
__all__ = ['model_name', 'tokenizer', 'model', 'dataset', 'dialogs', 'encoded_key', 'decoded_bytes', 'firebase_creds',
|
5 |
+
'get_translations', 'clean_sentence', 'get_random_sentence', 'save_option_to_repo', 'update_prompt']
|
6 |
|
7 |
# %% enro-app.ipynb 1
|
8 |
from transformers import MarianMTModel, MarianTokenizer
|
|
|
28 |
return random_translations[0], random_translations[1]
|
29 |
|
30 |
# %% enro-app.ipynb 5
|
31 |
+
from datasets import load_dataset
|
32 |
+
dataset = load_dataset("daily_dialog")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
# %% enro-app.ipynb 6
|
35 |
+
import re
|
36 |
+
|
37 |
+
dialogs = dataset["train"]
|
38 |
+
# Function to clean extra spaces around punctuation marks
|
39 |
+
def clean_sentence(sentence):
    """Normalize whitespace around punctuation in a tokenized sentence.

    Args:
        sentence: Text whose punctuation marks may be surrounded by
            extra spaces, e.g. ``"Hello , how are you ?"``.

    Returns:
        The sentence with whitespace before ``? . ! , " ' -`` removed,
        any run of whitespace after those marks collapsed to a single
        space, and leading/trailing whitespace stripped.
    """
    # Drop whitespace that precedes a punctuation mark.
    sentence = re.sub(r'\s+([?.!,"\'-])', r'\1', sentence)
    # Collapse whitespace that follows a punctuation mark to exactly one
    # space (the space is kept, not removed).
    sentence = re.sub(r'([?.!,"\'-])\s+', r'\1 ', sentence)
    return sentence.strip()
|
46 |
+
|
47 |
+
# Assumes dialogs['dialog'] is a list of lists, where each inner list contains the sentences of one dialogue
|
48 |
+
# Example: dialogs['dialog'] = [["Hello, how are you?", "I'm fine, thank you!"], ["What's your name?", "My name is John."]]
|
49 |
+
|
50 |
+
# Function to randomly select one sentence from the dataset
|
51 |
+
def get_random_sentence(dialogs):
    """Return one randomly chosen, cleaned sentence from ``dialogs``.

    Args:
        dialogs: Object indexable by the key ``'dialog'``; that entry is
            expected to be a sequence of dialogues, each of which is a
            sequence of sentence strings.

    Returns:
        A single sentence picked uniformly from a uniformly picked
        dialogue, passed through ``clean_sentence``.

    Raises:
        IndexError: If the dialogue collection, or the dialogue that was
            picked, is empty (raised by ``random.choice``).
    """
    # First pick a dialogue, then pick one sentence inside it.
    random_dialogue = random.choice(dialogs['dialog'])
    random_sentence = random.choice(random_dialogue)

    # Tidy the spacing around punctuation before returning it.
    return clean_sentence(random_sentence)
|
60 |
+
|
61 |
+
|
62 |
+
|
63 |
+
# %% enro-app.ipynb 7
|
64 |
import os
|
65 |
import firebase_admin
|
66 |
from firebase_admin import credentials
|
|
|
77 |
'databaseURL': 'https://ro-en-llm-default-rtdb.firebaseio.com/'
|
78 |
})
|
79 |
|
80 |
+
# %% enro-app.ipynb 8
|
81 |
def save_option_to_repo(trans_prompt, translation1, translation2, button):
|
82 |
ref = db.reference('dpo_feedback')
|
83 |
# Push new data to the database
|
|
|
94 |
'rejected': translation1
|
95 |
})
|
96 |
|
97 |
+
|
|
|
98 |
|
99 |
|
100 |
def update_prompt():
|
|
|
123 |
option_buttons.append(gr.Button(value="Translation 2"))
|
124 |
|
125 |
generate.click(get_translations, inputs=prompt, outputs=translations)
|
126 |
+
# Pass a callable, not the result of calling it: the lambda defers the
# lookup so a fresh random sentence is drawn on every button click.
example_sentence.click(lambda: get_random_sentence(dialogs), outputs=prompt)
|
127 |
for i in range(0,3):
|
128 |
option_buttons[i].click(save_option_to_repo, inputs=[prompt, translations[0], translations[1], option_buttons[i]])
|
129 |
|