coroianpetruta committed on
Commit
44f2a0a
1 Parent(s): 9bb6eef

Changed random sentence dataset

Browse files
Files changed (1) hide show
  1. app.py +36 -58
app.py CHANGED
@@ -1,8 +1,8 @@
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: enro-app.ipynb.
2
 
3
  # %% auto 0
4
- __all__ = ['model_name', 'tokenizer', 'model', 'example_translations', 'sentences', 'encoded_key', 'decoded_bytes',
5
- 'firebase_creds', 'get_translations', 'save_option_to_repo', 'get_random_sentence', 'update_prompt']
6
 
7
  # %% enro-app.ipynb 1
8
  from transformers import MarianMTModel, MarianTokenizer
@@ -28,60 +28,39 @@ def get_translations(input_text):
28
  return random_translations[0], random_translations[1]
29
 
30
  # %% enro-app.ipynb 5
31
- example_translations = sentences = [
32
- "The mouse jumped.",
33
- "The mouse ran away.",
34
- "The mouse squeaked loudly.",
35
- "The mouse hid quickly.",
36
- "The mouse nibbled cheese.",
37
- "The mouse dashed off.",
38
- "The mouse found crumbs.",
39
- "The mouse searched intently.",
40
- "The mouse peered out.",
41
- "The mouse sniffed carefully.",
42
- "The mouse climbed up.",
43
- "The mouse scurried along.",
44
- "The mouse escaped swiftly.",
45
- "The mouse burrowed deeply.",
46
- "The mouse explored curiously.",
47
- "That mouse squeaked.",
48
- "That mouse dashed away.",
49
- "That mouse looked around.",
50
- "That mouse gnawed wood.",
51
- "That mouse hid cleverly.",
52
- "That mouse chewed through.",
53
- "That mouse fled quickly.",
54
- "That mouse found food.",
55
- "That mouse evaded capture.",
56
- "That mouse peeked out.",
57
- "That mouse twitched nervously.",
58
- "That mouse made noise.",
59
- "That mouse crawled away.",
60
- "That mouse gnawed plastic.",
61
- "That mouse explored boldly.",
62
- "Mice are fast.",
63
- "Mice are clever.",
64
- "Mice are quick.",
65
- "Mice are tiny.",
66
- "Mice are playful.",
67
- "Mice are resourceful.",
68
- "Mice are shy.",
69
- "Mice are cute.",
70
- "Mice live together.",
71
- "Mice hide well.",
72
- "Mice avoid predators.",
73
- "Mice chew everything.",
74
- "Mice burrow underground.",
75
- "Mice love food.",
76
- "Mice squeak often.",
77
- "Mice find crumbs.",
78
- "Mice travel far.",
79
- "Mice reproduce rapidly.",
80
- "Mice build nests.",
81
- "Mice hide indoors."
82
- ]
83
 
84
  # %% enro-app.ipynb 6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  import os
86
  import firebase_admin
87
  from firebase_admin import credentials
@@ -98,7 +77,7 @@ if not firebase_admin._apps:
98
  'databaseURL': 'https://ro-en-llm-default-rtdb.firebaseio.com/'
99
  })
100
 
101
- # %% enro-app.ipynb 7
102
  def save_option_to_repo(trans_prompt, translation1, translation2, button):
103
  ref = db.reference('dpo_feedback')
104
  # Push new data to the database
@@ -115,8 +94,7 @@ def save_option_to_repo(trans_prompt, translation1, translation2, button):
115
  'rejected': translation1
116
  })
117
 
118
- def get_random_sentence():
119
- return random.sample(example_translations, 1)[0]
120
 
121
 
122
  def update_prompt():
@@ -145,7 +123,7 @@ with gr.Blocks() as demo:
145
  option_buttons.append(gr.Button(value="Translation 2"))
146
 
147
  generate.click(get_translations, inputs=prompt, outputs=translations)
148
- example_sentence.click(get_random_sentence, outputs=prompt)
149
  for i in range(0,3):
150
  option_buttons[i].click(save_option_to_repo, inputs=[prompt, translations[0], translations[1], option_buttons[i]])
151
 
 
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: enro-app.ipynb.
2
 
3
  # %% auto 0
4
+ __all__ = ['model_name', 'tokenizer', 'model', 'dataset', 'dialogs', 'encoded_key', 'decoded_bytes', 'firebase_creds',
5
+ 'get_translations', 'clean_sentence', 'get_random_sentence', 'save_option_to_repo', 'update_prompt']
6
 
7
  # %% enro-app.ipynb 1
8
  from transformers import MarianMTModel, MarianTokenizer
 
28
  return random_translations[0], random_translations[1]
29
 
30
  # %% enro-app.ipynb 5
31
+ from datasets import load_dataset
32
+ dataset = load_dataset("daily_dialog")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  # %% enro-app.ipynb 6
35
+ import re
36
+
37
+ dialogs = dataset["train"]
38
# Tidy up the whitespace that surrounds punctuation marks in a sentence.
def clean_sentence(sentence):
    """Return ``sentence`` with whitespace around punctuation normalized.

    Three steps are applied in order:

    1. Any run of whitespace directly before one of ``? . ! , " ' -`` is
       removed, so ``"you ?"`` becomes ``"you?"``.
    2. Any run of whitespace directly after one of those characters is
       collapsed to a single space (it is not removed entirely).
    3. Leading and trailing whitespace is stripped.

    Quotes, apostrophes and hyphens are in the punctuation set, so the
    space before an opening quote or a free-standing hyphen is removed too
    (``'say "hi"'`` becomes ``'say"hi"'``).

    Args:
        sentence: The text to clean.

    Returns:
        The cleaned string.
    """
    # Step 1: glue each punctuation mark to the text on its left.
    tightened = re.sub(r'\s+([?.!,"\'-])', r'\1', sentence)
    # Step 2: leave exactly one space after a mark that had whitespace after it.
    respaced = re.sub(r'([?.!,"\'-])\s+', r'\1 ', tightened)
    # Step 3: drop padding at both ends.
    return respaced.strip()
46
+
47
+ # Assumes dialogs['dialog'] is a list of lists, where each inner list contains the sentences of one dialogue.
48
+ # Example: dialogs['dialog'] = [["Hello, how are you?", "I'm fine, thank you!"], ["What's your name?", "My name is John."]]
49
+
50
# Pick one random, cleaned-up sentence out of a collection of dialogues.
def get_random_sentence(dialogs):
    """Return a randomly chosen sentence from ``dialogs``, cleaned.

    A dialogue is drawn at random from ``dialogs['dialog']``, then one
    sentence is drawn at random from that dialogue and passed through
    ``clean_sentence``.

    Args:
        dialogs: An object whose ``'dialog'`` entry is a sequence of
            dialogues, each itself a sequence of sentence strings.

    Returns:
        The selected sentence after ``clean_sentence`` has been applied.

    NOTE(review): calling this function yields a string, not a callable.
    Where it is registered as an event handler, a wrapper such as
    ``lambda: get_random_sentence(dialogs)`` is presumably needed - confirm
    at the call site.
    """
    # First draw: which conversation to sample from.
    conversation = random.choice(dialogs['dialog'])
    # Second draw: which utterance inside that conversation.
    utterance = random.choice(conversation)
    return clean_sentence(utterance)
60
+
61
+
62
+
63
+ # %% enro-app.ipynb 7
64
  import os
65
  import firebase_admin
66
  from firebase_admin import credentials
 
77
  'databaseURL': 'https://ro-en-llm-default-rtdb.firebaseio.com/'
78
  })
79
 
80
+ # %% enro-app.ipynb 8
81
  def save_option_to_repo(trans_prompt, translation1, translation2, button):
82
  ref = db.reference('dpo_feedback')
83
  # Push new data to the database
 
94
  'rejected': translation1
95
  })
96
 
97
+
 
98
 
99
 
100
  def update_prompt():
 
123
  option_buttons.append(gr.Button(value="Translation 2"))
124
 
125
  generate.click(get_translations, inputs=prompt, outputs=translations)
126
+ example_sentence.click(get_random_sentence(dialogs), outputs=prompt)
127
  for i in range(0,3):
128
  option_buttons[i].click(save_option_to_repo, inputs=[prompt, translations[0], translations[1], option_buttons[i]])
129