Vijayendra
committed on
Commit
•
1529d2a
1
Parent(s):
e2fb9d7
Update README.md
Browse files
README.md
CHANGED
@@ -18,24 +18,14 @@ model_name = "SynapseQAI/T5-base-wmt14"
|
|
18 |
tokenizer = T5Tokenizer.from_pretrained(model_name)
|
19 |
model = T5ForConditionalGeneration.from_pretrained(model_name)
|
20 |
|
21 |
-
# Function to translate using
|
22 |
-
def translate(sentence
|
23 |
# Prepare the input for the model
|
24 |
-
input_text = f"
|
25 |
input_ids = tokenizer(input_text, return_tensors="pt").input_ids
|
26 |
|
27 |
-
#
|
28 |
-
|
29 |
-
outputs = model.generate(input_ids, num_beams=3, max_length=50, early_stopping=True)
|
30 |
-
elif strategy == 'top_k_sampling':
|
31 |
-
outputs = model.generate(input_ids, do_sample=True, top_k=50, max_length=50)
|
32 |
-
elif strategy == 'top_p_sampling':
|
33 |
-
outputs = model.generate(input_ids, do_sample=True, top_p=0.92, max_length=50)
|
34 |
-
elif strategy == 'temperature_sampling':
|
35 |
-
outputs = model.generate(input_ids, do_sample=True, temperature=0.7, max_length=50)
|
36 |
-
else:
|
37 |
-
# Default to greedy decoding
|
38 |
-
outputs = model.generate(input_ids, max_length=50)
|
39 |
|
40 |
# Decode the generated translation
|
41 |
translation = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
@@ -43,23 +33,18 @@ def translate(sentence, strategy='beam_search'):
|
|
43 |
|
44 |
# French sentences from easy to advanced
|
45 |
sentences = [
|
|
|
|
|
|
|
|
|
|
|
46 |
"Il fait beau aujourd'hui.",
|
47 |
"J'aime lire des livres et regarder des films pendant mon temps libre.",
|
48 |
"Si j'avais su que tu venais, j'aurais préparé quelque chose de spécial pour le dîner.",
|
49 |
"Même si les avancées technologiques apportent de nombreux avantages, elles posent également des défis éthiques considérables qu'il nous faut relever."
|
50 |
]
|
51 |
|
52 |
-
# Translate each sentence
|
53 |
for sentence in sentences:
|
54 |
-
translated_sentence = translate(sentence
|
55 |
-
print(f"French: {sentence}\nEnglish
|
56 |
-
|
57 |
-
translated_sentence = translate(sentence, strategy='top_k_sampling')
|
58 |
-
print(f"English (Top-k Sampling): {translated_sentence}\n")
|
59 |
-
|
60 |
-
translated_sentence = translate(sentence, strategy='top_p_sampling')
|
61 |
-
print(f"English (Top-p Sampling): {translated_sentence}\n")
|
62 |
-
|
63 |
-
translated_sentence = translate(sentence, strategy='temperature_sampling')
|
64 |
-
print(f"English (Temperature Sampling): {translated_sentence}\n")
|
65 |
-
|
|
|
18 |
tokenizer = T5Tokenizer.from_pretrained(model_name)
|
19 |
model = T5ForConditionalGeneration.from_pretrained(model_name)
|
20 |
|
21 |
+
# Function to translate using beam search (default strategy)
|
22 |
+
def translate(sentence):
|
23 |
# Prepare the input for the model
|
24 |
+
input_text = f": {sentence}"
|
25 |
input_ids = tokenizer(input_text, return_tensors="pt").input_ids
|
26 |
|
27 |
+
# Generate translation using beam search
|
28 |
+
outputs = model.generate(input_ids, num_beams=3, max_length=50, early_stopping=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
# Decode the generated translation
|
31 |
translation = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
33 |
|
34 |
# French sentences from easy to advanced
|
35 |
sentences = [
|
36 |
+
"Le soleil se lève à l'est et se couche à l'ouest.",
|
37 |
+
"Les scientifiques travaillent dur pour trouver un remède.",
|
38 |
+
"La capitale de la France est Paris.",
|
39 |
+
"Il a plu toute la journée hier.",
|
40 |
+
"Je voudrais un café s'il vous plaît.",
|
41 |
"Il fait beau aujourd'hui.",
|
42 |
"J'aime lire des livres et regarder des films pendant mon temps libre.",
|
43 |
"Si j'avais su que tu venais, j'aurais préparé quelque chose de spécial pour le dîner.",
|
44 |
"Même si les avancées technologiques apportent de nombreux avantages, elles posent également des défis éthiques considérables qu'il nous faut relever."
|
45 |
]
|
46 |
|
47 |
+
# Translate each sentence and print the best translation
|
48 |
for sentence in sentences:
|
49 |
+
translated_sentence = translate(sentence)
|
50 |
+
print(f"French: {sentence}\nEnglish: {translated_sentence}\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|