skoneru committed on
Commit
f506c0e
·
1 Parent(s): 489d465

Add application file

Browse files
Files changed (2) hide show
  1. app.py +86 -0
  2. description.md +13 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+ from huggingface_hub import InferenceClient
5
+ from transformers import AutoTokenizer
6
+
7
# Text-generation endpoint used for all inference calls.
# NOTE(review): "http://i13hpc68:8056" looks like an internal cluster host —
# this app will not work outside that network; confirm before deploying.
client = InferenceClient(model="http://i13hpc68:8056")
# Llama-2-13b-chat tokenizer, used only to count tokens when validating input
# length (tokenize_len). NOTE(review): cache_dir is a hard-coded cluster path —
# loading will fall back to the default cache (or fail) on other machines.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-13b-chat-hf", padding_side='left', cache_dir="/project/OML/skoneru/iwslt23/scripts/bloom/cache/",return_token_type_ids=False)
9
+
10
+
11
def inference(message):
    """Send *message* to the hosted LLM and return its completion.

    The model emits " <SS> " as an in-line sentence separator; it is mapped
    back to real newlines before returning.
    """
    generated = client.text_generation(message, max_new_tokens=512)
    return generated.replace("<SS> ", "\n")
14
+
15
def tokenize_len(sents):
    """Return the token count of *sents* joined into one newline-separated string."""
    joined = "\n".join(sents)
    return len(tokenizer.encode(joined))
17
+
18
def validate_inputs(source, sent_hyp, pe_hyp):
    """Check that the three text boxes are mutually consistent.

    Conditions (all must hold):
      * at least one source sentence,
      * exactly one sentence-level hypothesis line per source line,
      * no more manually post-edited lines than source lines,
      * each box tokenizes to at most ``max_len`` (512) tokens.

    Returns:
        bool: True when all conditions hold, False otherwise.
    """
    src_sents = source.split("\n")
    sent_hyp_sents = sent_hyp.split("\n")
    pe_hyp_sents = pe_hyp.split("\n")
    max_len = 512

    if len(src_sents) < 1:
        return False

    if len(sent_hyp_sents) != len(src_sents) or len(pe_hyp_sents) > len(src_sents):
        return False

    # BUG FIX: the original returned the undefined name `Flase` here, raising
    # a NameError whenever any box exceeded the token limit.
    if (tokenize_len(src_sents) > max_len
            or tokenize_len(sent_hyp_sents) > max_len
            or tokenize_len(pe_hyp_sents) > max_len):
        return False

    return True
34
+
35
+
36
def translate(source, sent_hyp, pe_hyp):
    """Refine sentence-level translations at document level with the LLM.

    Args:
        source:   newline-separated English sentences.
        sent_hyp: newline-separated sentence-level German hypotheses
                  (one line per source line).
        pe_hyp:   optional partial manual post-edit; the model continues
                  generating from this prefix.

    Returns:
        The manual post-edit prefix followed by the model's continuation.

    Raises:
        gr.Error: when the inputs fail ``validate_inputs``.
    """
    if validate_inputs(source, sent_hyp, pe_hyp):
        prefix = "English:\n"
        suffix = "\nGerman Translation:\n"
        pe_suffix = "\nPost-Edited Translation:\n"

        # The model expects " <SS> " as the in-line sentence separator.
        source = " <SS> ".join(source.split("\n"))
        sent_hyp = " <SS> ".join(sent_hyp.split("\n"))
        pe_hyp = " <SS> ".join(pe_hyp.split("\n"))

        prompt = prefix + source + "\n" + suffix + sent_hyp + "\n" + pe_suffix + "\n" + pe_hyp
    else:
        # BUG FIX: the message previously claimed a 256-token limit, but
        # validate_inputs enforces max_len = 512.
        raise gr.Error("Please make sure that you meet the following conditions: Source and sentence level hypothesis lines are equal and the initial post-edited translation lines are less than source, The number of tokens in each box is less than 512.")

    # Map the separator back to newlines so the manual prefix displays correctly.
    pe_hyp = "\n".join(pe_hyp.split(" <SS> "))
    return pe_hyp + inference(prompt)
54
+
55
+
56
# Demo inputs for the Gradio examples panel. Each entry is
# [English source, sentence-level German hypothesis, partial manual post-edit].
# The *_false variants leave the post-edit box empty; the *_correct variants
# seed it with a human correction that the model should continue from.

# Pronoun disambiguation: "It" referring to the garland should become "Er".
example_pronoun_false = ["- Yeah, but Rico's garland beat them all.\nIt was big.", "- Ja, aber Ricos Kränz war der schönste.\nEs war groß.",""]
example_pronoun_correct = ["- Yeah, but Rico's garland beat them all.\nIt was big.", "- Ja, aber Ricos Kränz war der schönste.\nEs war groß.","- Ja, aber Ricos Kranz"]

# Terminology consistency: propagate "LLMs" once the user introduces it.
example_term_false = ["Lets talk about large language models.\nThese days, large language models can be used everywhere", "Lassen Sie uns über große Sprachmodelle sprechen.\nHeutzutage können große Sprachmodelle überall eingesetzt werden",""]
example_term_correct = ["Lets talk about large language models.\nThese days, large language models can be used everywhere", "Lassen Sie uns über große Sprachmodelle sprechen.\nHeutzutage können große Sprachmodelle überall eingesetzt werden","Lassen Sie uns über LLMs sprechen."]

# Formality consistency: keep Sie (formal) vs. du (informal) uniform.
example_formal_false = ["You should be excited\nbut, calm down!\nyou must be careful","Sie sollten aufgeregt sein\naber beruhigen Sie sich!\ndu musst vorsichtig sein",""]
example_formal_correct = ["You should be excited\nbut, calm down!\nyou must be careful","Sie sollten aufgeregt sein\naber beruhigen Sie sich!\ndu musst vorsichtig sein","Du solltest aufgeregt sein"]
65
+
66
# Load the markdown shown above the interface.
# BUG FIX: the original used readlines() followed by "\n".join(...), which
# doubled every newline (readlines keeps the trailing "\n" on each line) and
# broke the markdown rendering. read() preserves the file verbatim.
with open('description.md', mode='r', encoding='utf-8') as f:
    description = f.read()
69
+
70
# Build and launch the Gradio UI. Three text boxes in (source, sentence-level
# hypothesis, optional manual post-edit), one text box out (refined translation).
# BUG FIX: corrected user-facing placeholder typos "Senteces" -> "Sentences"
# and "Tranlations" -> "Translations".
iface = gr.Interface(
    fn=translate,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your English Sentences that you want to translate", label="English Sentences"),
        gr.Textbox(lines=2, placeholder="Enter your sentence-level German Translations that you want to post-edit using Llama2", label="Sentence-Level German Translations"),
        gr.Textbox(lines=2, placeholder="Enter your partially corrected translation and the model will continue from there - Can be left empty or generate the output once and correct it later :)", label="Manual Post-Edited German Translation"),
    ],
    outputs=gr.Textbox(lines=2, placeholder="Enter your inputs and click submit!", label="Automatic Post-Edited German Translation"),
    examples=[
        example_term_false,
        example_term_correct,
        example_formal_false,
        example_formal_correct,
        example_pronoun_false,
        example_pronoun_correct,
    ],
    title="Contextual Refinement of Translations: Integrating Manual Feedback",
    description=description,
)

iface.launch(share=True)
description.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # What do you need to use this demo?
2
+
3
+ 1. English Sentences that you want to translate at document-level
4
+ 2. Sentence-level German translations that Llama2 will refine at document-level
5
+
6
+ You can also simply play around with the examples :D
7
+
8
+ # Going through the examples!
9
+ We provide three different types of examples covering contextual phenomena — terminology, formality level, and pronoun disambiguation — that exploit manual feedback.
10
+
11
+ - **Terminology (Example 1 & 2):** You can see that the term "Large Language Model" is translated into "große Sprachmodelle". Although this is correct, let's say you decide that it should be abbreviated as LLMs in the German translations. Then, you correct the first sentence by replacing it with LLMs. The model then follows this and uses LLMs for the next sentence!
12
+ - **Formality (Example 3 & 4):** The word "you" in English can be translated to German as Sie (Formal) and Du (Informal). The sentence-level model outputs are inconsistent and use both levels. However, you see that the LLM outputs are only formal. If you want it to be informal, you can change the formality level of the first translation and then let the model generate from that point.
13
+ - **Pronouns (Example 5 & 6):** Here, the translation of "it" is unknown from the sentence alone and can be "Er", "Es", or "Sie" depending on the gender. Since it refers to the garland, the right choice is "Er". However, the LLM mistranslates the garland into "Krone" and uses "Sie" as the pronoun. Once you fix the mistake and replace "Krone" with "Kränz", the LLM refines the translation and uses the right pronoun!