barbaroo committed on
Commit
79787d6
1 Parent(s): 873d959

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -7
app.py CHANGED
@@ -11,6 +11,8 @@ nltk.download('punkt')
11
  # Load the models and tokenizers
12
  model_checkpoint_fo_en = "barbaroo/nllb_200_600M_fo_en"
13
  model_checkpoint_en_fo = "barbaroo/nllb_200_600M_en_fo"
 
 
14
 
15
  tokenizer_fo_en = AutoTokenizer.from_pretrained(model_checkpoint_fo_en)
16
  model_fo_en = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_fo_en)
@@ -18,14 +20,22 @@ model_fo_en = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_fo_en)
18
  tokenizer_en_fo = AutoTokenizer.from_pretrained(model_checkpoint_en_fo)
19
  model_en_fo = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_en_fo)
20
 
 
 
 
 
 
 
21
  # Check if a GPU is available and move models to GPU if possible
22
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
23
  print(f"Using device: {device}")
24
 
25
  if torch.cuda.is_available():
26
  print("GPU is available. Initializing models on GPU.")
27
- #model_fo_en.to(device)
28
- #model_en_fo.to(device)
 
 
29
  else:
30
  print("GPU is not available. Using CPU.")
31
 
@@ -34,7 +44,7 @@ def split_into_sentences(text):
34
 
35
  @spaces.GPU
36
  def translate(text, model, tokenizer, max_length=80):
37
- # Make sure model and tokenizer are on the correct device
38
  model.to(device)
39
 
40
  sentences = split_into_sentences(text)
@@ -63,9 +73,29 @@ def handle_input(text, file, direction):
63
  if direction == "fo_en":
64
  model = model_fo_en
65
  tokenizer = tokenizer_fo_en
66
- else:
67
  model = model_en_fo
68
  tokenizer = tokenizer_en_fo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  # Translate the text if it's not empty
71
  if text:
@@ -79,11 +109,11 @@ iface = gr.Interface(
79
  inputs=[
80
  gr.Textbox(lines=2, placeholder="Type here or upload a text file..."),
81
  gr.File(label="or Upload Text File", type="binary"),
82
- gr.Dropdown(label="Translation Direction", choices=["fo_en", "en_fo"], value="fo_en")
83
  ],
84
  outputs="text",
85
- title="Bidirectional Translator",
86
- description="Enter text directly or upload a text file (.txt) to translate between Faroese and English."
87
  )
88
 
89
  # Launch the interface
 
11
  # Load the models and tokenizers
12
  model_checkpoint_fo_en = "barbaroo/nllb_200_600M_fo_en"
13
  model_checkpoint_en_fo = "barbaroo/nllb_200_600M_en_fo"
14
+ model_checkpoint_uk_en = "Helsinki-NLP/opus-mt-uk-en"
15
+ model_checkpoint_en_uk = "Helsinki-NLP/opus-mt-en-uk"
16
 
17
  tokenizer_fo_en = AutoTokenizer.from_pretrained(model_checkpoint_fo_en)
18
  model_fo_en = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_fo_en)
 
20
  tokenizer_en_fo = AutoTokenizer.from_pretrained(model_checkpoint_en_fo)
21
  model_en_fo = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_en_fo)
22
 
23
+ tokenizer_uk_en = AutoTokenizer.from_pretrained(model_checkpoint_uk_en)
24
+ model_uk_en = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_uk_en)
25
+
26
+ tokenizer_en_uk = AutoTokenizer.from_pretrained(model_checkpoint_en_uk)
27
+ model_en_uk = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_en_uk)
28
+
29
  # Check if a GPU is available and move models to GPU if possible
30
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
31
  print(f"Using device: {device}")
32
 
33
  if torch.cuda.is_available():
34
  print("GPU is available. Initializing models on GPU.")
35
+ model_fo_en.to(device)
36
+ model_en_fo.to(device)
37
+ model_uk_en.to(device)
38
+ model_en_uk.to(device)
39
  else:
40
  print("GPU is not available. Using CPU.")
41
 
 
44
 
45
  @spaces.GPU
46
  def translate(text, model, tokenizer, max_length=80):
47
+ # Ensure model is on the correct device
48
  model.to(device)
49
 
50
  sentences = split_into_sentences(text)
 
73
  if direction == "fo_en":
74
  model = model_fo_en
75
  tokenizer = tokenizer_fo_en
76
+ elif direction == "en_fo":
77
  model = model_en_fo
78
  tokenizer = tokenizer_en_fo
79
+ elif direction == "uk_en":
80
+ model = model_uk_en
81
+ tokenizer = tokenizer_uk_en
82
+ elif direction == "en_uk":
83
+ model = model_en_uk
84
+ tokenizer = tokenizer_en_uk
85
+ elif direction == "uk_fo":
86
+ # Ukrainian to Faroese via English pivot
87
+ model = model_uk_en
88
+ tokenizer = tokenizer_uk_en
89
+ text = translate(text, model, tokenizer)
90
+ model = model_en_fo
91
+ tokenizer = tokenizer_en_fo
92
+ elif direction == "fo_uk":
93
+ # Faroese to Ukrainian via English pivot
94
+ model = model_fo_en
95
+ tokenizer = tokenizer_fo_en
96
+ text = translate(text, model, tokenizer)
97
+ model = model_en_uk
98
+ tokenizer = tokenizer_en_uk
99
 
100
  # Translate the text if it's not empty
101
  if text:
 
109
  inputs=[
110
  gr.Textbox(lines=2, placeholder="Type here or upload a text file..."),
111
  gr.File(label="or Upload Text File", type="binary"),
112
+ gr.Dropdown(label="Translation Direction", choices=["fo_en", "en_fo", "uk_en", "en_uk", "uk_fo", "fo_uk"], value="fo_en")
113
  ],
114
  outputs="text",
115
+ title="Multilingual Translator",
116
+ description="Enter text directly or upload a text file (.txt) to translate between Faroese, Ukrainian, and English."
117
  )
118
 
119
  # Launch the interface