cswamy committed on
Commit
7623604
1 Parent(s): e69df1e

initial commit

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. app.py +47 -0
  3. marian_finetuned_kde4_enfr.pth +3 -0
  4. model.py +11 -0
  5. requirements.txt +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ marian_finetuned_kde4_enfr.pth filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+
4
+ from model import create_marian_enfr
5
+
6
# Build the base Marian model and its matching tokenizer.
model, tokenizer = create_marian_enfr()

# Load the fine-tuned weights from the checkpoint file into the model,
# mapping every tensor onto the CPU.
model.load_state_dict(
    torch.load(f="marian_finetuned_kde4_enfr.pth", map_location=torch.device("cpu"))
)
15
+
16
+ # Predict function
17
def predict(text: str) -> str:
    """Translate one English sentence to French.

    Args:
        text: English text to translate. It is tokenized with truncation
            to at most 128 tokens.

    Returns:
        The decoded translation as a single string, or an empty string
        when ``text`` is empty or contains only whitespace.
    """
    # Nothing to translate: skip tokenization and generation entirely.
    if not text or not text.strip():
        return ""

    # Tokenize the input and request PyTorch tensors for the model.
    encoded = tokenizer(
        text,
        max_length=128,
        truncation=True,
        return_tensors="pt",
    )
    output_tokens = model.generate(**encoded)
    decoded = tokenizer.batch_decode(output_tokens, skip_special_tokens=True)

    # batch_decode returns one string per sequence; a single input text
    # yields a single sequence, so hand back that string rather than the
    # enclosing list (a "text" output would otherwise show the list repr).
    return decoded[0] if decoded else ""
29
+
30
# Sample sentences offered as ready-made inputs in the UI.
examples_list = ['What a beautiful day',
                 'I love music']

# Heading and description passed to the gradio interface.
title = "English to French translator"
description = "Marian model finetuned for english to french translation on the kde4 dataset."

# gr.Textbox is used instead of the deprecated gr.inputs.Textbox namespace;
# it accepts the same label/placeholder keywords.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="Input", placeholder="Enter sentence here..."),
    outputs="text",
    examples=examples_list,
    title=title,
    description=description,
)

# Launch gradio
demo.launch()
marian_finetuned_kde4_enfr.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca3fb97610cb79aa65b0f377810a298556309c239679cfac8222deca6bff682f
3
+ size 300875111
model.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
2
+
3
def create_marian_enfr(checkpoint: str = 'Helsinki-NLP/opus-mt-en-fr'):
    """Initialize the seq2seq model and its tokenizer from a checkpoint.

    Args:
        checkpoint: Model identifier or path handed to ``from_pretrained``.
            Defaults to the English-to-French Marian checkpoint.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # return_tensors is an argument of the tokenizer *call*, not of
    # from_pretrained, so it is not passed here; callers request tensors
    # when they tokenize.
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    return model, tokenizer
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch==1.12.0
2
+ gradio==3.1.4
3
+ transformers==4.33.1