Commit c9c7b63
1 Parent(s): 781ba83

deploy app

pages/page_1.py ADDED
@@ -0,0 +1,3 @@
+ import streamlit as st
+
+ st.markdown("Page 1")
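Note: Streamlit automatically registers any script placed in a pages/ directory next to the main app script as an extra page of a multipage app, so this file appears in the sidebar navigation (labelled from its filename) without any extra wiring.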
streamlit_test.py ADDED
@@ -0,0 +1,140 @@
+ from transformers import T5ForConditionalGeneration, T5TokenizerFast
+ import streamlit as st
+ import torch
+ import os
+
+
+ # Define the main page
+ st.markdown("Translation page 🔠")
+
+ # Dropdown for the translation type
+ translation_type = st.sidebar.selectbox("Translation Type", options=["French ➡️ Wolof", "Wolof ➡️ French"])
+
+ # Dictionary mapping each version to its checkpoint directory, tokenizer file, and maximum length
+ models = {
+     "Version ✌️": {
+         "French ➡️ Wolof": {
+             "checkpoints": "wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v4",
+             "tokenizer": "wolof-translate/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v4.json",
+             "max_len": None
+         }
+     },
+     "Version ☝️": {
+         "French ➡️ Wolof": {
+             "checkpoints": "wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v3",
+             "tokenizer": "wolof-translate/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v3.json",
+             "max_len": 51
+         }
+     }
+ }
+
+ # Dropdown for the model version
+ version = st.sidebar.selectbox("Model version", options=["Version ☝️", "Version ✌️"])
+
+ # Number of sentences to translate
+ number = st.sidebar.number_input("Give the number of sentences that you want to provide", min_value=1,
+                                  max_value=100)
+
+ # Sampling temperature, expressed as a percentage
+ temperature = st.sidebar.slider("How random should the translated sentences be, from 0% to 100%?", min_value=0,
+                                 max_value=100)
+
+ # run the translation process
+ try:
+     # load the checkpoints
+     checkpoints = torch.load(os.path.join(models[version][translation_type]['checkpoints'], "best_checkpoints.pth"))
+
+     # retrieve the tokenizer file
+     tokenizer_file = models[version][translation_type]['tokenizer']
+
+     # retrieve the maximum length
+     max_len = models[version][translation_type]['max_len']
+
+     # build the best model (cached so that it is loaded only once per session)
+     @st.cache_resource
+     def get_model():
+
+         # initialize the tokenizer
+         tokenizer = T5TokenizerFast(tokenizer_file=tokenizer_file)
+
+         # initialize the model
+         model = T5ForConditionalGeneration.from_pretrained('t5-small')
+
+         # resize the token embeddings to match the custom tokenizer
+         model.resize_token_embeddings(len(tokenizer))
+
+         # load the fine-tuned weights
+         model.load_state_dict(checkpoints['model_state_dict'])
+
+         return model, tokenizer
+
+     model, tokenizer = get_model()
+
+     # set the model to eval mode
+     model.eval()
+
+     # Add a title
+     st.header("Translate French sentences into Wolof 👌")
+
+     # Two columns: French input on the left, Wolof output on the right
+     left, right = st.columns(2)
+
+     left.subheader('Give me some sentences in French:')
+
+     for i in range(number):
+         left.text_input(f"- Sentence number {i + 1}", key=f"sentence{i}")
+
+     right.subheader("Translation to Wolof:")
+
+     for i in range(number):
+
+         # retrieve the sentence typed by the user
+         text = st.session_state[f"sentence{i}"]
+
+         if text != "":
+
+             # append the end-of-sequence token and encode the sentence
+             encoding = tokenizer([text + tokenizer.eos_token], return_tensors='pt', max_length=max_len,
+                                  padding='max_length', truncation=True)
+
+             input_ids = encoding.input_ids
+             mask = encoding.attention_mask
+             pad_token_id = tokenizer.pad_token_id
+
+             # generate the translation, sampling only when a non-zero temperature is requested
+             # (temperature, top_k, and top_p have no effect in greedy mode)
+             predictions = model.generate(input_ids, do_sample=temperature > 0, top_k=50, max_length=max_len,
+                                          top_p=0.90, temperature=temperature / 100, num_return_sequences=1,
+                                          attention_mask=mask, pad_token_id=pad_token_id)
+
+             # decode the prediction
+             predicted_sentence = tokenizer.batch_decode(predictions, skip_special_tokens=True)
+
+             # display the translation
+             right.write(f"{i + 1}. {predicted_sentence[0]}")
+
+         else:
+
+             right.write(f"{i + 1}. ")
+
+ except Exception as e:
+
+     st.warning("The chosen model is not available yet!", icon="⚠️")
+
+     # uncomment to inspect the error while debugging
+     # st.write(e)
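For reference, the loading and generation steps above can be exercised outside Streamlit. A minimal sketch, assuming the v3 checkpoint and tokenizer committed below are present on disk, using greedy decoding and a hypothetical input sentence:

import os
import torch
from transformers import T5ForConditionalGeneration, T5TokenizerFast

checkpoint_dir = "wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v3"
tokenizer_file = "wolof-translate/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v3.json"

# rebuild the model exactly as the app does: base t5-small, resized embeddings, fine-tuned weights
tokenizer = T5TokenizerFast(tokenizer_file=tokenizer_file)
model = T5ForConditionalGeneration.from_pretrained("t5-small")
model.resize_token_embeddings(len(tokenizer))
checkpoints = torch.load(os.path.join(checkpoint_dir, "best_checkpoints.pth"))
model.load_state_dict(checkpoints["model_state_dict"])
model.eval()

# translate a single French sentence (example input is hypothetical)
encoding = tokenizer(["Bonjour." + tokenizer.eos_token], return_tensors="pt",
                     max_length=51, padding="max_length", truncation=True)
prediction = model.generate(encoding.input_ids, attention_mask=encoding.attention_mask,
                            max_length=51, pad_token_id=tokenizer.pad_token_id)
print(tokenizer.batch_decode(prediction, skip_special_tokens=True)[0])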
wolof-translate/setup.py ADDED
@@ -0,0 +1,5 @@
+ from setuptools import setup
+
+ setup(name="wolof_translate", version="0.0.1", author="Oumar Kane", author_email="oumar.kane@univ-thies.sn",
+       description="Contains functions and classes to process corpora for translation between Wolof text and other languages.",
+       requires=['spacy', 'nltk', 'gensim'])
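With this setup.py in place, the package can be installed in editable mode from the repository root with pip install -e wolof-translate, which makes wolof_translate importable by the app above.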
wolof-translate/wolof_translate.egg-info/PKG-INFO ADDED
@@ -0,0 +1,9 @@
+ Metadata-Version: 2.1
+ Name: wolof-translate
+ Version: 0.0.1
+ Summary: Contains functions and classes to process corpora for translation between Wolof text and other languages.
+ Author: Oumar Kane
+ Author-email: oumar.kane@univ-thies.sn
+ Requires: spacy
+ Requires: nltk
+ Requires: gensim
wolof-translate/wolof_translate.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,59 @@
+ setup.py
+ wolof_translate/__init__.py
+ wolof_translate.egg-info/PKG-INFO
+ wolof_translate.egg-info/SOURCES.txt
+ wolof_translate.egg-info/dependency_links.txt
+ wolof_translate.egg-info/top_level.txt
+ wolof_translate/__pycache__/__init__.cpython-310.pyc
+ wolof_translate/__pycache__/dataset_v1.cpython-310.pyc
+ wolof_translate/__pycache__/sent_transformers.cpython-310.pyc
+ wolof_translate/data/__init__.py
+ wolof_translate/data/dataset_v1.py
+ wolof_translate/data/dataset_v2.py
+ wolof_translate/data/dataset_v3.py
+ wolof_translate/data/__pycache__/__init__.cpython-310.pyc
+ wolof_translate/data/__pycache__/dataset_v1.cpython-310.pyc
+ wolof_translate/data/__pycache__/dataset_v2.cpython-310.pyc
+ wolof_translate/models/__init__.py
+ wolof_translate/models/__pycache__/__init__.cpython-310.pyc
+ wolof_translate/models/transformers/__init__.py
+ wolof_translate/models/transformers/main.py
+ wolof_translate/models/transformers/optimization.py
+ wolof_translate/models/transformers/position.py
+ wolof_translate/models/transformers/size.py
+ wolof_translate/models/transformers/__pycache__/__init__.cpython-310.pyc
+ wolof_translate/models/transformers/__pycache__/main.cpython-310.pyc
+ wolof_translate/models/transformers/__pycache__/optimization.cpython-310.pyc
+ wolof_translate/models/transformers/__pycache__/position.cpython-310.pyc
+ wolof_translate/models/transformers/__pycache__/size.cpython-310.pyc
+ wolof_translate/pipe/__init__.py
+ wolof_translate/pipe/nlp_pipeline.py
+ wolof_translate/tokenizers/__init__.py
+ wolof_translate/tokenizers/adverse_tokenizer.json
+ wolof_translate/tokenizers/tokenizer_v1.json
+ wolof_translate/tokenizers/__pycache__/__init__.cpython-310.pyc
+ wolof_translate/tokenizers/t5_tokenizers/fr_tokenizer_v1.json
+ wolof_translate/tokenizers/t5_tokenizers/tokenizer_v1.json
+ wolof_translate/tokenizers/t5_tokenizers/tokenizer_v2.json
+ wolof_translate/tokenizers/t5_tokenizers/tokenizer_v3.json
+ wolof_translate/tokenizers/t5_tokenizers/wf_tokenizer_v1.json
+ wolof_translate/trainers/__init__.py
+ wolof_translate/trainers/transformer_trainer.py
+ wolof_translate/trainers/__pycache__/__init__.cpython-310.pyc
+ wolof_translate/trainers/__pycache__/transformer_trainer.cpython-310.pyc
+ wolof_translate/utils/__init__.py
+ wolof_translate/utils/evaluation.py
+ wolof_translate/utils/extract_poems.py
+ wolof_translate/utils/extract_sentences.py
+ wolof_translate/utils/sent_corrections.py
+ wolof_translate/utils/sent_transformers.py
+ wolof_translate/utils/sent_unification.py
+ wolof_translate/utils/split_with_valid.py
+ wolof_translate/utils/tokenize_text.py
+ wolof_translate/utils/__pycache__/__init__.cpython-310.pyc
+ wolof_translate/utils/__pycache__/evaluation.cpython-310.pyc
+ wolof_translate/utils/__pycache__/sent_corrections.cpython-310.pyc
+ wolof_translate/utils/__pycache__/sent_transformers.cpython-310.pyc
+ wolof_translate/utils/__pycache__/sent_unification.cpython-310.pyc
+ wolof_translate/utils/__pycache__/split_with_valid.cpython-310.pyc
+ wolof_translate/utils/__pycache__/tokenize_text.cpython-310.pyc
wolof-translate/wolof_translate.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
+
wolof-translate/wolof_translate.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ wolof_translate
wolof-translate/wolof_translate/__init__.py ADDED
File without changes
wolof-translate/wolof_translate/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (244 Bytes).
wolof-translate/wolof_translate/__pycache__/dataset_v1.cpython-310.pyc ADDED
Binary file (2.82 kB).
wolof-translate/wolof_translate/__pycache__/sent_transformers.cpython-310.pyc ADDED
Binary file (949 Bytes).
wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v3/best_checkpoints.json ADDED
@@ -0,0 +1,13 @@
+ {
+     "metrics": {
+         "train_loss": 0.004466977413735216,
+         "test_loss": 0.5528496630489826,
+         "bleu": 24.9553,
+         "gen_len": 7.774,
+         "current_epoch": 759
+     },
+     "best_performance": {
+         "best_score": 24.9553,
+         "best_epoch": 759
+     }
+ }
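The app itself only loads the .pth file; this JSON is a human-readable record of the best run (BLEU 24.9553 at epoch 759). A small sketch for inspecting it, using the path as committed:

import json

path = "wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v3/best_checkpoints.json"
with open(path) as f:
    record = json.load(f)

# e.g. 24.9553 and 759
print(record["metrics"]["bleu"], record["best_performance"]["best_epoch"])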
wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v3/best_checkpoints.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0a5d9617eba185df15b75da1871c2f2b5d2ab32eb089c21438228e8bcfac1595
+ size 540763111
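Note that this file is a Git LFS pointer rather than the weights themselves: the actual state dict (about 541 MB) lives in LFS storage and is fetched on clone when Git LFS is installed, or afterwards with git lfs pull.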