Commit • c9c7b63
1 Parent(s): 781ba83
deploy app

Files changed:
- pages/page_1.py +3 -0
- streamlit_test.py +140 -0
- wolof-translate/setup.py +5 -0
- wolof-translate/wolof_translate.egg-info/PKG-INFO +9 -0
- wolof-translate/wolof_translate.egg-info/SOURCES.txt +59 -0
- wolof-translate/wolof_translate.egg-info/dependency_links.txt +1 -0
- wolof-translate/wolof_translate.egg-info/top_level.txt +1 -0
- wolof-translate/wolof_translate/__init__.py +0 -0
- wolof-translate/wolof_translate/__pycache__/__init__.cpython-310.pyc +0 -0
- wolof-translate/wolof_translate/__pycache__/dataset_v1.cpython-310.pyc +0 -0
- wolof-translate/wolof_translate/__pycache__/sent_transformers.cpython-310.pyc +0 -0
- wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v3/best_checkpoints.json +13 -0
- wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v3/best_checkpoints.pth +3 -0
pages/page_1.py
ADDED
@@ -0,0 +1,3 @@
+import streamlit as st
+
+st.markdown("Page 1")
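Note: because this file sits in a pages/ directory, Streamlit should pick it up automatically as an extra page of a multipage app, with streamlit_test.py presumably serving as the entry-point script.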
streamlit_test.py
ADDED
@@ -0,0 +1,140 @@
+from transformers import T5ForConditionalGeneration, T5TokenizerFast
+from torch.utils.data import DataLoader
+import streamlit as st
+import torch
+import os
+
+
+# Define the main page
+st.markdown("Translation page 🔠")
+
+# Dropdown for the translation type
+translation_type = st.sidebar.selectbox("Translation Type", options=["French ➡️ Wolof", "Wolof ➡️ French"])
+
+# Define a dictionary of model versions
+models = {
+    "Version ✌️": {
+        "French ➡️ Wolof": {
+            "checkpoints": "wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v4",
+            "tokenizer": "wolof-translate/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v4.json",
+            "max_len": None
+        }
+    },
+    "Version ☝️": {
+        "French ➡️ Wolof": {
+            "checkpoints": "wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v3",
+            "tokenizer": "wolof-translate/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v3.json",
+            "max_len": 51
+        }
+    }
+}
+
+# Dropdown for the model version
+version = st.sidebar.selectbox("Model version", options=["Version ☝️", "Version ✌️"])
+
+# Retrieve the number of sentences to provide
+number = st.sidebar.number_input("Give the number of sentences that you want to provide", min_value=1,
+                                 max_value=100)
+
+# Retrieve the sampling temperature
+temperature = st.sidebar.slider("How random do you want the translated sentences to be (from 0% to 100%)?", min_value=0,
+                                max_value=100)
+
+
+# Run the translation process
+try:
+
+    # Load the checkpoints
+    checkpoints = torch.load(os.path.join(models[version][translation_type]['checkpoints'], "best_checkpoints.pth"))
+
+    # Retrieve the tokenizer file
+    tokenizer_file = models[version][translation_type]['tokenizer']
+
+    # Retrieve the max length
+    max_len = models[version][translation_type]['max_len']
+
+    # Build the best model
+    @st.cache_resource
+    def get_model():
+
+        # Initialize the tokenizer
+        tokenizer = T5TokenizerFast(tokenizer_file=tokenizer_file)
+
+        # Initialize the model
+        model_name = 't5-small'
+
+        model = T5ForConditionalGeneration.from_pretrained(model_name)
+
+        # Resize the token embeddings to match the custom tokenizer
+        model.resize_token_embeddings(len(tokenizer))
+
+        # Load the fine-tuned weights
+        model.load_state_dict(checkpoints['model_state_dict'])
+
+        return model, tokenizer
+
+    model, tokenizer = get_model()
+
+    # Set the model to eval mode
+    _ = model.eval()
+
+    # Add a title
+    st.header("Translate French sentences into Wolof 👌")
+
+    # Create two columns
+    left, right = st.columns(2)
+
+    # Collect the sentences
+    left.subheader('Give me some sentences in French: ')
+
+    for i in range(number):
+
+        left.text_input(f"- Sentence number {i + 1}", key=f"sentence{i}")
+
+    # Run model inference on the provided sentences
+    original_translations, predicted_translations, original_texts, scores = [], [], [], {}
+
+    # Print the translations recovered from the session state
+    right.subheader("Translation to Wolof:")
+
+    for i in range(number):
+
+        sentence = st.session_state[f"sentence{i}"]
+
+        if not sentence == "":
+
+            # Append the end-of-sequence token
+            sentence += tokenizer.eos_token
+
+            # Encode the sentence
+            encoding = tokenizer([sentence], return_tensors='pt', max_length=max_len, padding='max_length', truncation=True)
+
+            # Retrieve the input ids
+            input_ids = encoding.input_ids
+
+            # Retrieve the attention mask
+            mask = encoding.attention_mask
+
+            # Retrieve the pad token id
+            pad_token_id = tokenizer.pad_token_id
+
+            # Perform the prediction
+            predictions = model.generate(input_ids, do_sample=False, top_k=50, max_length=max_len, top_p=0.90,
+                                         temperature=temperature/100, num_return_sequences=1,
+                                         attention_mask=mask, pad_token_id=pad_token_id)
+
+            # Decode the predictions
+            predicted_sentence = tokenizer.batch_decode(predictions, skip_special_tokens=True)
+
+            # Display the prediction
+            right.write(f"{i+1}. {predicted_sentence[0]}")
+
+        else:
+
+            # Display an empty entry for an empty input
+            right.write(f"{i+1}. ")
+
+except Exception as e:
+
+    st.warning("The chosen model is not available yet!", icon="⚠️")
+
+    # st.write(e)
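Note: the same inference path can be exercised outside Streamlit. The snippet below is an illustrative sketch, not part of this commit; it assumes the Version ☝️ checkpoint and tokenizer paths from the models dictionary above and uses a made-up input sentence.

    import torch
    from transformers import T5ForConditionalGeneration, T5TokenizerFast

    CKPT_DIR = "wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v3"
    TOKENIZER_FILE = "wolof-translate/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v3.json"

    # Rebuild the tokenizer and the resized t5-small model, then load the fine-tuned weights
    tokenizer = T5TokenizerFast(tokenizer_file=TOKENIZER_FILE)
    model = T5ForConditionalGeneration.from_pretrained("t5-small")
    model.resize_token_embeddings(len(tokenizer))
    checkpoints = torch.load(f"{CKPT_DIR}/best_checkpoints.pth", map_location="cpu")
    model.load_state_dict(checkpoints["model_state_dict"])
    model.eval()

    # Translate one French sentence (greedy decoding, max length 51 as in Version ☝️)
    sentence = "Bonjour, comment allez-vous ?" + tokenizer.eos_token
    encoding = tokenizer([sentence], return_tensors="pt", max_length=51,
                         padding="max_length", truncation=True)
    with torch.no_grad():
        output = model.generate(encoding.input_ids, attention_mask=encoding.attention_mask,
                                max_length=51, pad_token_id=tokenizer.pad_token_id)
    print(tokenizer.batch_decode(output, skip_special_tokens=True)[0])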
wolof-translate/setup.py
ADDED
@@ -0,0 +1,5 @@
+from setuptools import setup
+
+setup(name="wolof_translate", version="0.0.1", author="Oumar Kane", author_email="oumar.kane@univ-thies.sn",
+      description="Contains functions and classes to process corpora for translation between Wolof text and other languages.",
+      requires=['spacy', 'nltk', 'gensim'])
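Note: in the call above, requires= only records the Requires: metadata shown in PKG-INFO below; it does not make pip install spacy, nltk, or gensim. A minimal sketch of the more conventional install_requires form (illustrative only, not part of this commit):

    from setuptools import setup, find_packages

    # install_requires, unlike requires, makes pip install the dependencies;
    # find_packages() picks up the wolof_translate package, which the committed setup() does not declare
    setup(name="wolof_translate", version="0.0.1",
          packages=find_packages(),
          install_requires=["spacy", "nltk", "gensim"])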
wolof-translate/wolof_translate.egg-info/PKG-INFO
ADDED
@@ -0,0 +1,9 @@
+Metadata-Version: 2.1
+Name: wolof-translate
+Version: 0.0.1
+Summary: Contains functions and classes to process corpora for translation between Wolof text and other languages.
+Author: Oumar Kane
+Author-email: oumar.kane@univ-thies.sn
+Requires: spacy
+Requires: nltk
+Requires: gensim
wolof-translate/wolof_translate.egg-info/SOURCES.txt
ADDED
@@ -0,0 +1,59 @@
+setup.py
+wolof_translate/__init__.py
+wolof_translate.egg-info/PKG-INFO
+wolof_translate.egg-info/SOURCES.txt
+wolof_translate.egg-info/dependency_links.txt
+wolof_translate.egg-info/top_level.txt
+wolof_translate/__pycache__/__init__.cpython-310.pyc
+wolof_translate/__pycache__/dataset_v1.cpython-310.pyc
+wolof_translate/__pycache__/sent_transformers.cpython-310.pyc
+wolof_translate/data/__init__.py
+wolof_translate/data/dataset_v1.py
+wolof_translate/data/dataset_v2.py
+wolof_translate/data/dataset_v3.py
+wolof_translate/data/__pycache__/__init__.cpython-310.pyc
+wolof_translate/data/__pycache__/dataset_v1.cpython-310.pyc
+wolof_translate/data/__pycache__/dataset_v2.cpython-310.pyc
+wolof_translate/models/__init__.py
+wolof_translate/models/__pycache__/__init__.cpython-310.pyc
+wolof_translate/models/transformers/__init__.py
+wolof_translate/models/transformers/main.py
+wolof_translate/models/transformers/optimization.py
+wolof_translate/models/transformers/position.py
+wolof_translate/models/transformers/size.py
+wolof_translate/models/transformers/__pycache__/__init__.cpython-310.pyc
+wolof_translate/models/transformers/__pycache__/main.cpython-310.pyc
+wolof_translate/models/transformers/__pycache__/optimization.cpython-310.pyc
+wolof_translate/models/transformers/__pycache__/position.cpython-310.pyc
+wolof_translate/models/transformers/__pycache__/size.cpython-310.pyc
+wolof_translate/pipe/__init__.py
+wolof_translate/pipe/nlp_pipeline.py
+wolof_translate/tokenizers/__init__.py
+wolof_translate/tokenizers/adverse_tokenizer.json
+wolof_translate/tokenizers/tokenizer_v1.json
+wolof_translate/tokenizers/__pycache__/__init__.cpython-310.pyc
+wolof_translate/tokenizers/t5_tokenizers/fr_tokenizer_v1.json
+wolof_translate/tokenizers/t5_tokenizers/tokenizer_v1.json
+wolof_translate/tokenizers/t5_tokenizers/tokenizer_v2.json
+wolof_translate/tokenizers/t5_tokenizers/tokenizer_v3.json
+wolof_translate/tokenizers/t5_tokenizers/wf_tokenizer_v1.json
+wolof_translate/trainers/__init__.py
+wolof_translate/trainers/transformer_trainer.py
+wolof_translate/trainers/__pycache__/__init__.cpython-310.pyc
+wolof_translate/trainers/__pycache__/transformer_trainer.cpython-310.pyc
+wolof_translate/utils/__init__.py
+wolof_translate/utils/evaluation.py
+wolof_translate/utils/extract_poems.py
+wolof_translate/utils/extract_sentences.py
+wolof_translate/utils/sent_corrections.py
+wolof_translate/utils/sent_transformers.py
+wolof_translate/utils/sent_unification.py
+wolof_translate/utils/split_with_valid.py
+wolof_translate/utils/tokenize_text.py
+wolof_translate/utils/__pycache__/__init__.cpython-310.pyc
+wolof_translate/utils/__pycache__/evaluation.cpython-310.pyc
+wolof_translate/utils/__pycache__/sent_corrections.cpython-310.pyc
+wolof_translate/utils/__pycache__/sent_transformers.cpython-310.pyc
+wolof_translate/utils/__pycache__/sent_unification.cpython-310.pyc
+wolof_translate/utils/__pycache__/split_with_valid.cpython-310.pyc
+wolof_translate/utils/__pycache__/tokenize_text.cpython-310.pyc
wolof-translate/wolof_translate.egg-info/dependency_links.txt
ADDED
@@ -0,0 +1 @@
+
wolof-translate/wolof_translate.egg-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+wolof_translate
wolof-translate/wolof_translate/__init__.py
ADDED
File without changes
wolof-translate/wolof_translate/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (244 Bytes)
wolof-translate/wolof_translate/__pycache__/dataset_v1.cpython-310.pyc
ADDED
Binary file (2.82 kB)
wolof-translate/wolof_translate/__pycache__/sent_transformers.cpython-310.pyc
ADDED
Binary file (949 Bytes)
wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v3/best_checkpoints.json
ADDED
@@ -0,0 +1,13 @@
+{
+    "metrics": {
+        "train_loss": 0.004466977413735216,
+        "test_loss": 0.5528496630489826,
+        "bleu": 24.9553,
+        "gen_len": 7.774,
+        "current_epoch": 759
+    },
+    "best_performance": {
+        "best_score": 24.9553,
+        "best_epoch": 759
+    }
+}
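Note: the app itself only loads best_checkpoints.pth; this JSON is a training summary (best BLEU of 24.9553 reached at epoch 759). An illustrative way to read it, not part of the commit:

    import json

    path = ("wolof-translate/wolof_translate/checkpoints/"
            "t5_small_custom_train_results_fw_v3/best_checkpoints.json")
    with open(path) as f:
        summary = json.load(f)
    print(summary["best_performance"]["best_score"])  # 24.9553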
wolof-translate/wolof_translate/checkpoints/t5_small_custom_train_results_fw_v3/best_checkpoints.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a5d9617eba185df15b75da1871c2f2b5d2ab32eb089c21438228e8bcfac1595
+size 540763111
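Note: what is committed here is a Git LFS pointer; the actual ~540 MB state dict is only present on disk once the LFS object has been pulled. A quick illustrative check (not part of the commit) that the real weights, rather than the pointer text, were fetched:

    import os

    path = ("wolof-translate/wolof_translate/checkpoints/"
            "t5_small_custom_train_results_fw_v3/best_checkpoints.pth")
    # The pointer file is only a few hundred bytes; the LFS object recorded above is 540763111 bytes.
    print(os.path.getsize(path) == 540763111)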