Spaces:

lulmer
/

paraphraser_ai

Runtime error

App Files Files Community

ULMER Louis (T0240644) committed on Nov 5, 2022

Commit

51636fd

1 Parent(s): 8746267

updating paraphraser

Browse files

Files changed (6) hide show

.gitignore +1 -0
README.md +3 -3
app.py +10 -4
backend/data_augmenter.py +13 -4
footer.py +76 -0
requirements.txt +2 -1

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.pyc

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
 title: Paraphraser.ai
-emoji: 💻
-colorFrom: yellow
-colorTo: red
 sdk: streamlit
 sdk_version: 1.10.0
 app_file: app.py

 ---
 title: Paraphraser.ai
+emoji: ✍️
+colorFrom: red
+colorTo: green
 sdk: streamlit
 sdk_version: 1.10.0
 app_file: app.py

app.py CHANGED Viewed

@@ -1,19 +1,22 @@
 import os
 import streamlit as st
 from backend.data_augmenter import BackTranslatorAugmenter
 os.environ['NO_PROXY'] = '127.0.0.1'
-st.set_page_config(layout="wide", page_title="Paraphraser.AI", page_icon="🤖")
-st.title('Paraphraser.AI 🤖')
 st.header("An intelligent sentence paraphraser")
 model_selection = st.sidebar.selectbox(
     'Select a paraphraser:',
-    ['Vladimir 🧑🏼','Maria 👩🏽'],
 )
 input_text = st.text_area('Please type the text to paraphrase')
 class DummyAugmenter:
     def __init__(self, in_lang="en", out_lang="ru") -> None:
         pass
@@ -25,8 +28,11 @@ if model_selection == 'Vladimir 🧑🏼':
     model = BackTranslatorAugmenter(in_lang="en", out_lang="ru")
 if model_selection == 'Maria 👩🏽':
     model = BackTranslatorAugmenter(in_lang="en", out_lang="es")
 if input_text:
     st.header(f"Paraphrased text :")
     st.write("".join(model.back_translate(input_text)))

 import os
 import streamlit as st
 from backend.data_augmenter import BackTranslatorAugmenter
+from footer import footer
 os.environ['NO_PROXY'] = '127.0.0.1'
+st.set_page_config(layout="wide", page_title="Paraphraser.AI", page_icon="🤖✍️")
+st.title('Paraphraser.AI 🤖✍️')
 st.header("An intelligent sentence paraphraser")
+st.markdown('''This is a demo of a system that can rewrite some given paragraphs with slight differences.''')
 model_selection = st.sidebar.selectbox(
     'Select a paraphraser:',
+    ['Vladimir 🧑🏼','Maria 👩🏽','Jacques 👨'],
 )
 input_text = st.text_area('Please type the text to paraphrase')
 class DummyAugmenter:
     def __init__(self, in_lang="en", out_lang="ru") -> None:
         pass
     model = BackTranslatorAugmenter(in_lang="en", out_lang="ru")
 if model_selection == 'Maria 👩🏽':
     model = BackTranslatorAugmenter(in_lang="en", out_lang="es")
+if model_selection == 'Jacques 👨':
+    model = BackTranslatorAugmenter(in_lang="en", out_lang="fr")
 if input_text:
     st.header(f"Paraphrased text :")
     st.write("".join(model.back_translate(input_text)))
+footer()

backend/data_augmenter.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import argparse
 import time
 from tqdm import tqdm
@@ -7,6 +8,8 @@ import os
 import json
 import torch
 from dotenv import load_dotenv
 load_dotenv()
 from nltk.tokenize import sent_tokenize
@@ -63,9 +66,14 @@ class BackTranslatorAugmenter:
         if verbose:
             tic = time.time()
         encoded_text = self.in_tokenizer(
-            text, return_tensors="pt", padding=True, truncation=True
         ).to(self.device)
-        in_generated_ids = self.in_model.generate(**encoded_text)
         in_preds = [
             self.in_tokenizer.decode(
@@ -76,9 +84,10 @@ class BackTranslatorAugmenter:
         if verbose:
             print("in_pred : ", in_preds)
         encoded_text = self.out_tokenizer(
-            in_preds, return_tensors="pt", padding=True, truncation=True
         ).to(self.device)
-        out_generated_ids = self.out_model.generate(**encoded_text)
         out_preds = [
             self.out_tokenizer.decode(
                 gen_id, skip_special_tokens=True, clean_up_tokenization_spaces=True

+#%%
 import argparse
 import time
 from tqdm import tqdm
 import json
 import torch
 from dotenv import load_dotenv
+#%%
 load_dotenv()
 from nltk.tokenize import sent_tokenize
         if verbose:
             tic = time.time()
         encoded_text = self.in_tokenizer(
+            text, return_tensors="pt", padding=True, truncation=True, return_overflowing_tokens=True
         ).to(self.device)
+        if encoded_text['num_truncated_tokens'][0] > 0:
+            print('Text is too long ')
+            return self.back_translate_long(text,verbose=verbose)
+        in_generated_ids = self.in_model.generate(inputs=encoded_text['input_ids'],
+            attention_mask=encoded_text["attention_mask"])
         in_preds = [
             self.in_tokenizer.decode(
         if verbose:
             print("in_pred : ", in_preds)
         encoded_text = self.out_tokenizer(
+            in_preds, return_tensors="pt", padding=True, truncation=True,return_overflowing_tokens=True
         ).to(self.device)
+        out_generated_ids = self.out_model.generate(inputs=encoded_text['input_ids'],
+            attention_mask=encoded_text["attention_mask"])
         out_preds = [
             self.out_tokenizer.decode(
                 gen_id, skip_special_tokens=True, clean_up_tokenization_spaces=True

footer.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import streamlit as st
+from htbuilder import HtmlElement, div, ul, li, br, hr, a, p, img, styles, classes, fonts
+from htbuilder.units import percent, px
+from htbuilder.funcs import rgba, rgb
+def image(src_as_string, **style):
+    return img(src=src_as_string, style=styles(**style))
+def link(link, text, **style):
+    return a(_href=link, _target="_blank", style=styles(**style))(text)
+def layout(*args):
+    style = """
+    <style>
+      # MainMenu {visibility: hidden;}
+      footer {visibility: hidden;}
+     .stApp { bottom: 105px; }
+    </style>
+    """
+    style_div = styles(
+        position="fixed",
+        left=0,
+        bottom=0,
+        margin=px(0, 0, 0, 0),
+        width=percent(100),
+        color="black",
+        text_align="center",
+        height="auto",
+        opacity=1
+    )
+    style_hr = styles(
+        display="block",
+        margin=px(8, 8, "auto", "auto"),
+        border_style="inset",
+        border_width=px(2)
+    )
+    body = p()
+    foot = div(
+        style=style_div
+    )(
+        hr(
+            style=style_hr
+        ),
+        body
+    )
+    st.markdown(style, unsafe_allow_html=True)
+    for arg in args:
+        if isinstance(arg, str):
+            body(arg)
+        elif isinstance(arg, HtmlElement):
+            body(arg)
+    st.markdown(str(foot), unsafe_allow_html=True)
+def footer():
+    myargs = [
+        "Made in ",
+        image('https://avatars3.githubusercontent.com/u/45109972?s=400&v=4',
+              width=px(25), height=px(25)),
+        br(),
+        "with ❤️ by Louis Ulmer ",
+        br(),
+        link("https://www.linkedin.com/in/louisulmer/", image('https://logospng.org/download/linkedin/logo-linkedin-icon-4096.png',width=px(25), height=px(25))),
+    ]
+    layout(*myargs)

requirements.txt CHANGED Viewed

@@ -5,4 +5,5 @@ transformers[sentencepiece]
 pandas
 scikit-learn
 nltk
-python-dotenv

 pandas
 scikit-learn
 nltk
+python-dotenv
+htbuilder