HamidRezaAttar committed on
Commit
b384e43
1 Parent(s): 7a514ae

add normalizer

Browse files
Files changed (2) hide show
  1. app.py +3 -1
  2. normalizer.py +17 -0
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
  from transformers import pipeline, set_seed
3
  from transformers import AutoTokenizer
 
4
  import random
5
 
6
  import meta
@@ -120,12 +121,13 @@ def main():
120
 
121
  text = st.text_area("Enter text", prompt_box)
122
  generation_kwargs_ph = st.empty()
123
-
124
  if st.button("Generate !"):
125
  with st.spinner(text="Generating ..."):
126
  generation_kwargs_ph.markdown(", ".join([f"`{k}`: {v}" for k, v in generation_kwargs.items()]))
127
  if text:
128
  generated_text = generator.generate(text, generation_kwargs)
 
129
  st.markdown(
130
  f'<p class="ltr ltr-box">'
131
  f'<span class="result-text">{text} <span>'
 
1
  import streamlit as st
2
  from transformers import pipeline, set_seed
3
  from transformers import AutoTokenizer
4
+ from normalizer import Normalizer
5
  import random
6
 
7
  import meta
 
121
 
122
  text = st.text_area("Enter text", prompt_box)
123
  generation_kwargs_ph = st.empty()
124
+ cleaner = Normalizer()
125
  if st.button("Generate !"):
126
  with st.spinner(text="Generating ..."):
127
  generation_kwargs_ph.markdown(", ".join([f"`{k}`: {v}" for k, v in generation_kwargs.items()]))
128
  if text:
129
  generated_text = generator.generate(text, generation_kwargs)
130
+ generated_text = cleaner.clean_txt(generated_text)
131
  st.markdown(
132
  f'<p class="ltr ltr-box">'
133
  f'<span class="result-text">{text} <span>'
normalizer.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
class Normalizer:
    """Post-process generated text: drop repeated sentences and cut the
    unfinished trailing sentence.

    The class keeps no state; sentences are delimited by ``"."`` only.
    """

    def remove_repetitions(self, text: str) -> str:
        """Return *text* with repeated sentences removed, keeping the first
        occurrence of each.

        Sentences are compared with surrounding whitespace stripped, so the
        opening sentence ("Hi") matches its later repeats (" Hi").  Empty
        fragments (produced by ".." or "...") are never treated as
        repetitions, so ellipses survive intact.
        """
        # Everything before the last "." is a terminated sentence; the final
        # fragment is the unterminated tail (empty when text ends with ".").
        *sentences, tail = text.split(".")

        seen = set()
        kept = []
        for sentence in sentences:
            key = sentence.strip()
            if key and key in seen:
                continue
            seen.add(key)
            kept.append(sentence)

        # A repeated tail is dropped by blanking it rather than removing it,
        # so the sentence before it keeps its terminating period.
        tail_key = tail.strip()
        if tail_key and tail_key in seen:
            tail = ""
        kept.append(tail)

        return ".".join(kept)

    def trim_last_sentence(self, text: str) -> str:
        """Return *text* cut just after its last period.

        When *text* contains no period it is returned unchanged instead of
        being reduced to an empty string.
        """
        last_period = text.rfind(".")
        if last_period == -1:
            return text
        return text[:last_period + 1]

    def clean_txt(self, text: str) -> str:
        """Remove repeated sentences, then trim the unfinished last one."""
        return self.trim_last_sentence(self.remove_repetitions(text))