import streamlit as st
from transformers import pipeline
import unicodedata
import re

def contains_text(text):
    """Tell whether *text* contains at least one Latin or Hebrew letter.

    Returns the ``re.Match`` for the first ASCII letter if there is one,
    otherwise the result of searching for a character in the range
    alef..tav (a ``re.Match`` or ``None``). Callers are expected to use the
    result for its truthiness.
    """
    latin_match = re.search('[A-Za-z]', text)
    if latin_match:
        return latin_match
    # No ASCII letter found: fall back to the Hebrew letter range.
    return re.search('[א-ת]', text)

def normalize(text):
    """Return *text* in NFC form with a few Hebrew characters substituted.

    After NFC normalization the following single-character substitutions
    are applied: U+05BA -> U+05B9, U+05BE -> ASCII hyphen, '״' -> double
    quote, and '׳' -> apostrophe.
    """
    substitutions = str.maketrans({
        '\u05ba': '\u05b9',
        '\u05be': '-',
        '״': '"',
        "׳": "'",
    })
    composed = unicodedata.normalize('NFC', text)
    # One translate pass matches sequential replaces here because no
    # substitution output is itself one of the characters being replaced.
    return composed.translate(substitutions)

@st.cache_resource(show_spinner=False)
def _load_pipeline():
    """Build the TaatikNet text2text-generation pipeline.

    Streamlit re-executes this script on every widget interaction; caching
    the pipeline as a resource keeps a single instance alive across reruns
    instead of rebuilding the model each time.
    """
    return pipeline("text2text-generation", model='malper/taatiknet', device_map="auto")


# show_spinner=False above: the explicit spinner below carries the message.
with st.spinner('Loading TaatikNet framework...'):
    pipe = _load_pipeline()
st.success('Loaded!')

text = st.text_area('Enter text and press ctrl/command+enter:')
# Only whitespace-separated tokens containing a Latin or Hebrew letter are
# normalized and sent to the model; all other tokens are dropped.
words = [normalize(x) for x in text.split() if contains_text(x)] if text else []
if words:
    outputs = pipe(words, max_length=200, num_beams=5, num_return_sequences=5)
    # zip(*outputs) regroups the per-word results so that the i-th tuple
    # holds the i-th candidate of every word; joining a tuple's
    # 'generated_text' values yields one full-line candidate.
    texts = []
    for option in zip(*outputs):
        texts.append(' '.join(candidate['generated_text'] for candidate in option))
    # First candidate is shown on its own, the rest under a separate heading.
    st.write(texts[0])
    st.write('Other options:')
    for alternative in texts[1:]:
        st.write(alternative)