"""Streamlit front end for the ``malper/taatiknet`` text2text model.

The user enters free text; every whitespace-separated token that contains at
least one Latin or Hebrew letter is normalized and sent to the model, and the
generated candidates are displayed (best candidate first, then the others).
"""

import re
import unicodedata

import streamlit as st
from transformers import pipeline

# Compiled once at import time; used to decide whether a token is worth
# sending to the model at all.
_LATIN_LETTER_RE = re.compile('[A-Za-z]')
_HEBREW_LETTER_RE = re.compile('[א-ת]')

# Single-character substitutions applied after NFC normalization.
# None of the replacement outputs is itself a key, so one translate() pass is
# equivalent to applying the replacements one after another.
_CHAR_REPLACEMENTS = str.maketrans({
    '\u05ba': '\u05b9',  # HOLAM HASER FOR VAV -> plain HOLAM
    '\u05be': '-',       # Hebrew maqaf -> ASCII hyphen
    '״': '"',            # Hebrew gershayim -> ASCII double quote
    '׳': "'",            # Hebrew geresh -> ASCII apostrophe
})


def contains_text(text):
    """Return a truthy match if *text* has a Latin or Hebrew letter.

    Returns the ``re.Match`` for the first Latin letter (A-Z, a-z) if there
    is one, otherwise the match for the first Hebrew letter (alef-tav), or
    ``None`` when the string contains neither.
    """
    return _LATIN_LETTER_RE.search(text) or _HEBREW_LETTER_RE.search(text)


def normalize(text):
    """Return *text* in NFC form with a few Hebrew marks canonicalized.

    After Unicode NFC normalization, U+05BA is mapped to U+05B9, the maqaf
    (U+05BE) to ``-``, gershayim to ``"`` and geresh to ``'``.
    """
    return unicodedata.normalize('NFC', text).translate(_CHAR_REPLACEMENTS)


@st.cache_resource(show_spinner=False)
def _load_pipeline():
    """Build the TaatikNet pipeline once per server process.

    Streamlit re-executes this script on every user interaction; without
    caching, the model would be re-instantiated each time text is submitted.
    The built-in cache spinner is disabled because the caller already wraps
    the load in its own ``st.spinner``.
    """
    return pipeline(
        "text2text-generation",
        model='malper/taatiknet',
        device_map="auto",
    )


with st.spinner('Loading TaatikNet framework...'):
    pipe = _load_pipeline()
st.success('Loaded!')

text = st.text_area('Enter text and press ctrl/command+enter:')

if text:
    # Drop tokens with no letters (pure punctuation/digits) and normalize
    # the rest before handing them to the model.
    words = [normalize(x) for x in text.split() if contains_text(x)]

    if words:
        # Each word is processed independently with beam search.
        # NOTE(review): this relies on the pipeline returning, for a list
        # input, one list of `num_return_sequences` candidate dicts per
        # word -- confirm against the installed transformers version.
        outputs = pipe(
            words,
            max_length=200,
            num_beams=5,
            num_return_sequences=5,
        )

        # Transpose word-major results into candidate-major sentences: the
        # k-th sentence joins the k-th candidate of every word.
        texts = [
            ' '.join(x['generated_text'] for x in option)
            for option in zip(*outputs)
        ]

        st.write(texts[0])
        st.write('Other options:')
        for option in texts[1:]:
            st.write(option)