"""Gradio app for Slovenian sentence boundary detection.

Splits input text into sentences (one per line) and reports the
sentence count.  Two backends are present: an ONNX multilingual SBD
model (legacy, kept for reference) and the NLTK Punkt tokenizer for
Slovenian, which is the one wired into the UI.
"""

from typing import List

import gradio as gr
import nltk
from punctuators.models import SBDModelONNX

# Instantiate the ONNX sentence-boundary model (downloads ONNX + SPE
# models into the HF cache on first use; delete from the cache to clean up).
m = SBDModelONNX.from_pretrained("sbd_multi_lang")


def sentence_boundary_detection_old(input_texts):
    """Legacy backend: split *input_texts* (a single string) with the ONNX model.

    Returns a tuple of (newline-joined sentences, sentence count).
    """
    # infer() takes a batch, so wrap the single input string in a list.
    results: List[List[str]] = m.infer([input_texts])
    sentences = "\n".join(results[0])
    return sentences, len(results[0])


# Fetch the tab-based Punkt models (NLTK >= 3.9 resource name).
nltk.download('punkt_tab')

# BUG FIX: the original downloaded 'punkt_tab' but then loaded the
# pickle-based 'tokenizers/punkt/slovenian.pickle', which belongs to the
# separate 'punkt' resource — a LookupError with only 'punkt_tab' present,
# and NLTK >= 3.9 removed the pickle models entirely.  Prefer the modern
# tab-based tokenizer; fall back to the pickle path on older NLTK.
try:
    from nltk.tokenize import PunktTokenizer  # available in NLTK >= 3.9
    slovenian_tokenizer = PunktTokenizer("slovenian")
except ImportError:
    # Older NLTK: the pickle models live in the 'punkt' resource.
    nltk.download('punkt')
    slovenian_tokenizer = nltk.data.load('tokenizers/punkt/slovenian.pickle')


def sentence_boundary_detection(text):
    """Split *text* into sentences with the Slovenian Punkt tokenizer.

    Returns a tuple of (newline-joined sentences, sentence count) —
    the two outputs expected by the Gradio interface below.
    """
    sentences = slovenian_tokenizer.tokenize(text)
    return "\n".join(sentences), len(sentences)


# Gradio interface: one text input, two outputs (sentences + count).
iface = gr.Interface(
    fn=sentence_boundary_detection,
    inputs=gr.Textbox(label="Input Text", lines=10, placeholder="Enter text here..."),
    outputs=[
        gr.Textbox(label="Sentences", lines=10, placeholder="Sentences will appear here..."),
        gr.Number(label="Number of Sentences"),
    ],
    title="Sentence Boundary Detection",
    description="Enter text to detect sentence boundaries and count the number of sentences.",
)

# Launch only when run as a script (this is how Gradio apps are started,
# so behavior is unchanged for direct execution / HF Spaces).
if __name__ == "__main__":
    iface.launch()