import streamlit as st
from annotated_text import annotated_text
from multiprocessing import Process
import json
import difflib
import requests
import time
import os

def start_server():
    '''Helper to start the inference service through Uvicorn'''
    os.system("uvicorn InferenceServer:app --port 8080 --host 0.0.0.0 --workers 1")

def load_models():
    '''One-time init of the models; starts the inference server as a separate process'''
    if not is_port_in_use(8080):
        with st.spinner(text="Loading models, please wait..."):
            proc = Process(target=start_server, args=(), daemon=True)
            proc.start()
            while not is_port_in_use(8080):
                time.sleep(1)
            st.success("Model server started.")
    else:
        st.success("Model server already running...")
    st.session_state['models_loaded'] = True

def is_port_in_use(port):
    '''Helper to check if service already running'''
    import socket
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex(('0.0.0.0', port)) == 0

if 'models_loaded' not in st.session_state:
    st.session_state['models_loaded'] = False


def get_correction(input_text, model):
    '''Invokes the Neuspell inference service'''
    # Pass the sentence and model as query params so user text is URL-encoded safely
    correct_response = requests.get(
        "http://0.0.0.0:8080/correct",
        params={"input_sentence": input_text, "model": model},
    )
    correct_json = correct_response.json()
    corrected_sentence = correct_json["corrected_sentence"]
    diff = diff_strings(input_text, corrected_sentence)
    st.markdown('##### Corrected text:')
    st.write('')
    annotated_text(*diff)


def diff_strings(a, b):
    '''Highlights corrections with the annotated_text library'''
    result = []
    diff = difflib.Differ().compare(a.split(), b.split())
    replacement = ""
    for line in diff:
        if line.startswith("  "):
            # Unchanged word; flush any pending deletion first
            if len(replacement) == 0:
                result.append(" ")
                result.append(line[2:])
            else:
                result.append(" ")
                result.append(("", replacement, "#39ff14"))
                replacement = ""
                result.append(line[2:])
        elif line.startswith("- "):
            # Word removed from the input; keep it pending in case a "+" follows
            if len(replacement) == 0:
                replacement = line[2:]
            else:
                result.append(" ")
                result.append(("", replacement, "#39ff14"))
                replacement = line[2:]
        elif line.startswith("+ "):
            # Word added by the corrector; pair it with the pending deletion, if any
            if len(replacement) == 0:
                result.append((line[2:], "", "#39ff14"))
            else:
                result.append(" ")
                result.append((line[2:], replacement, "#39ff14"))
                replacement = ""
    if replacement:
        # Flush a trailing deletion that had no replacement word
        result.append(" ")
        result.append(("", replacement, "#39ff14"))
    return result
        
if __name__ == "__main__":
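        # Streamlit UI: show the library blurb and usage snippet, make sure the
        # inference server is up, then let the user pick a model and a sentence
        # to correct.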
    
        st.title('Neuspell - A Python library')
        st.subheader('For fast & accurate spell correction')
        st.markdown("Added to HuggingFace hub and spaces with 💙  by Prithivi Da.[Fork with HF hub support](https://github.com/PrithivirajDamodaran/neuspell) | [Original repo](https://github.com/neuspell/neuspell)", unsafe_allow_html=True)
        st.markdown("<p style='color:blue; display:inline'> Integrate with your app with just few lines of code </p>", unsafe_allow_html=True)
        st.markdown("""
                    ```python 
                    import neuspell
                    from neuspell import BertsclstmChecker
                    checker = BertsclstmChecker()
                    checker.from_pretrained()

                    checker.correct("Oh! I loovee it when it starts to rain, its smells awesomee")
                    #  (or)
                    checker.correct_strings(["sent1", "sent2"])
                    #  (or)
                    checker.correct_from_file(src="typo_ridden_texts.txt")
                    ```    
                    """)

        models = [
                    "BERT-LSTM",
                    "ELMo-LSTM",
                    "CNN-LSTM"
                    ]
                    

        examples = [
                    "what is the reazon for everyone to laeve the comapny?",
                    "I loovee it when it starts to rain, it smells awesomee",
                    "Feel free to rech out to me",
                    "Life is shart, so live freely",
                    "We know the boy actually stol the books",
                    " We all ate the foood and then made desert",
                    ]

        if not st.session_state['models_loaded']:
            load_models()                     

        st.markdown('##### Supported Models:')
        selected_model = st.selectbox(
            label="Choose a model: (Test and integrate the best that suits your needs)",
            options=models,
        )

        st.markdown('##### Try it now:')
        input_text = st.selectbox(
            label="Choose an example",
            options=examples,
        )
        st.write("(or)")
        input_text = st.text_input(
            label="Bring your own sentence",
            value=input_text,
        )

        if input_text.strip(): 
            get_correction(input_text, selected_model)