File size: 10,968 Bytes
b5b1714
 
3260908
2286f04
3260908
1a2dfce
2286f04
71befd1
b5b1714
46b6696
48c823d
 
e519286
48c823d
2286f04
 
3022b4e
 
 
 
 
 
 
 
 
 
 
48c823d
e519286
8acf519
48c823d
e519286
71befd1
8acf519
71befd1
 
 
 
 
 
48c823d
e519286
 
 
 
 
b7f929e
 
71befd1
 
 
 
 
 
b7f929e
 
48c823d
71befd1
 
 
 
 
 
 
 
48c823d
 
8acf519
fbc5903
 
71befd1
8acf519
48c823d
 
b7f929e
 
48c823d
8acf519
cb15a69
48c823d
 
cb15a69
48c823d
8acf519
71befd1
 
 
cb15a69
48c823d
 
 
 
 
 
 
 
71befd1
 
 
 
 
 
 
 
48c823d
 
 
 
 
71befd1
 
 
 
 
 
 
 
 
 
 
 
 
 
b5b1714
71befd1
 
b5b1714
71befd1
 
 
 
605f02f
71befd1
a61d64f
 
 
 
 
 
 
 
 
b1bbf8f
 
 
71befd1
b5b1714
 
 
 
 
 
 
4e0ade3
b5b1714
4e0ade3
b5b1714
 
 
 
 
71befd1
 
 
 
3260908
 
1a2dfce
3260908
6e3f695
1399a2f
6e3f695
3260908
6e3f695
 
46b6696
6e3f695
 
 
46b6696
6e3f695
 
 
 
3260908
 
 
 
 
 
 
 
 
8fbaf5c
3260908
 
3022b4e
e519286
 
 
 
 
 
 
3022b4e
 
 
 
 
 
e519286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3260908
48c823d
71befd1
 
 
 
 
 
 
 
765f432
3260908
2286f04
3260908
 
e519286
 
3260908
765f432
a61d64f
2286f04
 
 
e519286
48c823d
0dc3012
16d53be
 
4f0ace0
16d53be
 
e0470d3
48c823d
e519286
0dc3012
 
5ce10a0
0dc3012
cfc86b2
 
 
 
 
 
 
 
cb15a69
64033b8
 
 
2051ea9
64033b8
cb15a69
48c823d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
import re

from collections.abc import Mapping
from logging import getLogger
import datetime as dt
from dateutil.parser import isoparse

from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from mathtext_fastapi.intent_classification import predict_message_intent
from mathtext_fastapi.logging import prepare_message_data_for_logging
from mathtext.sentiment import sentiment
from mathtext.text2int import text2int, TOKENS2INT_ERROR_INT

log = getLogger(__name__)

PAYLOAD_VALUE_TYPES = {
    'author_id': str,
    'author_type': str,
    'contact_uuid': str,
    'message_body': str,
    'message_direction': str,
    'message_id': str,
    'message_inserted_at': str,
    'message_updated_at': str,
    }


def build_nlu_response_object(nlu_type, data, confidence):
    """ Turns nlu results into an object to send back to Turn.io
    Inputs
    - nlu_type: str - the type of nlu run (integer or sentiment-analysis)
    - data: str/int - the student message
    - confidence: - the nlu confidence score (sentiment) or '' (integer)

    >>> build_nlu_response_object('integer', 8, 0)
    {'type': 'integer', 'data': 8, 'confidence': 0}

    >>> build_nlu_response_object('sentiment', 'POSITIVE', 0.99)
    {'type': 'sentiment', 'data': 'POSITIVE', 'confidence': 0.99}
    """
    return {
        'type': nlu_type,
        'data': data,
        'confidence': confidence
        }


# def test_for_float_or_int(message_data, message_text):
#     nlu_response = {}
#     if type(message_text) == int or type(message_text) == float:
#         nlu_response = build_nlu_response_object('integer', message_text, '')
#         prepare_message_data_for_logging(message_data, nlu_response)
#     return nlu_response


def test_for_number_sequence(message_text_arr, message_data, message_text):
    """ Determines if the student's message is a sequence of numbers

    >>> test_for_number_sequence(['1','2','3'], {"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "I am tired", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"}, '1, 2, 3')
    {'type': 'integer', 'data': '1,2,3', 'confidence': 0}

    >>> test_for_number_sequence(['a','b','c'], {"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "I am tired", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"}, 'a, b, c')
    {}
    """
    nlu_response = {}
    if all(ele.isdigit() for ele in message_text_arr):
        nlu_response = build_nlu_response_object(
            'integer',
            ','.join(message_text_arr),
            0
        )
        prepare_message_data_for_logging(message_data, nlu_response)
    return nlu_response


def run_text2int_on_each_list_item(message_text_arr):
    """ Attempts to convert each list item to an integer

    Input
    - message_text_arr: list - a set of text extracted from the student message

    Output
    - student_response_arr: list - a set of integers (32202 for error code)

    >>> run_text2int_on_each_list_item(['1','2','3'])
    [1, 2, 3]
    """
    student_response_arr = []
    for student_response in message_text_arr:
        int_api_resp = text2int(student_response.lower())
        student_response_arr.append(int_api_resp)
    return student_response_arr


def run_sentiment_analysis(message_text):
    """ Evaluates the sentiment of a student message

    >>> run_sentiment_analysis("I am tired")
    [{'label': 'NEGATIVE', 'score': 0.9997807145118713}]

    >>> run_sentiment_analysis("I am full of joy")
    [{'label': 'POSITIVE', 'score': 0.999882698059082}]
    """
    # TODO: Add intent labelling here
    # TODO: Add logic to determine whether intent labeling or sentiment analysis is more appropriate (probably default to intent labeling)
    return sentiment(message_text)


def run_intent_classification(message_text):
    """ Process a student's message using basic fuzzy text comparison

    >>> run_intent_classification("exit")
    {'type': 'intent', 'data': 'exit', 'confidence': 1.0}
    >>> run_intent_classification("exi")     
    {'type': 'intent', 'data': 'exit', 'confidence': 0.86}
    >>> run_intent_classification("eas")
    {'type': 'intent', 'data': '', 'confidence': 0}
    >>> run_intent_classification("hard")
    {'type': 'intent', 'data': '', 'confidence': 0}
    >>> run_intent_classification("hardier") 
    {'type': 'intent', 'data': 'harder', 'confidence': 0.92}
    """
    label = 'no_match'
    ratio = 0
    nlu_response = {'type': 'intent', 'data': label, 'confidence': ratio}
    keywords = [
        'easier',
        'exit',
        'harder',
        'hint',
        'next',
        'stop',
        'tired',
        'tomorrow',
        'finished',
        'help',
        'please',
        'understand',
        'question',
        'easier',
        'easy',
        'support',
        'skip',
        'menu'
    ]

    try:
        tokens = re.findall(r"[-a-zA-Z'_]+", message_text.lower())
    except AttributeError:
        tokens = ''

    for keyword in keywords:
        try:
            tok, score = process.extractOne(keyword, tokens, scorer=fuzz.ratio)
        except:
            score = 0

        if score > 80:
            nlu_response['data'] = keyword
            nlu_response['confidence'] = score
    
    return nlu_response


def payload_is_valid(payload_object):
    """
    >>> payload_is_valid({'author_id': '+5555555', 'author_type': 'OWNER', 'contact_uuid': '3246-43ad-faf7qw-zsdhg-dgGdg', 'message_body': 'thirty one', 'message_direction': 'inbound', 'message_id': 'SDFGGwafada-DFASHA4aDGA', 'message_inserted_at': '2022-07-05T04:00:34.03352Z', 'message_updated_at': '2023-04-06T10:08:23.745072Z'})
    True

    >>> payload_is_valid({"author_id": "@event.message._vnd.v1.chat.owner", "author_type": "@event.message._vnd.v1.author.type", "contact_uuid": "@event.message._vnd.v1.chat.contact_uuid", "message_body": "@event.message.text.body", "message_direction": "@event.message._vnd.v1.direction", "message_id": "@event.message.id", "message_inserted_at": "@event.message._vnd.v1.chat.inserted_at", "message_updated_at": "@event.message._vnd.v1.chat.updated_at"})
    False
    """
    try:
        isinstance(
            isoparse(payload_object.get('message_inserted_at','')),
            dt.datetime
        )
        isinstance(
            isoparse(payload_object.get('message_updated_at','')),
            dt.datetime
        )
    except ValueError:
        return False
    return (
        isinstance(payload_object, Mapping) and
        isinstance(payload_object.get('author_id'), str) and
        isinstance(payload_object.get('author_type'), str) and
        isinstance(payload_object.get('contact_uuid'), str) and
        isinstance(payload_object.get('message_body'), str) and
        isinstance(payload_object.get('message_direction'), str) and
        isinstance(payload_object.get('message_id'), str) and
        isinstance(payload_object.get('message_inserted_at'), str) and
        isinstance(payload_object.get('message_updated_at'), str)    
    )


def log_payload_errors(payload_object):
    errors = []
    try:
        assert isinstance(payload_object, Mapping)
    except Exception as e:
        log.error(f'Invalid HTTP request payload object: {e}')
        errors.append(e)
    for k, typ in PAYLOAD_VALUE_TYPES.items():
        try:
            assert isinstance(payload_object.get(k), typ)
        except Exception as e:
            log.error(f'Invalid HTTP request payload object: {e}')
            errors.append(e)
    try:
        assert isinstance(
            dt.datetime.fromisoformat(payload_object.get('message_inserted_at')),
            dt.datetime
        )
    except Exception as e:
        log.error(f'Invalid HTTP request payload object: {e}')
        errors.append(e)
    try: 
        isinstance(
            dt.datetime.fromisoformat(payload_object.get('message_updated_at')),
            dt.datetime
        )
    except Exception as e:
        log.error(f'Invalid HTTP request payload object: {e}')
        errors.append(e)
    return errors


def evaluate_message_with_nlu(message_data):
    """ Process a student's message using NLU functions and send the result
    
    >>> evaluate_message_with_nlu({"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "8", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"})
    {'type': 'integer', 'data': 8, 'confidence': 0}

    >>> evaluate_message_with_nlu({"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "I am tired", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"})
    {'type': 'sentiment', 'data': 'NEGATIVE', 'confidence': 0.9997807145118713}
    """
    # Keeps system working with two different inputs - full and filtered @event object
    # Call validate payload
    log.info(f'Starting evaluate message: {message_data}')

    if not payload_is_valid(message_data):
        log_payload_errors(message_data)
        return {'type': 'error', 'data': TOKENS2INT_ERROR_INT, 'confidence': 0}

    try:
        message_text = str(message_data.get('message_body', ''))
    except:
        log.error(f'Invalid request payload: {message_data}')
        # use python logging system to do this//
        return {'type': 'error', 'data': TOKENS2INT_ERROR_INT, 'confidence': 0}

    # Run intent classification only for keywords
    intent_api_response = run_intent_classification(message_text)
    if intent_api_response['data']:
        prepare_message_data_for_logging(message_data, intent_api_response)
        return intent_api_response

    number_api_resp = text2int(message_text.lower())

    if number_api_resp == TOKENS2INT_ERROR_INT:
        # Run intent classification with logistic regression model
        predicted_label = predict_message_intent(message_text)
        if predicted_label['confidence'] > 0.01:
            nlu_response = predicted_label
        else:
            # Run sentiment analysis
            sentiment_api_resp = sentiment(message_text)
            nlu_response = build_nlu_response_object(
                'sentiment',
                sentiment_api_resp[0]['label'],
                sentiment_api_resp[0]['score']
            )
    else:
        nlu_response = build_nlu_response_object(
            'integer',
            number_api_resp,
            0
        )

    prepare_message_data_for_logging(message_data, nlu_response)
    return nlu_response