Spaces:
Runtime error
Runtime error
File size: 10,753 Bytes
3260908 2286f04 3260908 1a2dfce 2286f04 71befd1 46b6696 48c823d e519286 48c823d 2286f04 3022b4e 48c823d e519286 8acf519 48c823d e519286 71befd1 8acf519 71befd1 48c823d e519286 b7f929e 71befd1 b7f929e 48c823d 71befd1 48c823d 8acf519 fbc5903 71befd1 8acf519 48c823d b7f929e 48c823d 8acf519 cb15a69 48c823d cb15a69 48c823d 8acf519 71befd1 cb15a69 48c823d 71befd1 48c823d 71befd1 605f02f 71befd1 a61d64f 71befd1 4e0ade3 71befd1 3260908 1a2dfce 3260908 6e3f695 1399a2f 6e3f695 3260908 6e3f695 46b6696 6e3f695 46b6696 6e3f695 3260908 8fbaf5c 3260908 3022b4e e519286 3022b4e e519286 3260908 48c823d 71befd1 765f432 3260908 2286f04 3260908 e519286 3260908 765f432 a61d64f 2286f04 e519286 48c823d 0dc3012 16d53be 4f0ace0 16d53be e0470d3 48c823d e519286 0dc3012 5ce10a0 0dc3012 cfc86b2 cb15a69 64033b8 2051ea9 64033b8 cb15a69 48c823d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 |
from collections.abc import Mapping
from logging import getLogger
import datetime as dt
from dateutil.parser import isoparse
from fuzzywuzzy import fuzz
from mathtext_fastapi.intent_classification import predict_message_intent
from mathtext_fastapi.logging import prepare_message_data_for_logging
from mathtext.sentiment import sentiment
from mathtext.text2int import text2int, TOKENS2INT_ERROR_INT
# Logger for this module; handlers and levels are configured by the application.
log = getLogger(__name__)

# Expected Python type of every field in an incoming message payload.
# All fields, including the two timestamps, are expected to be strings.
PAYLOAD_VALUE_TYPES = dict.fromkeys(
    (
        'author_id',
        'author_type',
        'contact_uuid',
        'message_body',
        'message_direction',
        'message_id',
        'message_inserted_at',
        'message_updated_at',
    ),
    str,
)
def build_nlu_response_object(nlu_type, data, confidence):
    """ Package an NLU result as the dictionary sent back to Turn.io

    Inputs
    - nlu_type: str - label for the kind of NLU that produced the result
      (for example 'integer' or 'sentiment')
    - data: str/int - the value extracted from the student message
    - confidence: the confidence score attached to the result

    Output
    - dict with the keys 'type', 'data' and 'confidence', in that order

    >>> build_nlu_response_object('integer', 8, 0)
    {'type': 'integer', 'data': 8, 'confidence': 0}

    >>> build_nlu_response_object('sentiment', 'POSITIVE', 0.99)
    {'type': 'sentiment', 'data': 'POSITIVE', 'confidence': 0.99}
    """
    response_keys = ('type', 'data', 'confidence')
    response_values = (nlu_type, data, confidence)
    return dict(zip(response_keys, response_values))
# def test_for_float_or_int(message_data, message_text):
# nlu_response = {}
# if type(message_text) == int or type(message_text) == float:
# nlu_response = build_nlu_response_object('integer', message_text, '')
# prepare_message_data_for_logging(message_data, nlu_response)
# return nlu_response
def test_for_number_sequence(message_text_arr, message_data, message_text):
    """ Determines if the student's message is a sequence of numbers

    Inputs
    - message_text_arr: list of str - the pieces of the student message
    - message_data: dict - the message payload, passed on to the logging helper
    - message_text: str - the raw message (not used by this function; kept
      so existing callers keep working)

    Output
    - an 'integer' nlu response whose data is the pieces joined with commas
      when every piece consists only of digits, otherwise an empty dict

    >>> test_for_number_sequence(['1','2','3'], {"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "I am tired", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"}, '1, 2, 3')
    {'type': 'integer', 'data': '1,2,3', 'confidence': 0}

    >>> test_for_number_sequence(['a','b','c'], {"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "I am tired", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"}, 'a, b, c')
    {}

    >>> test_for_number_sequence([], {}, '')
    {}
    """
    # all() is vacuously True for an empty list, which would otherwise
    # report an empty message as a (blank) number sequence.
    if not message_text_arr:
        return {}
    if not all(ele.isdigit() for ele in message_text_arr):
        return {}

    nlu_response = build_nlu_response_object(
        'integer',
        ','.join(message_text_arr),
        0
    )
    # Only successful matches are handed to the logging helper.
    prepare_message_data_for_logging(message_data, nlu_response)
    return nlu_response
def run_text2int_on_each_list_item(message_text_arr):
    """ Runs text2int on every piece of text, one at a time

    Input
    - message_text_arr: list - pieces of text extracted from the student message

    Output
    - list - the text2int result for each piece, in the same order
      (32202 is the error code, per the original documentation)

    >>> run_text2int_on_each_list_item(['1','2','3'])
    [1, 2, 3]
    """
    # Each piece is lower-cased before it is handed to text2int.
    return [text2int(piece.lower()) for piece in message_text_arr]
def run_sentiment_analysis(message_text):
    """ Scores the sentiment of a student message

    Delegates directly to mathtext's `sentiment` and returns its result
    unchanged (a list with one label/score dict, going by the examples).

    >>> run_sentiment_analysis("I am tired")
    [{'label': 'NEGATIVE', 'score': 0.9997807145118713}]

    >>> run_sentiment_analysis("I am full of joy")
    [{'label': 'POSITIVE', 'score': 0.999882698059082}]
    """
    # TODO: Add intent labelling here
    # TODO: Add logic to determine whether intent labeling or sentiment analysis is more appropriate (probably default to intent labeling)
    sentiment_result = sentiment(message_text)
    return sentiment_result
# Keywords the student message is compared against, in comparison order.
_INTENT_COMMANDS = (
    'easier',
    'exit',
    'harder',
    'hint',
    'next',
    'stop',
    'tired',
    'tomorrow',
    'finished',
    'help',
    'please',
    'understand',
    'question',
    'easy',
    'support',
)

# A fuzz.ratio score (0-100) must be strictly greater than this to count.
_INTENT_MATCH_THRESHOLD = 80


def run_intent_classification(message_text):
    """ Process a student's message using basic fuzzy text comparison

    The lower-cased message is compared with every known command using
    fuzz.ratio. The best-scoring command above the threshold is returned;
    if several commands share the best score, the one listed last wins.

    Input
    - message_text: str - the student message; any non-string value yields
      the empty (no match) response

    Output
    - dict with 'type' ('intent'), 'data' (matched command or '') and
      'confidence' (ratio / 100, or 0 when nothing matched)

    >>> run_intent_classification("exit")
    {'type': 'intent', 'data': 'exit', 'confidence': 1.0}
    >>> run_intent_classification("exi")
    {'type': 'intent', 'data': 'exit', 'confidence': 0.86}
    >>> run_intent_classification("eas")
    {'type': 'intent', 'data': 'easy', 'confidence': 0.86}
    >>> run_intent_classification("hard")
    {'type': 'intent', 'data': '', 'confidence': 0}
    >>> run_intent_classification("hardier")
    {'type': 'intent', 'data': 'harder', 'confidence': 0.92}
    >>> run_intent_classification(None)
    {'type': 'intent', 'data': '', 'confidence': 0}
    """
    nlu_response = {'type': 'intent', 'data': '', 'confidence': 0}

    # Only strings can be lower-cased and compared; anything else cannot
    # match a command, so report "no intent" instead of raising.
    if not isinstance(message_text, str):
        return nlu_response

    # Normalise once instead of once per command.
    normalized_text = message_text.lower()

    best_ratio = 0
    for command in _INTENT_COMMANDS:
        ratio = fuzz.ratio(command, normalized_text)
        # `>=` keeps the later command on ties, while a weaker later match
        # can no longer replace a stronger earlier one.
        if ratio > _INTENT_MATCH_THRESHOLD and ratio >= best_ratio:
            best_ratio = ratio
            nlu_response['data'] = command
            nlu_response['confidence'] = ratio / 100
    return nlu_response
def payload_is_valid(payload_object):
    """ Checks that a message payload has the expected shape

    A payload is valid when it is a mapping, every expected field is
    present as a string, and both timestamp fields can be parsed by
    dateutil's isoparse. Any other input returns False instead of raising.

    Input
    - payload_object: the decoded request payload (expected to be a dict)

    Output
    - bool - True when the payload passes every check

    >>> payload_is_valid({'author_id': '+5555555', 'author_type': 'OWNER', 'contact_uuid': '3246-43ad-faf7qw-zsdhg-dgGdg', 'message_body': 'thirty one', 'message_direction': 'inbound', 'message_id': 'SDFGGwafada-DFASHA4aDGA', 'message_inserted_at': '2022-07-05T04:00:34.03352Z', 'message_updated_at': '2023-04-06T10:08:23.745072Z'})
    True

    >>> payload_is_valid({"author_id": "@event.message._vnd.v1.chat.owner", "author_type": "@event.message._vnd.v1.author.type", "contact_uuid": "@event.message._vnd.v1.chat.contact_uuid", "message_body": "@event.message.text.body", "message_direction": "@event.message._vnd.v1.direction", "message_id": "@event.message.id", "message_inserted_at": "@event.message._vnd.v1.chat.inserted_at", "message_updated_at": "@event.message._vnd.v1.chat.updated_at"})
    False

    >>> payload_is_valid(None)
    False
    """
    # Structural check first: everything below relies on .get().
    if not isinstance(payload_object, Mapping):
        return False

    required_text_fields = (
        'author_id',
        'author_type',
        'contact_uuid',
        'message_body',
        'message_direction',
        'message_id',
        'message_inserted_at',
        'message_updated_at',
    )
    # A missing field comes back from .get() as None and fails this check.
    if not all(
        isinstance(payload_object.get(field), str)
        for field in required_text_fields
    ):
        return False

    # Both timestamps are known to be strings here, so an unparseable
    # value surfaces as ValueError from isoparse.
    try:
        isoparse(payload_object.get('message_inserted_at'))
        isoparse(payload_object.get('message_updated_at'))
    except ValueError:
        return False
    return True
def log_payload_errors(payload_object):
    """ Logs every problem found in an invalid message payload

    Applies the same checks as payload_is_valid (mapping, string-typed
    fields, timestamps parseable by isoparse), but reports each failure
    through the module logger instead of stopping at the first one.

    Input
    - payload_object: the decoded request payload (expected to be a dict)

    Output
    - list - one exception object per problem found (empty if none)
    """
    errors = []

    def _record(error):
        # Log and collect one problem; the exception is never raised.
        log.error(f'Invalid HTTP request payload object: {error}')
        errors.append(error)

    if not isinstance(payload_object, Mapping):
        _record(TypeError(
            f'payload must be a mapping, got {type(payload_object).__name__}'
        ))
        # Field-level checks need .get(), so there is nothing more to report.
        return errors

    for key, expected_type in PAYLOAD_VALUE_TYPES.items():
        value = payload_object.get(key)
        if not isinstance(value, expected_type):
            _record(TypeError(
                f'{key!r} must be {expected_type.__name__}, '
                f'got {type(value).__name__}'
            ))

    for key in ('message_inserted_at', 'message_updated_at'):
        value = payload_object.get(key)
        if not isinstance(value, str):
            # Already reported by the type check above.
            continue
        try:
            isoparse(value)
        except ValueError as e:
            _record(ValueError(
                f'{key!r} is not a valid ISO 8601 timestamp ({value!r}): {e}'
            ))
    return errors
def evaluate_message_with_nlu(message_data):
    """ Process a student's message using NLU functions and send the result

    Steps, in order:
    1. Validate the payload; an invalid payload is logged and answered
       with an 'error' response.
    2. Fuzzy keyword matching (run_intent_classification); a match is
       returned immediately.
    3. text2int; a successful conversion yields an 'integer' response.
    4. Otherwise the intent model (predict_message_intent) is used when
       its confidence is above 0.01, else sentiment analysis.

    Input
    - message_data: dict - the message payload; 'message_body' holds the
      student message

    Output
    - dict - an nlu response with 'type', 'data' and 'confidence'

    >>> evaluate_message_with_nlu({"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "8", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"})
    {'type': 'integer', 'data': 8, 'confidence': 0}

    >>> evaluate_message_with_nlu({"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "I am tired", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"})
    {'type': 'sentiment', 'data': 'NEGATIVE', 'confidence': 0.9997807145118713}
    """
    # Keeps system working with two different inputs - full and filtered @event object
    log.info(f'Starting evaluate message: {message_data}')

    # Built per call so callers can never share or mutate a common dict.
    error_response = {
        'type': 'error',
        'data': TOKENS2INT_ERROR_INT,
        'confidence': 0
    }

    if not payload_is_valid(message_data):
        log_payload_errors(message_data)
        return error_response

    try:
        message_text = str(message_data.get('message_body', ''))
    except Exception:
        # Unlike a bare except, this lets SystemExit and KeyboardInterrupt
        # propagate.
        log.error(f'Invalid request payload: {message_data}')
        return error_response

    # Run intent classification only for keywords
    intent_api_response = run_intent_classification(message_text)
    if intent_api_response['data']:
        prepare_message_data_for_logging(message_data, intent_api_response)
        return intent_api_response

    number_api_resp = text2int(message_text.lower())

    if number_api_resp == TOKENS2INT_ERROR_INT:
        # Not a number: run intent classification with the
        # logistic regression model
        predicted_label = predict_message_intent(message_text)
        if predicted_label['confidence'] > 0.01:
            nlu_response = predicted_label
        else:
            # Run sentiment analysis
            sentiment_api_resp = sentiment(message_text)
            nlu_response = build_nlu_response_object(
                'sentiment',
                sentiment_api_resp[0]['label'],
                sentiment_api_resp[0]['score']
            )
    else:
        nlu_response = build_nlu_response_object(
            'integer',
            number_api_resp,
            0
        )

    prepare_message_data_for_logging(message_data, nlu_response)
    return nlu_response
|