Greg Thompson committed on
Commit
809d03c
1 Parent(s): 44e23f4

Update nlu evaluation with basic intent classification using fuzzy comparison

Browse files
app.py CHANGED
@@ -97,8 +97,8 @@ async def evaluate_user_message_with_nlu_api(request: Request):
97
 
98
  Output
99
  - int_data_dict or sent_data_dict: dict - the type of NLU run and result
100
- {'type':'integer', 'data': '8'}
101
- {'type':'sentiment', 'data': 'negative'}
102
  """
103
  data_dict = await request.json()
104
  message_data = data_dict.get('message_data', '')
 
97
 
98
  Output
99
  - int_data_dict or sent_data_dict: dict - the type of NLU run and result
100
+ {'type':'integer', 'data': '8', 'confidence': 0}
101
+ {'type':'sentiment', 'data': 'negative', 'confidence': 0.99}
102
  """
103
  data_dict = await request.json()
104
  message_data = data_dict.get('message_data', '')
mathtext_fastapi/data/text2int_results.csv CHANGED
@@ -1,92 +1,105 @@
1
  input,output,text2int,score
2
- notanumber,32202,32202,True
3
- this is not a number,32202,32202,True
4
- fourteen,14,14,True
5
- forteen,14,14,True
6
- one thousand four hundred ninety two,1492,1492,True
7
- one thousand ninety two,1092,1092,True
8
- Fourteen Hundred Ninety-Two,1492,1492,True
9
- Fourteen Hundred,1400,1400,True
10
- Ninety nine,99,99,True
11
- fifteen thousand five hundred-sixty,15560,15560,True
12
- three hundred fifty,350,350,True
13
- one nine eight five,1985,1985,True
14
- nineteen eighty-five,1985,1605,False
15
- oh one,1,1,True
16
- six oh 1,601,601,True
17
- sex,6,6,True
18
- six,6,6,True
19
- eight oh,80,8,False
20
- eighty,80,80,True
21
- ate,8,1,False
22
- double eight,88,32202,False
23
- eight three seven five three O nine,8375309,8375329,False
24
- eight three seven five three oh nine,8375309,8375309,True
25
- eight three seven five three zero nine,8375309,8375309,True
26
- eight three seven five three oh ni-ee-ine,8375309,837530611,False
27
- two eight,28,16,False
28
- seven oh eleven,7011,77,False
29
- seven elevens,77,77,True
30
- seven eleven,711,77,False
31
- ninety nine oh five,9905,149,False
32
- seven 0 seven 0 seven 0 seven,7070707,7070707,True
33
- 123 hundred,123000,223,False
34
- 5 o 5,505,525,False
35
- 15 o 5,1505,22,False
36
- 15-o 5,1505,22,False
37
- 15 o-5,1505,22,False
38
- 911-thousand,911000,911000,True
39
- twenty-two twenty-two,2222,44,False
40
- twenty-two twenty-twos,484,44,False
41
- four eighty four,484,404,False
42
- four eighties,320,72,False
43
- four eighties and nine nineties,1130,243,False
44
- ninety nine hundred and seventy seven,9977,276,False
45
- seven thousands,7000,7000,True
46
- 2 hundreds,200,200,True
47
- 99 thousands and one,99001,99001,True
48
- "forty-five thousand, seven hundred and nine",45709,1161,False
49
- eighty eight hundred eighty,8880,268,False
50
- a hundred hundred,10000,32202,False
51
- a hundred thousand,100000,32202,False
52
- a hundred million,100000000,32202,False
53
- nineteen ninety nine,1999,1809,False
54
- forteen twenty seven,1427,307,False
55
- seventeen-thousand and seventy two,17072,17072,True
56
- two hundred and nine,209,209,True
57
- two thousand ten,2010,2010,True
58
- two thousand and ten,2010,2010,True
59
- twelve million,12000000,12000000,True
60
- 8 billion,8000000000,8000000000,True
61
- twenty ten,2010,2010,True
62
- thirty-two hundred,3200,3200,True
63
- nine,9,9,True
64
- forty two,42,42,True
65
- 1 2 three,123,123,True
66
- fourtean,14,14,True
67
- one tousand four hundred ninty two,1492,1492,True
68
- Furteen Hundrd Ninety-Too,1492,1492,True
69
- forrteen,14,14,True
70
- sevnteen-thosand and seventy two,17072,17072,True
71
- ninety nine hundred ad seventy seven,9977,32202,False
72
- seven thusands,7000,7000,True
73
- 2 hunreds,200,200,True
74
- 99 tousands and one,99001,99001,True
75
- eighty ate hundred eighty,8880,261,False
76
- fourteen Hundred,1400,1400,True
77
- 8 Bilion,8000000000,8000000,False
78
- one million three thousand one,1003001,1003001,True
79
- four million nine thousand seven,4009007,4009007,True
80
- two million five hundred thousand,2500000,2001500,False
81
- two tousand ten,2010,2010,True
82
- two thousand teen,2010,2007,False
83
- tvelve milion,12000000,12000000,True
84
- tventy ten,2010,2010,True
85
- tirty-twoo hunred,3200,3200,True
86
- sevn thoosands,7000,7000,True
87
- five,5,5,True
88
- ten,10,10,True
89
- one two three and ten,12310,51,False
90
- ONE MILLion three hunded and fiv,1000305,1000305,True
91
- "50,500 and six",50506,50506,True
92
- one_million_and_five,1000005,1000005,True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  input,output,text2int,score
2
+ notanumber,32202.0,32202.0,True
3
+ this is not a number,32202.0,32202.0,True
4
+ fourteen,14.0,14.0,True
5
+ forteen,14.0,14.0,True
6
+ one thousand four hundred ninety two,1492.0,1492.0,True
7
+ one thousand ninety two,1092.0,1092.0,True
8
+ Fourteen Hundred Ninety-Two,1492.0,1492.0,True
9
+ Fourteen Hundred,1400.0,1400.0,True
10
+ Ninety nine,99.0,99.0,True
11
+ fifteen thousand five hundred-sixty,15560.0,15560.0,True
12
+ three hundred fifty,350.0,350.0,True
13
+ one nine eight five,1985.0,1985.0,True
14
+ nineteen eighty-five,1985.0,1605.0,False
15
+ oh one,1.0,1.0,True
16
+ six oh 1,601.0,601.0,True
17
+ sex,6.0,6.0,True
18
+ six,6.0,6.0,True
19
+ eight oh,80.0,8.0,False
20
+ eighty,80.0,80.0,True
21
+ ate,8.0,1.0,False
22
+ double eight,88.0,8.0,False
23
+ eight three seven five three O nine,8375309.0,8375329.0,False
24
+ eight three seven five three oh nine,8375309.0,8375309.0,True
25
+ eight three seven five three zero nine,8375309.0,8375309.0,True
26
+ eight three seven five three oh ni-ee-ine,8375309.0,837530619.0,False
27
+ two eight,28.0,16.0,False
28
+ seven oh eleven,7011.0,77.0,False
29
+ seven elevens,77.0,77.0,True
30
+ seven eleven,711.0,77.0,False
31
+ ninety nine oh five,9905.0,149.0,False
32
+ seven 0 seven 0 seven 0 seven,7070707.0,7070707.0,True
33
+ 123 hundred,123000.0,223.0,False
34
+ 5 o 5,505.0,525.0,False
35
+ 15 o 5,1505.0,22.0,False
36
+ 15-o 5,1505.0,22.0,False
37
+ 15 o-5,1505.0,22.0,False
38
+ 911-thousand,911000.0,911000.0,True
39
+ twenty-two twenty-two,2222.0,44.0,False
40
+ twenty-two twenty-twos,484.0,44.0,False
41
+ four eighty four,484.0,404.0,False
42
+ four eighties,320.0,72.0,False
43
+ four eighties and nine nineties,1130.0,243.0,False
44
+ ninety nine hundred and seventy seven,9977.0,276.0,False
45
+ seven thousands,7000.0,7000.0,True
46
+ 2 hundreds,200.0,200.0,True
47
+ 99 thousands and one,99001.0,99001.0,True
48
+ "forty-five thousand, seven hundred and nine",45709.0,1161.0,False
49
+ eighty eight hundred eighty,8880.0,268.0,False
50
+ a hundred hundred,10000.0,100.0,False
51
+ a hundred thousand,100000.0,100.0,False
52
+ a hundred million,100000000.0,100.0,False
53
+ nineteen ninety nine,1999.0,1809.0,False
54
+ forteen twenty seven,1427.0,307.0,False
55
+ seventeen-thousand and seventy two,17072.0,17072.0,True
56
+ two hundred and nine,209.0,209.0,True
57
+ two thousand ten,2010.0,2010.0,True
58
+ two thousand and ten,2010.0,2010.0,True
59
+ twelve million,12000000.0,12000000.0,True
60
+ 8 billion,8000000000.0,8000000000.0,True
61
+ twenty ten,2010.0,2010.0,True
62
+ thirty-two hundred,3200.0,3200.0,True
63
+ nine,9.0,9.0,True
64
+ forty two,42.0,42.0,True
65
+ 1 2 three,123.0,123.0,True
66
+ fourtean,14.0,14.0,True
67
+ one tousand four hundred ninty two,1492.0,1492.0,True
68
+ Furteen Hundrd Ninety-Too,1492.0,1492.0,True
69
+ forrteen,14.0,14.0,True
70
+ sevnteen-thosand and seventy two,17072.0,17072.0,True
71
+ ninety nine hundred ad seventy seven,9977.0,90.0,False
72
+ seven thusands,7000.0,7000.0,True
73
+ 2 hunreds,200.0,200.0,True
74
+ 99 tousands and one,99001.0,99001.0,True
75
+ eighty ate hundred eighty,8880.0,261.0,False
76
+ fourteen Hundred,1400.0,1400.0,True
77
+ 8 Bilion,8000000000.0,8000000.0,False
78
+ one million three thousand one,1003001.0,1003001.0,True
79
+ four million nine thousand seven,4009007.0,4009007.0,True
80
+ two million five hundred thousand,2500000.0,2001500.0,False
81
+ two tousand ten,2010.0,2010.0,True
82
+ two thousand teen,2010.0,2007.0,False
83
+ tvelve milion,12000000.0,12000000.0,True
84
+ tventy ten,2010.0,2010.0,True
85
+ tirty-twoo hunred,3200.0,3200.0,True
86
+ sevn thoosands,7000.0,7000.0,True
87
+ five,5.0,5.0,True
88
+ ten,10.0,10.0,True
89
+ one two three and ten,12310.0,51.0,False
90
+ ONE MILLion three hunded and fiv,1000305.0,1000305.0,True
91
+ "50,500 and six",50506.0,50506.0,True
92
+ one_million_and_five,1000005.0,1000005.0,True
93
+ 2.0,2.0,2.0,True
94
+ 4.5,4.5,4.5,True
95
+ 12345.001,12345.001,12345.001,True
96
+ 7..0,7.0,7.0,True
97
+ 0.06,0.06,0.06,True
98
+ "0,25",0.25,25.0,False
99
+ o.45,0.45,32202.0,False
100
+ 0.1.2,0.12,32202.0,False
101
+ 0.00009,9e-05,9e-05,True
102
+ 0.01.,0.01,0.01,True
103
+ I don't know 8,8.0,8.0,True
104
+ "You're wrong it's not 20, it's 45",45.0,20.0,False
105
+ I don't understand why it's 19,19.0,19.0,True
mathtext_fastapi/nlu.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from mathtext_fastapi.logging import prepare_message_data_for_logging
2
  from mathtext.sentiment import sentiment
3
  from mathtext.text2int import text2int
@@ -8,27 +9,41 @@ def build_nlu_response_object(type, data, confidence):
8
  """ Turns nlu results into an object to send back to Turn.io
9
  Inputs
10
  - type: str - the type of nlu run (integer or sentiment-analysis)
11
- - data: str - the student message
12
  - confidence: - the nlu confidence score (sentiment) or '' (integer)
 
 
 
 
 
 
13
  """
14
  return {'type': type, 'data': data, 'confidence': confidence}
15
 
16
 
17
- def test_for_float_or_int(message_data, message_text):
18
- nlu_response = {}
19
- if type(message_text) == int or type(message_text) == float:
20
- nlu_response = build_nlu_response_object('integer', message_text, '')
21
- prepare_message_data_for_logging(message_data, nlu_response)
22
- return nlu_response
23
 
24
 
25
  def test_for_number_sequence(message_text_arr, message_data, message_text):
 
 
 
 
 
 
 
 
26
  nlu_response = {}
27
  if all(ele.isdigit() for ele in message_text_arr):
28
  nlu_response = build_nlu_response_object(
29
  'integer',
30
  ','.join(message_text_arr),
31
- ''
32
  )
33
  prepare_message_data_for_logging(message_data, nlu_response)
34
  return nlu_response
@@ -42,6 +57,9 @@ def run_text2int_on_each_list_item(message_text_arr):
42
 
43
  Output
44
  - student_response_arr: list - a set of integers (32202 for error code)
 
 
 
45
  """
46
  student_response_arr = []
47
  for student_response in message_text_arr:
@@ -51,12 +69,63 @@ def run_text2int_on_each_list_item(message_text_arr):
51
 
52
 
53
  def run_sentiment_analysis(message_text):
 
 
 
 
 
 
 
 
54
  # TODO: Add intent labelling here
55
  # TODO: Add logic to determine whether intent labeling or sentiment analysis is more appropriate (probably default to intent labeling)
56
  return sentiment(message_text)
57
 
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  def evaluate_message_with_nlu(message_data):
 
 
 
 
 
 
 
 
60
  # Keeps system working with two different inputs - full and filtered @event object
61
  try:
62
  message_text = message_data['message_body']
@@ -76,6 +145,13 @@ def evaluate_message_with_nlu(message_data):
76
  number_api_resp = text2int(message_text.lower())
77
 
78
  if number_api_resp == 32202:
 
 
 
 
 
 
 
79
  sentiment_api_resp = sentiment(message_text)
80
  nlu_response = build_nlu_response_object(
81
  'sentiment',
 
1
+ from fuzzywuzzy import fuzz
2
  from mathtext_fastapi.logging import prepare_message_data_for_logging
3
  from mathtext.sentiment import sentiment
4
  from mathtext.text2int import text2int
 
9
  """ Turns nlu results into an object to send back to Turn.io
10
  Inputs
11
  - type: str - the type of nlu run (integer or sentiment-analysis)
12
+ - data: str/int - the student message
13
  - confidence: - the nlu confidence score (sentiment) or 0 (integer)
14
+
15
+ >>> build_nlu_response_object('integer', 8, 0)
16
+ {'type': 'integer', 'data': 8, 'confidence': 0}
17
+
18
+ >>> build_nlu_response_object('sentiment', 'POSITIVE', 0.99)
19
+ {'type': 'sentiment', 'data': 'POSITIVE', 'confidence': 0.99}
20
  """
21
  return {'type': type, 'data': data, 'confidence': confidence}
22
 
23
 
24
+ # def test_for_float_or_int(message_data, message_text):
25
+ # nlu_response = {}
26
+ # if type(message_text) == int or type(message_text) == float:
27
+ # nlu_response = build_nlu_response_object('integer', message_text, '')
28
+ # prepare_message_data_for_logging(message_data, nlu_response)
29
+ # return nlu_response
30
 
31
 
32
  def test_for_number_sequence(message_text_arr, message_data, message_text):
33
+ """ Determines if the student's message is a sequence of numbers
34
+
35
+ >>> test_for_number_sequence(['1','2','3'], {"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "I am tired", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"}, '1, 2, 3')
36
+ {'type': 'integer', 'data': '1,2,3', 'confidence': 0}
37
+
38
+ >>> test_for_number_sequence(['a','b','c'], {"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "I am tired", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"}, 'a, b, c')
39
+ {}
40
+ """
41
  nlu_response = {}
42
  if all(ele.isdigit() for ele in message_text_arr):
43
  nlu_response = build_nlu_response_object(
44
  'integer',
45
  ','.join(message_text_arr),
46
+ 0
47
  )
48
  prepare_message_data_for_logging(message_data, nlu_response)
49
  return nlu_response
 
57
 
58
  Output
59
  - student_response_arr: list - a set of integers (32202 for error code)
60
+
61
+ >>> run_text2int_on_each_list_item(['1','2','3'])
62
+ [1, 2, 3]
63
  """
64
  student_response_arr = []
65
  for student_response in message_text_arr:
 
69
 
70
 
71
  def run_sentiment_analysis(message_text):
72
+ """ Evaluates the sentiment of a student message
73
+
74
+ >>> run_sentiment_analysis("I am tired")
75
+ [{'label': 'NEGATIVE', 'score': 0.9997807145118713}]
76
+
77
+ >>> run_sentiment_analysis("I am full of joy")
78
+ [{'label': 'POSITIVE', 'score': 0.999882698059082}]
79
+ """
80
  # TODO: Add intent labelling here
81
  # TODO: Add logic to determine whether intent labeling or sentiment analysis is more appropriate (probably default to intent labeling)
82
  return sentiment(message_text)
83
 
84
 
85
def run_intent_classification(message_text):
    """ Process a student's message using basic fuzzy text comparison

    The lowercased message is compared against each known command with
    fuzz.ratio (a 0-100 similarity score). The best-scoring command is
    reported, but only when its score is strictly greater than 80.

    Input
    - message_text: str - the student message

    Output
    - nlu_response: dict - 'type' is always 'intent'; 'data' is the matched
      command ('' when nothing matched); 'confidence' is the score scaled
      to 0-1 (0 when nothing matched)

    >>> run_intent_classification("exit")
    {'type': 'intent', 'data': 'exit', 'confidence': 1.0}
    >>> run_intent_classification("exi")
    {'type': 'intent', 'data': 'exit', 'confidence': 0.86}
    >>> run_intent_classification("eas")
    {'type': 'intent', 'data': '', 'confidence': 0}
    >>> run_intent_classification("hard")
    {'type': 'intent', 'data': '', 'confidence': 0}
    >>> run_intent_classification("hardier")
    {'type': 'intent', 'data': 'harder', 'confidence': 0.92}
    >>> run_intent_classification("next")
    {'type': 'intent', 'data': 'next', 'confidence': 1.0}
    >>> run_intent_classification("stop")
    {'type': 'intent', 'data': 'stop', 'confidence': 1.0}
    """
    # Every entry needs its own comma: adjacent string literals are silently
    # concatenated, which previously turned 'next' and 'stop' into the single
    # unusable command 'nextstop'.
    commands = (
        'easier',
        'exit',
        'harder',
        'hint',
        'next',
        'stop',
    )
    nlu_response = {'type': 'intent', 'data': '', 'confidence': 0}

    # Normalise once instead of on every loop iteration.
    normalized_text = message_text.lower()

    # A command must score strictly above 80 to count as a match; among
    # those, keep the highest-scoring one rather than the last one seen.
    best_ratio = 80
    for command in commands:
        ratio = fuzz.ratio(command, normalized_text)
        if ratio > best_ratio:
            best_ratio = ratio
            nlu_response['data'] = command
            nlu_response['confidence'] = ratio / 100

    return nlu_response
118
+
119
+
120
  def evaluate_message_with_nlu(message_data):
121
+ """ Process a student's message using NLU functions and send the result
122
+
123
+ >>> evaluate_message_with_nlu({"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "8", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"})
124
+ {'type': 'integer', 'data': 8, 'confidence': 0}
125
+
126
+ >>> evaluate_message_with_nlu({"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "I am tired", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"})
127
+ {'type': 'sentiment', 'data': 'NEGATIVE', 'confidence': 0.9997807145118713}
128
+ """
129
  # Keeps system working with two different inputs - full and filtered @event object
130
  try:
131
  message_text = message_data['message_body']
 
145
  number_api_resp = text2int(message_text.lower())
146
 
147
  if number_api_resp == 32202:
148
+ print("MESSAGE TEXT")
149
+ print(message_text)
150
+ print("============")
151
+ intent_api_response = run_intent_classification(message_text)
152
+ if intent_api_response['data']:
153
+ return intent_api_response
154
+
155
  sentiment_api_resp = sentiment(message_text)
156
  nlu_response = build_nlu_response_object(
157
  'sentiment',
requirements.txt CHANGED
@@ -1,9 +1,11 @@
1
  dill
2
- en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1-py3-none-any.whl
 
3
  jsonpickle
4
  mathtext @ git+https://gitlab.com/tangibleai/community/mathtext@main
5
  fastapi==0.74.*
6
  pydantic==1.10.*
 
7
  requests==2.27.*
8
  sentencepiece==0.1.*
9
  supabase
 
1
  dill
2
+ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1-py3-none-any.whl
3
+ fuzzywuzzy
4
  jsonpickle
5
  mathtext @ git+https://gitlab.com/tangibleai/community/mathtext@main
6
  fastapi==0.74.*
7
  pydantic==1.10.*
8
+ python-Levenshtein
9
  requests==2.27.*
10
  sentencepiece==0.1.*
11
  supabase
scripts/make_request.py CHANGED
@@ -58,8 +58,8 @@ def run_simulated_request(endpoint, sample_answer, context=None):
58
  print(request)
59
 
60
 
61
- # run_simulated_request('sentiment-analysis', 'I reject it')
62
- # run_simulated_request('text2int', 'seven thousand nine hundred fifty seven')
63
  run_simulated_request('nlu', 'test message')
64
  run_simulated_request('nlu', 'eight')
65
  run_simulated_request('nlu', 'is it 8')
 
58
  print(request)
59
 
60
 
61
+ run_simulated_request('sentiment-analysis', 'I reject it')
62
+ run_simulated_request('text2int', 'seven thousand nine hundred fifty seven')
63
  run_simulated_request('nlu', 'test message')
64
  run_simulated_request('nlu', 'eight')
65
  run_simulated_request('nlu', 'is it 8')