sviddo commited on
Commit
dba933d
·
2 Parent(s): d6e6567 e6d0144

Merge branch 'feature-wormhole' into 'vlad'

Browse files
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Mathtext Fastapi
3
  emoji: 🐨
4
  colorFrom: blue
5
  colorTo: red
 
1
  ---
2
+ title: Mathtext Wormhole
3
  emoji: 🐨
4
  colorFrom: blue
5
  colorTo: red
app.py CHANGED
@@ -15,7 +15,9 @@ from pydantic import BaseModel
15
 
16
  from mathtext_fastapi.logging import prepare_message_data_for_logging
17
  from mathtext_fastapi.conversation_manager import manage_conversation_response
 
18
  from mathtext_fastapi.nlu import evaluate_message_with_nlu
 
19
 
20
  app = FastAPI()
21
 
@@ -53,7 +55,7 @@ def text2int_ep(content: Text = None):
53
  return JSONResponse(content=content)
54
 
55
 
56
- @app.post("/manager")
57
  async def programmatic_message_manager(request: Request):
58
  """
59
  Calls conversation management function to determine the next state
@@ -86,6 +88,46 @@ async def programmatic_message_manager(request: Request):
86
  return JSONResponse(context)
87
 
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  @app.post("/nlu")
90
  async def evaluate_user_message_with_nlu_api(request: Request):
91
  """ Calls nlu evaluation and returns the nlu_response
@@ -95,8 +137,8 @@ async def evaluate_user_message_with_nlu_api(request: Request):
95
 
96
  Output
97
  - int_data_dict or sent_data_dict: dict - the type of NLU run and result
98
- {'type':'integer', 'data': '8'}
99
- {'type':'sentiment', 'data': 'negative'}
100
  """
101
  data_dict = await request.json()
102
  message_data = data_dict.get('message_data', '')
 
15
 
16
  from mathtext_fastapi.logging import prepare_message_data_for_logging
17
  from mathtext_fastapi.conversation_manager import manage_conversation_response
18
+ from mathtext_fastapi.v2_conversation_manager import manage_conversation_response
19
  from mathtext_fastapi.nlu import evaluate_message_with_nlu
20
+ from mathtext_fastapi.nlu import run_intent_classification
21
 
22
  app = FastAPI()
23
 
 
55
  return JSONResponse(content=content)
56
 
57
 
58
+ @app.post("/v1/manager")
59
  async def programmatic_message_manager(request: Request):
60
  """
61
  Calls conversation management function to determine the next state
 
88
  return JSONResponse(context)
89
 
90
 
91
@app.post("/v2/manager")
async def programmatic_message_manager(request: Request):
    """
    Calls the v2 conversation management function to determine the next state

    Input
    request.body: dict - message data for the most recent user response
    {
        "author_id": "+47897891",
        "contact_uuid": "j43hk26-2hjl-43jk-hnk2-k4ljl46j0ds09",
        "author_type": "OWNER",
        "message_body": "a test message",
        "message_direction": "inbound",
        "message_id": "ABJAK64jlk3-agjkl2QHFAFH",
        "message_inserted_at": "2022-07-05T04:00:34.03352Z",
        "message_updated_at": "2023-02-14T03:54:19.342950Z"
    }

    Output
    context: dict - the information for the current state
    {
        "user": "47897891",
        "state": "welcome-message-state",
        "bot_message": "Welcome to Rori!",
        "user_message": "",
        "type": "ask"
    }
    """
    # Bind the v2 manager explicitly. This module imports
    # `manage_conversation_response` from both conversation_manager and
    # v2_conversation_manager, so the bare name resolves to whichever import
    # runs last; this handler must not depend on that ordering.
    # NOTE(review): because of that shadowing the /v1/manager handler also
    # appears to call the v2 manager - confirm and alias the imports.
    # NOTE(review): this function reuses the name of the /v1/manager handler;
    # both routes are registered, but the module attribute is rebound.
    from mathtext_fastapi.v2_conversation_manager import (
        manage_conversation_response as manage_v2_conversation_response,
    )

    data_dict = await request.json()
    context = manage_v2_conversation_response(data_dict)
    return JSONResponse(context)
122
+
123
+
124
@app.post("/intent-classification")
def intent_classification_ep(content: Text = None):
    """ Runs intent classification on the submitted text

    Input
    - content: Text - request body whose `content` attribute is classified

    Output
    - JSONResponse wrapping {"message": <classification result>}
    """
    intent_result = run_intent_classification(content.content)
    return JSONResponse(content={"message": intent_result})
129
+
130
+
131
  @app.post("/nlu")
132
  async def evaluate_user_message_with_nlu_api(request: Request):
133
  """ Calls nlu evaluation and returns the nlu_response
 
137
 
138
  Output
139
  - int_data_dict or sent_data_dict: dict - the type of NLU run and result
140
+ {'type':'integer', 'data': '8', 'confidence': 0}
141
+ {'type':'sentiment', 'data': 'negative', 'confidence': 0.99}
142
  """
143
  data_dict = await request.json()
144
  message_data = data_dict.get('message_data', '')
mathtext_fastapi/curriculum_mapper.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import re
4
+
5
+ from pathlib import Path
6
+
7
+
8
def read_and_preprocess_spreadsheet(file_name):
    """ Creates a pandas dataframe from the curriculum overview spreadsheet

    Input
    - file_name: str - name of a spreadsheet stored in mathtext_fastapi/data

    Output
    - script_df: pandas dataframe - the sheet contents, with the labels of
      columns 2-10 rewritten as integer strings and empty cells set to ''
    """
    spreadsheet_path = (
        Path(__file__).parent.parent / "mathtext_fastapi" / "data" / file_name
    )
    script_df = pd.read_excel(spreadsheet_path, engine='openpyxl')

    # Ensures the grade level columns are integers instead of floats
    leading_labels = script_df.columns[:2].tolist()
    grade_labels = script_df.columns[2:11].astype(int).astype(str).tolist()
    trailing_labels = script_df.columns[11:].tolist()
    script_df.columns = leading_labels + grade_labels + trailing_labels

    script_df.fillna('', inplace=True)
    return script_df
16
+
17
+
18
def extract_skill_code(skill):
    """ Looks within a curricular skill description for its descriptive code

    Input
    - skill: str - a brief description of a curricular skill

    Output
    - str - the first code shaped like '<capital letter><digit>.<n>.<n>'

    Raises
    - ValueError - when the description contains no such code

    >>> extract_skill_code('A3.3.4 - Solve inequalities')
    'A3.3.4'
    >>> extract_skill_code('A3.3.2 - Graph linear equations, and identify the x- and y-intercepts or the slope of a line')
    'A3.3.2'
    """
    pattern = r'[A-Z][0-9]\.\d+\.\d+'
    result = re.search(pattern, skill)
    if result is None:
        # Fail with a message that names the offending description instead
        # of an AttributeError on None.
        raise ValueError(f"No skill code found in skill description: {skill!r}")
    return result.group()
32
+
33
+
34
def build_horizontal_transitions(script_df):
    """ Build a list of transitional relationships within a curricular skill

    For every row, the grades marked 'x' are collected and linked in
    ascending order ("right") and then in descending order ("left"). Each
    transition connects a marked grade to the next marked grade in that
    direction.

    Inputs
    - script_df: pandas dataframe - an overview of the curriculum skills by grade level

    Output
    - horizontal_transitions: array of arrays - transition data with label, from state, and to state

    >>> script_df = read_and_preprocess_spreadsheet('curriculum_framework_for_tests.xlsx')
    >>> build_horizontal_transitions(script_df)
    [['right', 'N1.1.1_G1', 'N1.1.1_G2'], ['right', 'N1.1.1_G2', 'N1.1.1_G3'], ['right', 'N1.1.1_G3', 'N1.1.1_G4'], ['right', 'N1.1.1_G4', 'N1.1.1_G5'], ['right', 'N1.1.1_G5', 'N1.1.1_G6'], ['left', 'N1.1.1_G6', 'N1.1.1_G5'], ['left', 'N1.1.1_G5', 'N1.1.1_G4'], ['left', 'N1.1.1_G4', 'N1.1.1_G3'], ['left', 'N1.1.1_G3', 'N1.1.1_G2'], ['left', 'N1.1.1_G2', 'N1.1.1_G1'], ['right', 'N1.1.2_G1', 'N1.1.2_G2'], ['right', 'N1.1.2_G2', 'N1.1.2_G3'], ['right', 'N1.1.2_G3', 'N1.1.2_G4'], ['right', 'N1.1.2_G4', 'N1.1.2_G5'], ['right', 'N1.1.2_G5', 'N1.1.2_G6'], ['left', 'N1.1.2_G6', 'N1.1.2_G5'], ['left', 'N1.1.2_G5', 'N1.1.2_G4'], ['left', 'N1.1.2_G4', 'N1.1.2_G3'], ['left', 'N1.1.2_G3', 'N1.1.2_G2'], ['left', 'N1.1.2_G2', 'N1.1.2_G1']]
    """
    # Grade columns are addressed by label ('1'..'9'), the same way
    # gather_all_vertical_matches reads them. Integer keys on the row would
    # be treated as positions, which are offset from the grade numbers by the
    # leading non-grade columns and made the upper grades unreachable.
    grade_columns = [
        str(grade) for grade in range(1, 10) if str(grade) in script_df.columns
    ]

    horizontal_transitions = []
    for _, row in script_df.iterrows():
        skill_code = extract_skill_code(row['Knowledge or Skill'])

        # Grades (ascending) in which this skill is marked with an 'x'
        marked_grades = [
            grade for grade in grade_columns
            if str(row[grade]).lower().strip() == 'x'
        ]

        # Rightward: each marked grade leads to the next higher marked grade
        for grade, next_grade in zip(marked_grades, marked_grades[1:]):
            horizontal_transitions.append([
                "right",
                f"{skill_code}_G{grade}",
                f"{skill_code}_G{next_grade}"
            ])

        # Leftward: each marked grade leads to the next lower marked grade
        descending_grades = marked_grades[::-1]
        for grade, previous_grade in zip(descending_grades, descending_grades[1:]):
            horizontal_transitions.append([
                "left",
                f"{skill_code}_G{grade}",
                f"{skill_code}_G{previous_grade}"
            ])

    return horizontal_transitions
81
+
82
+
83
def gather_all_vertical_matches(script_df):
    """ Collect every skill marked at each grade level, grade by grade

    Inputs
    - script_df: pandas dataframe - an overview of the curriculum skills by grade level

    Output
    - all_matches: array of arrays - represents skills at each grade level
      as [skill code, grade column label], ordered by grade and then by row

    >>> script_df = read_and_preprocess_spreadsheet('curriculum_framework_for_tests.xlsx')
    >>> gather_all_vertical_matches(script_df)
    [['N1.1.1', '1'], ['N1.1.2', '1'], ['N1.1.1', '2'], ['N1.1.2', '2'], ['N1.1.1', '3'], ['N1.1.2', '3'], ['N1.1.1', '4'], ['N1.1.2', '4'], ['N1.1.1', '5'], ['N1.1.2', '5'], ['N1.1.1', '6'], ['N1.1.2', '6']]
    """
    all_matches = []
    columns = ['1', '2', '3', '4', '5', '6', '7', '8', '9']
    skill_descriptions = script_df['Knowledge or Skill']

    for column in columns:
        # Series.items() replaces Series.iteritems(), which pandas 2.0 removed
        for index, value in script_df[column].items():
            # Normalise the marker the same way build_horizontal_transitions
            # does, so 'X' or ' x ' count in both directions.
            if str(value).lower().strip() == 'x':
                skill_code = extract_skill_code(skill_descriptions[index])
                all_matches.append([skill_code, column])
    return all_matches
110
+
111
+
112
def build_vertical_transitions(script_df):
    """ Build a list of transitional relationships within a grade level across skills

    Walks the [skill, grade] list from gather_all_vertical_matches forwards
    ("down") and backwards ("up"), linking each entry to its neighbour.
    The destination label reuses the source entry's grade, so the last
    skill of one grade points at the first listed skill under the same
    grade label (visible in the doctest).
    NOTE(review): confirm that this wrap-around is intended.

    Inputs
    - script_df: pandas dataframe - an overview of the curriculum skills by grade level

    Output
    - vertical_transitions: array of arrays - transition data with label, from state, and to state

    >>> script_df = read_and_preprocess_spreadsheet('curriculum_framework_for_tests.xlsx')
    >>> build_vertical_transitions(script_df)
    [['down', 'N1.1.1_G1', 'N1.1.2_G1'], ['down', 'N1.1.2_G1', 'N1.1.1_G1'], ['down', 'N1.1.1_G2', 'N1.1.2_G2'], ['down', 'N1.1.2_G2', 'N1.1.1_G2'], ['down', 'N1.1.1_G3', 'N1.1.2_G3'], ['down', 'N1.1.2_G3', 'N1.1.1_G3'], ['down', 'N1.1.1_G4', 'N1.1.2_G4'], ['down', 'N1.1.2_G4', 'N1.1.1_G4'], ['down', 'N1.1.1_G5', 'N1.1.2_G5'], ['down', 'N1.1.2_G5', 'N1.1.1_G5'], ['down', 'N1.1.1_G6', 'N1.1.2_G6'], ['up', 'N1.1.2_G6', 'N1.1.1_G6'], ['up', 'N1.1.1_G6', 'N1.1.2_G6'], ['up', 'N1.1.2_G5', 'N1.1.1_G5'], ['up', 'N1.1.1_G5', 'N1.1.2_G5'], ['up', 'N1.1.2_G4', 'N1.1.1_G4'], ['up', 'N1.1.1_G4', 'N1.1.2_G4'], ['up', 'N1.1.2_G3', 'N1.1.1_G3'], ['up', 'N1.1.1_G3', 'N1.1.2_G3'], ['up', 'N1.1.2_G2', 'N1.1.1_G2'], ['up', 'N1.1.1_G2', 'N1.1.2_G2'], ['up', 'N1.1.2_G1', 'N1.1.1_G1']]
    """
    all_matches = gather_all_vertical_matches(script_df)
    vertical_transitions = []

    # The ends of the list are detected by position rather than by comparing
    # values, so a repeated [skill, grade] pair cannot be mistaken for the
    # first or last entry and silently lose its transition.

    # Downward: every entry except the last links to the one after it
    for index in range(len(all_matches) - 1):
        skill, grade = all_matches[index]
        vertical_transitions.append([
            "down",
            f"{skill}_G{grade}",
            f"{all_matches[index + 1][0]}_G{grade}"
        ])

    # Upward: every entry except the first, visited last-to-first, links to
    # the one before it
    for index in range(len(all_matches) - 1, 0, -1):
        skill, grade = all_matches[index]
        vertical_transitions.append([
            "up",
            f"{skill}_G{grade}",
            f"{all_matches[index - 1][0]}_G{grade}"
        ])

    return vertical_transitions
152
+
153
+
154
def build_all_states(all_transitions):
    """ Creates an array with all state labels for the curriculum

    Input
    - all_transitions: list of lists - all possible up, down, left, or right transitions in curriculum

    Output
    - all_states: list - a collection of state labels (skill code and grade number),
      each listed once in order of first appearance

    >>> all_transitions = [['right', 'N1.1.1_G1', 'N1.1.1_G2'], ['right', 'N1.1.1_G2', 'N1.1.1_G3'], ['right', 'N1.1.1_G3', 'N1.1.1_G4'], ['right', 'N1.1.1_G4', 'N1.1.1_G5'], ['right', 'N1.1.1_G5', 'N1.1.1_G6'], ['left', 'N1.1.1_G6', 'N1.1.1_G5'], ['left', 'N1.1.1_G5', 'N1.1.1_G4'], ['left', 'N1.1.1_G4', 'N1.1.1_G3'], ['left', 'N1.1.1_G3', 'N1.1.1_G2'], ['left', 'N1.1.1_G2', 'N1.1.1_G1'], ['right', 'N1.1.2_G1', 'N1.1.2_G2'], ['right', 'N1.1.2_G2', 'N1.1.2_G3'], ['right', 'N1.1.2_G3', 'N1.1.2_G4'], ['right', 'N1.1.2_G4', 'N1.1.2_G5'], ['right', 'N1.1.2_G5', 'N1.1.2_G6'], ['left', 'N1.1.2_G6', 'N1.1.2_G5'], ['left', 'N1.1.2_G5', 'N1.1.2_G4'], ['left', 'N1.1.2_G4', 'N1.1.2_G3'], ['left', 'N1.1.2_G3', 'N1.1.2_G2'], ['left', 'N1.1.2_G2', 'N1.1.2_G1'], ['down', 'N1.1.1_G1', 'N1.1.2_G1'], ['down', 'N1.1.2_G1', 'N1.1.1_G1'], ['down', 'N1.1.1_G2', 'N1.1.2_G2'], ['down', 'N1.1.2_G2', 'N1.1.1_G2'], ['down', 'N1.1.1_G3', 'N1.1.2_G3'], ['down', 'N1.1.2_G3', 'N1.1.1_G3'], ['down', 'N1.1.1_G4', 'N1.1.2_G4'], ['down', 'N1.1.2_G4', 'N1.1.1_G4'], ['down', 'N1.1.1_G5', 'N1.1.2_G5'], ['down', 'N1.1.2_G5', 'N1.1.1_G5'], ['down', 'N1.1.1_G6', 'N1.1.2_G6'], ['up', 'N1.1.2_G6', 'N1.1.1_G6'], ['up', 'N1.1.1_G6', 'N1.1.2_G6'], ['up', 'N1.1.2_G5', 'N1.1.1_G5'], ['up', 'N1.1.1_G5', 'N1.1.2_G5'], ['up', 'N1.1.2_G4', 'N1.1.1_G4'], ['up', 'N1.1.1_G4', 'N1.1.2_G4'], ['up', 'N1.1.2_G3', 'N1.1.1_G3'], ['up', 'N1.1.1_G3', 'N1.1.2_G3'], ['up', 'N1.1.2_G2', 'N1.1.1_G2'], ['up', 'N1.1.1_G2', 'N1.1.2_G2'], ['up', 'N1.1.2_G1', 'N1.1.1_G1']]
    >>> build_all_states(all_transitions)
    ['N1.1.1_G1', 'N1.1.1_G2', 'N1.1.1_G3', 'N1.1.1_G4', 'N1.1.1_G5', 'N1.1.1_G6', 'N1.1.2_G1', 'N1.1.2_G2', 'N1.1.2_G3', 'N1.1.2_G4', 'N1.1.2_G5', 'N1.1.2_G6']
    """
    all_states = []
    # A set gives constant-time membership tests; the list keeps the order
    # in which states were first seen.
    seen_states = set()
    for transition in all_transitions:
        # Element 0 is the transition label; the rest are state labels
        for state in transition[1:]:
            if state not in seen_states:
                seen_states.add(state)
                all_states.append(state)
    return all_states
175
+
176
+
177
def build_curriculum_logic(file_name='Rori_Framework_v1.xlsx'):
    """ Builds the state labels and transitions for a curriculum spreadsheet

    Input
    - file_name: str - spreadsheet to read from the package data folder
      (defaults to the Rori framework, so existing callers are unaffected)

    Output
    - all_states: list - every state label that appears in a transition
    - all_transitions: list of lists - horizontal transitions followed by
      vertical transitions
    """
    script_df = read_and_preprocess_spreadsheet(file_name)
    horizontal_transitions = build_horizontal_transitions(script_df)
    vertical_transitions = build_vertical_transitions(script_df)
    all_transitions = horizontal_transitions + vertical_transitions
    all_states = build_all_states(all_transitions)
    return all_states, all_transitions
mathtext_fastapi/data/Rori_Framework_v1.xlsx ADDED
Binary file (420 kB). View file
 
mathtext_fastapi/data/curriculum_framework_for_tests.xlsx ADDED
Binary file (510 kB). View file
 
mathtext_fastapi/data/intent_classification_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea4954368c3b95673167ce347f2962b5508c4af295b6af58b6c11b3c1075b42e
3
+ size 127903
mathtext_fastapi/data/labeled_data.csv ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Utterance,Label
2
+ skip this,skip
3
+ this is stupid,skip
4
+ this is stupid,harder
5
+ this is stupid,feedback
6
+ I'm done,exit
7
+ quit,exit
8
+ I don't know,hint
9
+ help,hint
10
+ can I do something else?,main menu
11
+ what's going on,rapport
12
+ what's going on,main menu
13
+ tell me a joke,rapport
14
+ tell me a joke,main menu
15
+ Sorry I don't understand,do not know
16
+ Ten thousand,number
17
+ 1.234,number
18
+ "10,000",number
19
+ "123, 456",numbers
20
+ "11, 12, 13",numbers
21
+ "100, 200, 300",numbers
22
+ "100, 200",numbers
23
+ Stop for a minute,wait
24
+ Bye bye,exit
25
+ Good night,exit
26
+ Am done,exit
27
+ Yes,yes
28
+ Help,help
29
+ Idiot,harder
30
+ Stop,exit
31
+ I don't get it,hint
32
+ Math,main menu
33
+ Math,math topic
34
+ Tomorrow let do math,wait
35
+ Later,wait
36
+ Pls i will continue pls,skip
37
+ Rori tell me now,help
38
+ harder,skip
39
+ Stop for now i wont to go to School,exit
40
+ Next,next
41
+ Okay,okay
42
+ Great,affirmation
43
+ Give me for example,example
44
+ No I want to learn algebraic expressions,algebra
45
+ Hi rori,greeting
46
+ *help*,help
47
+ *Next*,next
48
+ Okay nice,okay
49
+ I don't know it,hint
50
+ Nex,next
51
+ I need a help,hint
52
+ Please can I ask your any math questions?,faq
53
+ The answer is 1,answer
54
+ The answer is 1,number
55
+ But 0.8 is also same as . 8 so I was actually right,I'm right
56
+ What is the number system?,faq
57
+ Ok thanks,thanks
58
+ I'm going to school now,exit
59
+ Let's move to another topic,main menu
60
+ "Ummanni saba
61
+ Kebena bara kana galmi keenya inni guddaan bilisummaa qofa #Gabrummaan_ammaan booda_gaha namni hundi bakka jiru irraa kutatee ka,ee jira obboleewwan goototni keenya jiran haqa Kebenaaf jechaa jiru Guraandhala 29 booda walabummaa keenya labsina Dhugaa qabna Ni injifanna *** . Naannoo giddu galeessa Itoophiyaatti #Kebenaan aanaa addaati Kun murtoo ummata Kebenaa hundaati",spam
62
+ Yes it,yes
63
+ U type fast,too fast
64
+ I mean your typing is fast,too fast
65
+ Why do u type so fast,too fast
66
+ Ur typing is fast,too fast
67
+ Can we go to a real work,harder
68
+ I know all this,harder
69
+ Answer this,preamble
70
+ Am tired,exit
71
+ This is not what I asked for,main menu
72
+ Bye,exit
73
+ 😱😱😂😂😂😡😰😰😰😒,spam
74
+ Gbxbxbcbcbbcbchcbchc,spam
75
+ I want to solve math,math topic
76
+ Pleas let start with the fraction,fractions topic
77
+ Okey,okay
78
+ i need substraction,subtraction topic
79
+ Can you please stop with me,exit
80
+ Another one,next
81
+ Harder or easy,main menu
82
+ Hard or easier,main menu
83
+ Jump topic,menu
84
+ Got it,okay
85
+ I didn't understand,don't know
86
+ Don't understand,don't know
87
+ Excuse me pls,hint
88
+ Let stop for today,exit
89
+ Help and stop asking me stupid questions,
90
+ Ykay,okay
91
+ Not interested in solving this,menu
92
+ Stpo,exit
93
+ Hiiiiiii,greeting
94
+ Hi rori,greeting
95
+ I've done this things before,harder
96
+ Which number my phone number,
97
+ Unit,main menu
98
+ No ide,don't know
99
+ No ide,hint
100
+ No idea,don't know
101
+ 🙈🤩😇🙏,spam
102
+ Thank u,thanks
103
+ Do you know programming,faq
104
+ Delete my number,unsubscribe
105
+ See u,exit
106
+ Can I go for break ??,wait
107
+ I wanna fuck,profanity
108
+ Enough of this nw,exit
109
+ Can we move to equations,equations
110
+ Do you know you are an idiot,insult
111
+ 3 digit number,number
112
+ 3 digit number,answer
113
+ Three digit number,confident answer
114
+ Three digit number,number
115
+ Good evening Rori,greeting
116
+ 89 Next,answer
117
+ 89 Next,number
118
+ 3 digit number,answer
119
+ Three digit number,answer
120
+ This is too simple,harder
121
+ Am not a kid,harder
122
+ Hey Miss Roribcan you ask me some question from Secondary 2,greeting
123
+ Hey Miss Roribcan you ask me some question from Secondary 2,faq
124
+ Hey Miss Roribcan you ask me some question from Secondary 2,main menu
125
+ don't know,hint
126
+ don't know,easier
127
+ 𝑴𝒂𝒕𝒉,math
128
+ Rori can you help me to gat value,
129
+ I called but u are not picking up,
130
+ 0.3 answer,answer
131
+ Sorry rori was101,answer
132
+ Y is it 6,answer
133
+ Y is it 6,number
134
+ 0.3 answer,number
135
+ Why 0.5,more explanation
136
+ Why 0.5,number
137
+ 6\nNext,Next
138
+ How is the answer is 11,more explanation
139
+ How comes we have 11,more explanation
140
+ Yes 6,answer
141
+ Yes 6,number
142
+ 6\nNext,number
143
+ How is the answer is 11,number
144
+ How comes we have 11,number
mathtext_fastapi/data/text2int_results.csv CHANGED
@@ -1,92 +1,105 @@
1
  input,output,text2int,score
2
- notanumber,32202,32202,True
3
- this is not a number,32202,32202,True
4
- fourteen,14,14,True
5
- forteen,14,14,True
6
- one thousand four hundred ninety two,1492,1492,True
7
- one thousand ninety two,1092,1092,True
8
- Fourteen Hundred Ninety-Two,1492,1492,True
9
- Fourteen Hundred,1400,1400,True
10
- Ninety nine,99,99,True
11
- fifteen thousand five hundred-sixty,15560,15560,True
12
- three hundred fifty,350,350,True
13
- one nine eight five,1985,1985,True
14
- nineteen eighty-five,1985,1605,False
15
- oh one,1,1,True
16
- six oh 1,601,601,True
17
- sex,6,6,True
18
- six,6,6,True
19
- eight oh,80,8,False
20
- eighty,80,80,True
21
- ate,8,1,False
22
- double eight,88,32202,False
23
- eight three seven five three O nine,8375309,8375329,False
24
- eight three seven five three oh nine,8375309,8375309,True
25
- eight three seven five three zero nine,8375309,8375309,True
26
- eight three seven five three oh ni-ee-ine,8375309,837530611,False
27
- two eight,28,16,False
28
- seven oh eleven,7011,77,False
29
- seven elevens,77,77,True
30
- seven eleven,711,77,False
31
- ninety nine oh five,9905,149,False
32
- seven 0 seven 0 seven 0 seven,7070707,7070707,True
33
- 123 hundred,123000,223,False
34
- 5 o 5,505,525,False
35
- 15 o 5,1505,22,False
36
- 15-o 5,1505,22,False
37
- 15 o-5,1505,22,False
38
- 911-thousand,911000,911000,True
39
- twenty-two twenty-two,2222,44,False
40
- twenty-two twenty-twos,484,44,False
41
- four eighty four,484,404,False
42
- four eighties,320,72,False
43
- four eighties and nine nineties,1130,243,False
44
- ninety nine hundred and seventy seven,9977,276,False
45
- seven thousands,7000,7000,True
46
- 2 hundreds,200,200,True
47
- 99 thousands and one,99001,99001,True
48
- "forty-five thousand, seven hundred and nine",45709,1161,False
49
- eighty eight hundred eighty,8880,268,False
50
- a hundred hundred,10000,32202,False
51
- a hundred thousand,100000,32202,False
52
- a hundred million,100000000,32202,False
53
- nineteen ninety nine,1999,1809,False
54
- forteen twenty seven,1427,307,False
55
- seventeen-thousand and seventy two,17072,17072,True
56
- two hundred and nine,209,209,True
57
- two thousand ten,2010,2010,True
58
- two thousand and ten,2010,2010,True
59
- twelve million,12000000,12000000,True
60
- 8 billion,8000000000,8000000000,True
61
- twenty ten,2010,2010,True
62
- thirty-two hundred,3200,3200,True
63
- nine,9,9,True
64
- forty two,42,42,True
65
- 1 2 three,123,123,True
66
- fourtean,14,14,True
67
- one tousand four hundred ninty two,1492,1492,True
68
- Furteen Hundrd Ninety-Too,1492,1492,True
69
- forrteen,14,14,True
70
- sevnteen-thosand and seventy two,17072,17072,True
71
- ninety nine hundred ad seventy seven,9977,32202,False
72
- seven thusands,7000,7000,True
73
- 2 hunreds,200,200,True
74
- 99 tousands and one,99001,99001,True
75
- eighty ate hundred eighty,8880,261,False
76
- fourteen Hundred,1400,1400,True
77
- 8 Bilion,8000000000,8000000,False
78
- one million three thousand one,1003001,1003001,True
79
- four million nine thousand seven,4009007,4009007,True
80
- two million five hundred thousand,2500000,2001500,False
81
- two tousand ten,2010,2010,True
82
- two thousand teen,2010,2007,False
83
- tvelve milion,12000000,12000000,True
84
- tventy ten,2010,2010,True
85
- tirty-twoo hunred,3200,3200,True
86
- sevn thoosands,7000,7000,True
87
- five,5,5,True
88
- ten,10,10,True
89
- one two three and ten,12310,51,False
90
- ONE MILLion three hunded and fiv,1000305,1000305,True
91
- "50,500 and six",50506,50506,True
92
- one_million_and_five,1000005,1000005,True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  input,output,text2int,score
2
+ notanumber,32202.0,32202.0,True
3
+ this is not a number,32202.0,32202.0,True
4
+ fourteen,14.0,14.0,True
5
+ forteen,14.0,14.0,True
6
+ one thousand four hundred ninety two,1492.0,1492.0,True
7
+ one thousand ninety two,1092.0,1092.0,True
8
+ Fourteen Hundred Ninety-Two,1492.0,1492.0,True
9
+ Fourteen Hundred,1400.0,1400.0,True
10
+ Ninety nine,99.0,99.0,True
11
+ fifteen thousand five hundred-sixty,15560.0,15560.0,True
12
+ three hundred fifty,350.0,350.0,True
13
+ one nine eight five,1985.0,1985.0,True
14
+ nineteen eighty-five,1985.0,1605.0,False
15
+ oh one,1.0,1.0,True
16
+ six oh 1,601.0,601.0,True
17
+ sex,6.0,6.0,True
18
+ six,6.0,6.0,True
19
+ eight oh,80.0,8.0,False
20
+ eighty,80.0,80.0,True
21
+ ate,8.0,1.0,False
22
+ double eight,88.0,8.0,False
23
+ eight three seven five three O nine,8375309.0,8375319.0,False
24
+ eight three seven five three oh nine,8375309.0,8375309.0,True
25
+ eight three seven five three zero nine,8375309.0,8375309.0,True
26
+ eight three seven five three oh ni-ee-ine,8375309.0,837530111.0,False
27
+ two eight,28.0,16.0,False
28
+ seven oh eleven,7011.0,77.0,False
29
+ seven elevens,77.0,77.0,True
30
+ seven eleven,711.0,77.0,False
31
+ ninety nine oh five,9905.0,149.0,False
32
+ seven 0 seven 0 seven 0 seven,7070707.0,7070707.0,True
33
+ 123 hundred,123000.0,223.0,False
34
+ 5 o 5,505.0,515.0,False
35
+ 15 o 5,1505.0,21.0,False
36
+ 15-o 5,1505.0,21.0,False
37
+ 15 o-5,1505.0,21.0,False
38
+ 911-thousand,911000.0,911000.0,True
39
+ twenty-two twenty-two,2222.0,44.0,False
40
+ twenty-two twenty-twos,484.0,44.0,False
41
+ four eighty four,484.0,404.0,False
42
+ four eighties,320.0,72.0,False
43
+ four eighties and nine nineties,1130.0,243.0,False
44
+ ninety nine hundred and seventy seven,9977.0,276.0,False
45
+ seven thousands,7000.0,7000.0,True
46
+ 2 hundreds,200.0,200.0,True
47
+ 99 thousands and one,99001.0,99001.0,True
48
+ "forty-five thousand, seven hundred and nine",45709.0,1161.0,False
49
+ eighty eight hundred eighty,8880.0,268.0,False
50
+ a hundred hundred,10000.0,100.0,False
51
+ a hundred thousand,100000.0,100.0,False
52
+ a hundred million,100000000.0,100.0,False
53
+ nineteen ninety nine,1999.0,1809.0,False
54
+ forteen twenty seven,1427.0,307.0,False
55
+ seventeen-thousand and seventy two,17072.0,17072.0,True
56
+ two hundred and nine,209.0,209.0,True
57
+ two thousand ten,2010.0,2010.0,True
58
+ two thousand and ten,2010.0,2010.0,True
59
+ twelve million,12000000.0,12000000.0,True
60
+ 8 billion,8000000000.0,8000000000.0,True
61
+ twenty ten,2010.0,2010.0,True
62
+ thirty-two hundred,3200.0,3200.0,True
63
+ nine,9.0,9.0,True
64
+ forty two,42.0,42.0,True
65
+ 1 2 three,123.0,123.0,True
66
+ fourtean,14.0,14.0,True
67
+ one tousand four hundred ninty two,1492.0,1492.0,True
68
+ Furteen Hundrd Ninety-Too,1492.0,1492.0,True
69
+ forrteen,14.0,14.0,True
70
+ sevnteen-thosand and seventy two,17072.0,17072.0,True
71
+ ninety nine hundred ad seventy seven,9977.0,90.0,False
72
+ seven thusands,7000.0,7000.0,True
73
+ 2 hunreds,200.0,200.0,True
74
+ 99 tousands and one,99001.0,99001.0,True
75
+ eighty ate hundred eighty,8880.0,261.0,False
76
+ fourteen Hundred,1400.0,1400.0,True
77
+ 8 Bilion,8000000000.0,8000000.0,False
78
+ one million three thousand one,1003001.0,1003001.0,True
79
+ four million nine thousand seven,4009007.0,4009007.0,True
80
+ two million five hundred thousand,2500000.0,2001500.0,False
81
+ two tousand ten,2010.0,2010.0,True
82
+ two thousand teen,2010.0,2007.0,False
83
+ tvelve milion,12000000.0,12000000.0,True
84
+ tventy ten,2010.0,2010.0,True
85
+ tirty-twoo hunred,3200.0,3200.0,True
86
+ sevn thoosands,7000.0,7000.0,True
87
+ five,5.0,5.0,True
88
+ ten,10.0,10.0,True
89
+ one two three and ten,12310.0,51.0,False
90
+ ONE MILLion three hunded and fiv,1000305.0,1000305.0,True
91
+ "50,500 and six",50506.0,50506.0,True
92
+ one_million_and_five,1000005.0,1000005.0,True
93
+ 2.0,2.0,2.0,True
94
+ 4.5,4.5,4.5,True
95
+ 12345.001,12345.001,12345.001,True
96
+ 7..0,7.0,7.0,True
97
+ 0.06,0.06,0.06,True
98
+ "0,25",0.25,25.0,False
99
+ o.45,0.45,32202.0,False
100
+ 0.1.2,0.12,32202.0,False
101
+ 0.00009,9e-05,9e-05,True
102
+ 0.01.,0.01,0.01,True
103
+ I don't know 8,8.0,8.0,True
104
+ "You're wrong it's not 20, it's 45",45.0,20.0,False
105
+ I don't understand why it's 19,19.0,19.0,True
mathtext_fastapi/global_state_manager.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transitions import Machine
2
+ from mathtext_fastapi.curriculum_mapper import build_curriculum_logic
3
+
4
+ all_states, all_transitions = build_curriculum_logic()
5
+
6
class GlobalStateManager:
    """ Holds a `transitions` Machine built from the curriculum map

    The state labels and the [label, from state, to state] transitions are
    the module-level results of build_curriculum_logic() and are shared by
    every instance.
    """

    # State labels and transitions computed once when the module is imported
    states = all_states
    transitions = all_transitions

    def __init__(self, initial_state='N1.1.1_G1'):
        """ Attaches a Machine to this object, starting at initial_state """
        machine_options = {
            'model': self,
            'states': GlobalStateManager.states,
            'transitions': GlobalStateManager.transitions,
            'initial': initial_state,
        }
        self.machine = Machine(**machine_options)
21
+
22
+
23
+ curriculum = GlobalStateManager()
mathtext_fastapi/intent_classification.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from pathlib import Path
5
+ from sentence_transformers import SentenceTransformer
6
+ from sklearn.linear_model import LogisticRegression
7
+ from joblib import dump, load
8
+
9
def pickle_model(model):
    """ Writes the given model to data/intent_classification_model.joblib """
    package_data = Path(__file__).parent.parent / "mathtext_fastapi" / "data"
    model_path = package_data / "intent_classification_model.joblib"
    dump(model, model_path)
12
+
13
+
14
def create_intent_classification_model():
    """ Trains the intent classifier on labeled_data.csv and saves it

    Reads the first two columns (utterance and label) of
    data/labeled_data.csv, drops rows with a missing value, encodes each
    utterance with the 'all-MiniLM-L6-v2' sentence encoder, fits a
    class-balanced logistic regression, and stores it with pickle_model().
    """
    encoder = SentenceTransformer('all-MiniLM-L6-v2')
    # path = list(Path.cwd().glob('*.csv'))
    DATA_DIR = Path(__file__).parent.parent / "mathtext_fastapi" / "data" / "labeled_data.csv"

    print("DATA_DIR")
    print(f"{DATA_DIR}")

    with open(f"{DATA_DIR}", 'r', newline='', encoding='utf-8') as f:
        df = pd.read_csv(f)
    df = df[df.columns[:2]]
    df = df.dropna()

    # Encode each utterance once; the previous unused `X_explore` copy ran
    # the encoder over the whole dataset a second time for nothing.
    X = np.array([list(encoder.encode(x)) for x in df['Utterance']])
    y = df['Label']
    model = LogisticRegression(class_weight='balanced')
    model.fit(X, y, sample_weight=None)

    print("MODEL")
    print(model)

    pickle_model(model)
36
+
37
+
38
def retrieve_intent_classification_model():
    """ Loads the model stored at data/intent_classification_model.joblib """
    package_data = Path(__file__).parent.parent / "mathtext_fastapi" / "data"
    return load(package_data / "intent_classification_model.joblib")
42
+
43
+
44
# Sentence encoders already built in this process, keyed by model name
_SENTENCE_ENCODERS = {}


def _get_sentence_encoder(model_name='all-MiniLM-L6-v2'):
    """ Returns the encoder for model_name, constructing it on first use only """
    if model_name not in _SENTENCE_ENCODERS:
        _SENTENCE_ENCODERS[model_name] = SentenceTransformer(model_name)
    return _SENTENCE_ENCODERS[model_name]


def predict_message_intent(message):
    """ Predicts the intent label of a message with the stored classifier

    Input
    - message: str - the text to classify

    Output
    - dict - {"type": "intent", "data": <predicted label>,
      "confidence": <highest class probability>}
    """
    # The encoder is reused between calls instead of being rebuilt per
    # message. The classifier is still loaded on each call, so a newly
    # saved model file is picked up without a restart.
    encoder = _get_sentence_encoder()
    model = retrieve_intent_classification_model()

    tokenized_utterance = np.array([list(encoder.encode(message))])
    predicted_label = model.predict(tokenized_utterance)
    predicted_probabilities = model.predict_proba(tokenized_utterance)
    confidence_score = predicted_probabilities.max()

    return {"type": "intent", "data": predicted_label[0], "confidence": confidence_score}
mathtext_fastapi/nlu.py CHANGED
@@ -1,6 +1,8 @@
 
1
  from mathtext_fastapi.logging import prepare_message_data_for_logging
2
  from mathtext.sentiment import sentiment
3
  from mathtext.text2int import text2int
 
4
  import re
5
 
6
 
@@ -8,27 +10,41 @@ def build_nlu_response_object(type, data, confidence):
8
  """ Turns nlu results into an object to send back to Turn.io
9
  Inputs
10
  - type: str - the type of nlu run (integer or sentiment-analysis)
11
- - data: str - the student message
12
  - confidence: - the nlu confidence score (sentiment) or '' (integer)
 
 
 
 
 
 
13
  """
14
  return {'type': type, 'data': data, 'confidence': confidence}
15
 
16
 
17
- def test_for_float_or_int(message_data, message_text):
18
- nlu_response = {}
19
- if type(message_text) == int or type(message_text) == float:
20
- nlu_response = build_nlu_response_object('integer', message_text, '')
21
- prepare_message_data_for_logging(message_data, nlu_response)
22
- return nlu_response
23
 
24
 
25
  def test_for_number_sequence(message_text_arr, message_data, message_text):
 
 
 
 
 
 
 
 
26
  nlu_response = {}
27
  if all(ele.isdigit() for ele in message_text_arr):
28
  nlu_response = build_nlu_response_object(
29
  'integer',
30
  ','.join(message_text_arr),
31
- ''
32
  )
33
  prepare_message_data_for_logging(message_data, nlu_response)
34
  return nlu_response
@@ -42,6 +58,9 @@ def run_text2int_on_each_list_item(message_text_arr):
42
 
43
  Output
44
  - student_response_arr: list - a set of integers (32202 for error code)
 
 
 
45
  """
46
  student_response_arr = []
47
  for student_response in message_text_arr:
@@ -51,12 +70,63 @@ def run_text2int_on_each_list_item(message_text_arr):
51
 
52
 
53
  def run_sentiment_analysis(message_text):
 
 
 
 
 
 
 
 
54
  # TODO: Add intent labelling here
55
  # TODO: Add logic to determine whether intent labeling or sentiment analysis is more appropriate (probably default to intent labeling)
56
  return sentiment(message_text)
57
 
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  def evaluate_message_with_nlu(message_data):
 
 
 
 
 
 
 
 
60
  # Keeps system working with two different inputs - full and filtered @event object
61
  try:
62
  message_text = message_data['message_body']
@@ -73,20 +143,31 @@ def evaluate_message_with_nlu(message_data):
73
  }
74
  message_text = message_data['message_body']
75
 
 
 
 
 
 
76
  number_api_resp = text2int(message_text.lower())
77
 
78
  if number_api_resp == 32202:
79
- sentiment_api_resp = sentiment(message_text)
80
- nlu_response = build_nlu_response_object(
81
- 'sentiment',
82
- sentiment_api_resp[0]['label'],
83
- sentiment_api_resp[0]['score']
84
- )
 
 
 
 
 
 
85
  else:
86
  nlu_response = build_nlu_response_object(
87
  'integer',
88
  number_api_resp,
89
- ''
90
  )
91
 
92
  prepare_message_data_for_logging(message_data, nlu_response)
 
1
+ from fuzzywuzzy import fuzz
2
  from mathtext_fastapi.logging import prepare_message_data_for_logging
3
  from mathtext.sentiment import sentiment
4
  from mathtext.text2int import text2int
5
+ from mathtext_fastapi.intent_classification import create_intent_classification_model, retrieve_intent_classification_model, predict_message_intent
6
  import re
7
 
8
 
 
10
  """ Turns nlu results into an object to send back to Turn.io
11
  Inputs
12
  - type: str - the type of nlu run (integer or sentiment-analysis)
13
+ - data: str/int - the student message
14
  - confidence: - the nlu confidence score (sentiment) or '' (integer)
15
+
16
+ >>> build_nlu_response_object('integer', 8, 0)
17
+ {'type': 'integer', 'data': 8, 'confidence': 0}
18
+
19
+ >>> build_nlu_response_object('sentiment', 'POSITIVE', 0.99)
20
+ {'type': 'sentiment', 'data': 'POSITIVE', 'confidence': 0.99}
21
  """
22
  return {'type': type, 'data': data, 'confidence': confidence}
23
 
24
 
25
+ # def test_for_float_or_int(message_data, message_text):
26
+ # nlu_response = {}
27
+ # if type(message_text) == int or type(message_text) == float:
28
+ # nlu_response = build_nlu_response_object('integer', message_text, '')
29
+ # prepare_message_data_for_logging(message_data, nlu_response)
30
+ # return nlu_response
31
 
32
 
33
def test_for_number_sequence(message_text_arr, message_data, message_text):
    """ Determines if the student's message is a sequence of numbers

    Inputs
    - message_text_arr: list - the pieces of the student message (strings)
    - message_data: dict - the message object passed on to the logger
    - message_text: str - the original message (currently unused)

    Output
    - nlu_response: dict - an 'integer' response whose data is the
      comma-joined pieces, or {} when the pieces are not all digits

    >>> test_for_number_sequence(['1','2','3'], {"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "I am tired", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"}, '1, 2, 3')
    {'type': 'integer', 'data': '1,2,3', 'confidence': 0}

    >>> test_for_number_sequence(['a','b','c'], {"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "I am tired", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"}, 'a, b, c')
    {}
    """
    # all() is True for an empty list, so without the first check an empty
    # input would be reported as an integer sequence with data ''
    if not message_text_arr:
        return {}
    if not all(ele.isdigit() for ele in message_text_arr):
        return {}

    nlu_response = build_nlu_response_object(
        'integer',
        ','.join(message_text_arr),
        0
    )
    prepare_message_data_for_logging(message_data, nlu_response)
    return nlu_response
 
58
 
59
  Output
60
  - student_response_arr: list - a set of integers (32202 for error code)
61
+
62
+ >>> run_text2int_on_each_list_item(['1','2','3'])
63
+ [1, 2, 3]
64
  """
65
  student_response_arr = []
66
  for student_response in message_text_arr:
 
70
 
71
 
72
def run_sentiment_analysis(message_text):
    """ Evaluates the sentiment of a student message

    Inputs
    - message_text: str - the student message

    Output
    - whatever the mathtext `sentiment` function returns for the message

    >>> run_sentiment_analysis("I am tired")
    [{'label': 'NEGATIVE', 'score': 0.9997807145118713}]

    >>> run_sentiment_analysis("I am full of joy")
    [{'label': 'POSITIVE', 'score': 0.999882698059082}]
    """
    # TODO: Add intent labelling here
    # TODO: Add logic to determine whether intent labeling or sentiment analysis is more appropriate (probably default to intent labeling)
    sentiment_result = sentiment(message_text)
    return sentiment_result
84
 
85
 
86
def run_intent_classification(message_text):
    """ Process a student's message using basic fuzzy text comparison

    The message is compared with each known command; the command with the
    highest similarity above 80 (out of 100) is returned as the intent.

    Inputs
    - message_text: str - the student message

    Output
    - nlu_response: dict - 'type' is always 'intent'; 'data' is the matched
      command ('' when nothing matches); 'confidence' is the similarity
      scaled to 0-1 (0 when nothing matches)

    >>> run_intent_classification("exit")
    {'type': 'intent', 'data': 'exit', 'confidence': 1.0}
    >>> run_intent_classification("exi")
    {'type': 'intent', 'data': 'exit', 'confidence': 0.86}
    >>> run_intent_classification("eas")
    {'type': 'intent', 'data': '', 'confidence': 0}
    >>> run_intent_classification("hard")
    {'type': 'intent', 'data': '', 'confidence': 0}
    >>> run_intent_classification("hardier")
    {'type': 'intent', 'data': 'harder', 'confidence': 0.92}
    """
    commands = [
        'easier',
        'exit',
        'harder',
        'hint',
        'next',
        'stop',
    ]
    nlu_response = {'type': 'intent', 'data': '', 'confidence': 0}

    # Lowercase once instead of on every loop iteration
    normalized_text = message_text.lower()

    best_ratio = 0
    for command in commands:
        ratio = fuzz.ratio(command, normalized_text)
        # Keep the strongest match rather than whichever command happens to
        # come last; '>=' preserves the previous choice when scores tie
        if ratio > 80 and ratio >= best_ratio:
            best_ratio = ratio
            nlu_response['data'] = command
            nlu_response['confidence'] = ratio / 100

    return nlu_response
119
+
120
+
121
  def evaluate_message_with_nlu(message_data):
122
+ """ Process a student's message using NLU functions and send the result
123
+
124
+ >>> evaluate_message_with_nlu({"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "8", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"})
125
+ {'type': 'integer', 'data': 8, 'confidence': 0}
126
+
127
+ >>> evaluate_message_with_nlu({"author_id": "57787919091", "author_type": "OWNER", "contact_uuid": "df78gsdf78df", "message_body": "I am tired", "message_direction": "inbound", "message_id": "dfgha789789ag9ga", "message_inserted_at": "2023-01-10T02:37:28.487319Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"})
128
+ {'type': 'sentiment', 'data': 'NEGATIVE', 'confidence': 0.9997807145118713}
129
+ """
130
  # Keeps system working with two different inputs - full and filtered @event object
131
  try:
132
  message_text = message_data['message_body']
 
143
  }
144
  message_text = message_data['message_body']
145
 
146
+ # Run intent classification only for keywords
147
+ intent_api_response = run_intent_classification(message_text)
148
+ if intent_api_response['data']:
149
+ return intent_api_response
150
+
151
  number_api_resp = text2int(message_text.lower())
152
 
153
  if number_api_resp == 32202:
154
+ # Run intent classification with logistic regression model
155
+ predicted_label = predict_message_intent(message_text)
156
+ if predicted_label['confidence'] > 0.01:
157
+ nlu_response = predicted_label
158
+ else:
159
+ # Run sentiment analysis
160
+ sentiment_api_resp = sentiment(message_text)
161
+ nlu_response = build_nlu_response_object(
162
+ 'sentiment',
163
+ sentiment_api_resp[0]['label'],
164
+ sentiment_api_resp[0]['score']
165
+ )
166
  else:
167
  nlu_response = build_nlu_response_object(
168
  'integer',
169
  number_api_resp,
170
+ 0
171
  )
172
 
173
  prepare_message_data_for_logging(message_data, nlu_response)
mathtext_fastapi/v2_conversation_manager.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import copy
3
+ import dill
4
+ import os
5
+ import json
6
+ import jsonpickle
7
+ import pickle
8
+ import random
9
+ import requests
10
+ import mathtext_fastapi.global_state_manager as gsm
11
+
12
+ from dotenv import load_dotenv
13
+ from mathtext_fastapi.nlu import evaluate_message_with_nlu
14
+ from mathtext_fastapi.math_quiz_fsm import MathQuizFSM
15
+ from mathtext_fastapi.math_subtraction_fsm import MathSubtractionFSM
16
+ from supabase import create_client
17
+ from transitions import Machine
18
+
19
+ from scripts.quiz.generators import start_interactive_math
20
+ from scripts.quiz.hints import generate_hint
21
+
22
# Load variables from a local .env file (if any) before reading os.environ
load_dotenv()

# Module-wide Supabase client; every state machine read/write below uses it
SUPA = create_client(
    os.getenv('SUPABASE_URL'),
    os.getenv('SUPABASE_KEY'),
)
28
+
29
+
30
def pickle_and_encode_state_machine(state_machine):
    """ Serializes a state machine into base64 text

    Inputs
    - state_machine: object - any picklable object

    Output
    - str - the pickled bytes, base64-encoded and decoded as UTF-8 text
    """
    pickled_bytes = pickle.dumps(state_machine)
    return base64.b64encode(pickled_bytes).decode('utf-8')
34
+
35
+
36
def manage_math_quiz_fsm(user_message, contact_uuid, type):
    """ Creates or advances a student's quiz state machine stored in Supabase

    Inputs
    - user_message: str - the student's latest message, used as the answer
    - contact_uuid: str - identifies the student's 'state_machines' record
    - type: str - the column that stores the FSM; 'addition' creates a
      MathQuizFSM, any other value creates a MathSubtractionFSM

    Output
    - messages: list - [response_text] of a newly created FSM, otherwise
      whatever the restored FSM's validate_answer() returns

    Cases
    - no record for the contact: insert a new record with a new FSM
    - a record whose `type` column is empty: store a new FSM in that column
      (e.g. the record has an addition FSM but no subtraction FSM yet)
    - a record with an FSM in the `type` column: restore it, evaluate the
      answer, and store the updated FSM
    """
    fsm_check = SUPA.table('state_machines').select("*").eq(
        "contact_uuid",
        contact_uuid
    ).execute()

    print("MATH QUIZ FSM ACTIVITY")
    print("user_message")
    print(user_message)

    # An empty (or missing) result set means the contact has no record yet
    record_exists = bool(fsm_check.data)
    stored_fsm = fsm_check.data[0][type] if record_exists else None

    if stored_fsm:
        # NOTE(review): pickle.loads can execute arbitrary code; this is only
        # safe while the state_machines table is written by this service alone
        math_quiz_state_machine = pickle.loads(
            base64.b64decode(stored_fsm.encode('utf-8'))
        )
        math_quiz_state_machine.student_answer = user_message
        # Presumably cast to str so it compares against the text answer
        math_quiz_state_machine.correct_answer = str(
            math_quiz_state_machine.correct_answer
        )
        messages = math_quiz_state_machine.validate_answer()
    else:
        if type == 'addition':
            math_quiz_state_machine = MathQuizFSM()
        else:
            math_quiz_state_machine = MathSubtractionFSM()
        messages = [math_quiz_state_machine.response_text]

    dump_encoded = pickle_and_encode_state_machine(math_quiz_state_machine)

    if record_exists:
        SUPA.table('state_machines').update({
            type: dump_encoded
        }).eq(
            "contact_uuid", contact_uuid
        ).execute()
    else:
        SUPA.table('state_machines').insert({
            'contact_uuid': contact_uuid,
            type: dump_encoded
        }).execute()
    return messages
104
+
105
+
106
def retrieve_microlesson_content(context_data, user_message, microlesson, contact_uuid):
    """ Runs the quiz FSM for the selected microlesson and packages its reply

    Addition is checked before subtraction. A quiz is selected when the
    stored local state, the student's keyword ('add' / 'subtract'), or the
    microlesson name points at it.

    Inputs
    - context_data: dict - must contain the key 'local_state'
    - user_message: str - the student's message
    - microlesson: str - the microlesson name, e.g. 'addition'
    - contact_uuid: str - identifies the student's stored state machine

    Output
    - message_package: dict
      - 'messages': list - the FSM messages except the last one
      - 'input_prompt': the last FSM message
      - 'state': 'exit' if the student sent 'exit', otherwise
        '<quiz type>-question-sequence'

    Raises
    - ValueError - when no quiz matches (previously an UnboundLocalError)
    """
    local_state = context_data['local_state']

    quiz_type = None
    for candidate, keyword in (('addition', 'add'), ('subtraction', 'subtract')):
        if local_state == f'{candidate}-question-sequence' or \
                user_message == keyword or \
                microlesson == candidate:
            quiz_type = candidate
            break

    if quiz_type is None:
        raise ValueError(
            f"No microlesson content for local_state={local_state!r}, "
            f"user_message={user_message!r}, microlesson={microlesson!r}"
        )

    messages = manage_math_quiz_fsm(user_message, contact_uuid, quiz_type)

    if user_message == 'exit':
        state_label = 'exit'
    else:
        state_label = f'{quiz_type}-question-sequence'

    # The final FSM message is the question that waits for the student's input
    input_prompt = messages.pop()
    message_package = {
        'messages': messages,
        'input_prompt': input_prompt,
        'state': state_label
    }
    print("MICROLESSON CONTENT RESPONSE")
    print(message_package)
    return message_package
143
+
144
+
145
+ curriculum_lookup_table = {
146
+ 'N1.1.1_G1': 'addition',
147
+ 'N1.1.1_G2': 'subtraction'
148
+ }
149
+
150
+
151
+ def lookup_local_state(next_state):
152
+ microlesson = curriculum_lookup_table[next_state]
153
+ return microlesson
154
+
155
+
156
+ def create_text_message(message_text, whatsapp_id):
157
+ """ Fills a template with input values to send a text message to Whatsapp
158
+
159
+ Inputs
160
+ - message_text: str - the content that the message should display
161
+ - whatsapp_id: str - the message recipient's phone number
162
+
163
+ Outputs
164
+ - message_data: dict - a preformatted template filled with inputs
165
+ """
166
+ message_data = {
167
+ "preview_url": False,
168
+ "recipient_type": "individual",
169
+ "to": whatsapp_id,
170
+ "type": "text",
171
+ "text": {
172
+ "body": message_text
173
+ }
174
+ }
175
+ return message_data
176
+
177
+
178
+ def manage_conversation_response(data_json):
179
+ """ Calls functions necessary to determine message and context data """
180
+ print("V2 ENDPOINT")
181
+
182
+ user_message = ''
183
+ # whatsapp_id = data_json['author_id']
184
+ message_data = data_json['message_data']
185
+ context_data = data_json['context_data']
186
+ whatsapp_id = message_data['author_id']
187
+ print("MESSAGE DATA")
188
+ print(message_data)
189
+ print("CONTEXT DATA")
190
+ print(context_data)
191
+ print("=================")
192
+
193
+ # nlu_response = evaluate_message_with_nlu(message_data)
194
+
195
+ # context_data = {
196
+ # 'contact_uuid': 'abcdefg',
197
+ # 'current_state': 'N1.1.1_G2',
198
+ # 'user_message': '1',
199
+ # 'local_state': ''
200
+ # }
201
+ print("STEP 1")
202
+ print(data_json)
203
+ if not context_data['current_state']:
204
+ context_data['current_state'] = 'N1.1.1_G1'
205
+
206
+ curriculum_copy = copy.deepcopy(gsm.curriculum)
207
+
208
+ print("STEP 2")
209
+ if context_data['user_message'] == 'easier':
210
+ curriculum_copy.left()
211
+ next_state = curriculum_copy.state
212
+ elif context_data['user_message'] == 'harder':
213
+ curriculum_copy.right()
214
+ next_state = curriculum_copy.state
215
+ else:
216
+ next_state = context_data['current_state']
217
+
218
+ print("STEP 3")
219
+ microlesson = lookup_local_state(next_state)
220
+
221
+ print("microlesson")
222
+ print(microlesson)
223
+
224
+ microlesson_content = retrieve_microlesson_content(context_data, context_data['user_message'], microlesson, context_data['contact_uuid'])
225
+
226
+ headers = {
227
+ 'Authorization': f"Bearer {os.environ.get('TURN_AUTHENTICATION_TOKEN')}",
228
+ 'Content-Type': 'application/json'
229
+ }
230
+
231
+ # Send all messages for the current state before a user input prompt (text/button input request)
232
+ for message in microlesson_content['messages']:
233
+ data = create_text_message(message, whatsapp_id)
234
+
235
+ print("data")
236
+ print(data)
237
+
238
+ r = requests.post(
239
+ f'https://whatsapp.turn.io/v1/messages',
240
+ data=json.dumps(data),
241
+ headers=headers
242
+ )
243
+
244
+ print("STEP 4")
245
+ # combine microlesson content and context_data object
246
+
247
+ updated_context = {
248
+ "context": {
249
+ "contact_id": whatsapp_id,
250
+ "contact_uuid": context_data['contact_uuid'],
251
+ "state": microlesson_content['state'],
252
+ "bot_message": microlesson_content['input_prompt'],
253
+ "user_message": user_message,
254
+ "type": 'ask'
255
+ }
256
+ }
257
+ return updated_context
requirements.txt CHANGED
@@ -1,12 +1,15 @@
1
  dill
2
- en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1-py3-none-any.whl
 
3
  jsonpickle
4
- git+ssh://git@gitlab.com/tangibleai/community/mathtext.git@main#egg=mathtext
5
- git+ssh://git@gitlab.com/tangibleai/community/mathactive.git@vlad#egg=mathactive
6
- fastapi
7
- pydantic
8
- requests
9
- sentencepiece
 
 
10
  supabase
11
  transitions
12
  uvicorn
 
1
  dill
2
+ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1-py3-none-any.whl
3
+ fuzzywuzzy
4
  jsonpickle
5
+ mathtext @ git+https://gitlab.com/tangibleai/community/mathtext@main
6
+ fastapi==0.74.*
7
+ openpyxl
8
+ pydantic==1.10.*
9
+ python-Levenshtein
10
+ requests==2.27.*
11
+ sentencepiece==0.1.*
12
+ sentence-transformers
13
  supabase
14
  transitions
15
  uvicorn
scripts/make_request.py CHANGED
@@ -22,7 +22,11 @@ def add_message_text_to_sample_object(message_text):
22
  message_data = '{' + f'"author_id": "+57787919091", "author_type": "OWNER", "contact_uuid": "j43hk26-2hjl-43jk-hnk2-k4ljl46j0ds09", "message_body": "{message_text}", "message_direction": "inbound", "message_id": "4kl209sd0-a7b8-2hj3-8563-3hu4a89b32", "message_inserted_at": "2023-01-10T02:37:28.477940Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"' + '}'
23
  # context_data = '{' + '"user":"", "state":"addition-question-sequence", "bot_message":"", "user_message":"{message_text}"' + '}'
24
 
25
- context_data = '{' + '"user":"", "state":"start-conversation", "bot_message":"", "user_message":"{message_text}"' + '}'
 
 
 
 
26
 
27
  # context_data = '{' + '"user":"", "state":"addition-question-sequence", "bot_message":"", "user_message":"{message_text}","text": "What is 2+3?","question_numbers": [4,3],"right_answer": 7,"number_correct": 2, "number_incorrect": 0, "hints_used": 0, "level": "easy"' + '}'
28
 
@@ -44,7 +48,7 @@ def run_simulated_request(endpoint, sample_answer, context=None):
44
  print(f"Case: {sample_answer}")
45
  b_string = add_message_text_to_sample_object(sample_answer)
46
 
47
- if endpoint == 'sentiment-analysis' or endpoint == 'text2int':
48
  request = requests.post(
49
  url=f'http://localhost:7860/{endpoint}',
50
  json={'content': sample_answer}
@@ -58,21 +62,25 @@ def run_simulated_request(endpoint, sample_answer, context=None):
58
  print(request)
59
 
60
 
 
 
61
  # run_simulated_request('sentiment-analysis', 'I reject it')
62
  # run_simulated_request('text2int', 'seven thousand nine hundred fifty seven')
63
- run_simulated_request('nlu', 'test message')
64
- run_simulated_request('nlu', 'eight')
65
- run_simulated_request('nlu', 'is it 8')
66
- run_simulated_request('nlu', 'can I know how its 0.5')
67
- run_simulated_request('nlu', 'eight, nine, ten')
68
- run_simulated_request('nlu', '8, 9, 10')
69
- run_simulated_request('nlu', '8')
70
- run_simulated_request('nlu', "I don't know")
71
- run_simulated_request('nlu', "I don't know eight")
72
- run_simulated_request('nlu', "I don't 9")
73
- run_simulated_request('nlu', "0.2")
74
- run_simulated_request('nlu', 'Today is a wonderful day')
75
- run_simulated_request('nlu', 'IDK 5?')
 
 
76
  # run_simulated_request('manager', '')
77
  # run_simulated_request('manager', 'add')
78
  # run_simulated_request('manager', 'subtract')
 
22
  message_data = '{' + f'"author_id": "+57787919091", "author_type": "OWNER", "contact_uuid": "j43hk26-2hjl-43jk-hnk2-k4ljl46j0ds09", "message_body": "{message_text}", "message_direction": "inbound", "message_id": "4kl209sd0-a7b8-2hj3-8563-3hu4a89b32", "message_inserted_at": "2023-01-10T02:37:28.477940Z", "message_updated_at": "2023-01-10T02:37:28.487319Z"' + '}'
23
  # context_data = '{' + '"user":"", "state":"addition-question-sequence", "bot_message":"", "user_message":"{message_text}"' + '}'
24
 
25
+ # V1
26
+ # context_data = '{' + '"user":"", "state":"start-conversation", "bot_message":"", "user_message":"{message_text}"' + '}'
27
+
28
+ #V2
29
+ context_data = '{' + '"contact_uuid": "j43hk26-2hjl-43jk-hnk2-k4ljl46j0ds09", "current_state":"", "local_state": "", "user_message":""' + '}'
30
 
31
  # context_data = '{' + '"user":"", "state":"addition-question-sequence", "bot_message":"", "user_message":"{message_text}","text": "What is 2+3?","question_numbers": [4,3],"right_answer": 7,"number_correct": 2, "number_incorrect": 0, "hints_used": 0, "level": "easy"' + '}'
32
 
 
48
  print(f"Case: {sample_answer}")
49
  b_string = add_message_text_to_sample_object(sample_answer)
50
 
51
+ if endpoint == 'sentiment-analysis' or endpoint == 'text2int' or endpoint =='intent-classification':
52
  request = requests.post(
53
  url=f'http://localhost:7860/{endpoint}',
54
  json={'content': sample_answer}
 
62
  print(request)
63
 
64
 
65
+ # run_simulated_request('intent-classification', 'exit')
66
+ # run_simulated_request('intent-classification', "I'm not sure")
67
  # run_simulated_request('sentiment-analysis', 'I reject it')
68
  # run_simulated_request('text2int', 'seven thousand nine hundred fifty seven')
69
+ # run_simulated_request('nlu', 'test message')
70
+ # run_simulated_request('nlu', 'eight')
71
+ # run_simulated_request('nlu', 'is it 8')
72
+ # run_simulated_request('nlu', 'can I know how its 0.5')
73
+ # run_simulated_request('nlu', 'eight, nine, ten')
74
+ # run_simulated_request('nlu', '8, 9, 10')
75
+ # run_simulated_request('nlu', '8')
76
+ # run_simulated_request('nlu', "I don't know")
77
+ # run_simulated_request('nlu', "I don't know eight")
78
+ # run_simulated_request('nlu', "I don't 9")
79
+ # run_simulated_request('nlu', "0.2")
80
+ # run_simulated_request('nlu', 'Today is a wonderful day')
81
+ # run_simulated_request('nlu', 'IDK 5?')
82
+ run_simulated_request('v2/manager', '')
83
+ run_simulated_request('v2/manager', '5')
84
  # run_simulated_request('manager', '')
85
  # run_simulated_request('manager', 'add')
86
  # run_simulated_request('manager', 'subtract')