mathtext-wormhole-staging

Build error

App Files Files Community

Greg Thompson committed on Mar 6, 2023

Commit

eecb00a

•

2 Parent(s): fc1ccfb 1696121

Merge branch 'feature-intent-model' into 'staging'

Browse files

Feature intent model

See merge request tangibleai/community/mathtext-fastapi!13

Files changed (6) hide show

mathtext_fastapi/data/intent_classification_model.joblib +3 -0
mathtext_fastapi/data/labeled_data.csv +144 -0
mathtext_fastapi/intent_classification.py +52 -0
mathtext_fastapi/nlu.py +9 -0
requirements.txt +1 -0
scripts/make_request.py +16 -15

mathtext_fastapi/data/intent_classification_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea4954368c3b95673167ce347f2962b5508c4af295b6af58b6c11b3c1075b42e
+size 127903

mathtext_fastapi/data/labeled_data.csv ADDED Viewed

	@@ -0,0 +1,144 @@

+Utterance,Label
+skip this,skip
+this is stupid,skip
+this is stupid,harder
+this is stupid,feedback
+I'm done,exit
+quit,exit
+I don't know,hint
+help,hint
+can I do something else?,main menu
+what's going on,rapport
+what's going on,main menu
+tell me a joke,rapport
+tell me a joke,main menu
+Sorry I don't understand,do not know
+Ten thousand,number
+1.234,number
+"10,000",number
+"123, 456",numbers
+"11, 12, 13",numbers
+"100, 200, 300",numbers
+"100, 200",numbers
+Stop for a minute,wait
+Bye bye,exit
+Good night,exit
+Am done,exit
+Yes,yes
+Help,help
+Idiot,harder
+Stop,exit
+I don't get it,hint
+Math,main menu
+Math,math topic
+Tomorrow let do math,wait
+Later,wait
+Pls i will continue pls,skip
+Rori tell me now,help
+harder,skip
+Stop for now i wont  to go to  School,exit
+Next,next
+Okay,okay
+Great,affirmation
+Give me for example,example
+No I want to learn algebraic expressions,algebra
+Hi rori,greeting
+*help*,help
+*Next*,next
+Okay nice,okay
+I don't know it,hint
+Nex,next
+I need a help,hint
+Please can I ask your any math questions?,faq
+The answer is 1,answer
+The answer is 1,number
+But 0.8 is also same as . 8 so I was actually right,I'm right
+What is the number system?,faq
+Ok thanks,thanks
+I'm going to school now,exit
+Let's move to another topic,main menu
+"Ummanni saba
+Kebena bara kana galmi keenya inni guddaan bilisummaa qofa #Gabrummaan_ammaan booda_gaha namni hundi bakka jiru irraa kutatee ka,ee jira obboleewwan goototni keenya jiran haqa Kebenaaf jechaa jiru Guraandhala 29 booda walabummaa keenya labsina Dhugaa qabna Ni injifanna *** . Naannoo giddu galeessa Itoophiyaatti #Kebenaan aanaa addaati Kun murtoo ummata Kebenaa hundaati",spam
+Yes it,yes
+U type fast,too fast
+I mean your typing is fast,too fast
+Why do u type so fast,too fast
+Ur  typing is fast,too fast
+Can we go to a real work,harder
+I know all this,harder
+Answer this,preamble
+Am tired,exit
+This is not what I asked for,main menu
+Bye,exit
+😱😱😂😂😂😡😰😰😰😒,spam
+Gbxbxbcbcbbcbchcbchc,spam
+I want to solve math,math topic
+Pleas let start with the fraction,fractions topic
+Okey,okay
+i need substraction,subtraction topic
+Can you please stop with me,exit
+Another one,next
+Harder or easy,main menu
+Hard or easier,main menu
+Jump topic,menu
+Got it,okay
+I didn't understand,don't know
+Don't understand,don't know
+Excuse me pls,hint
+Let stop for today,exit
+Help and stop asking me stupid questions,
+Ykay,okay
+Not interested in solving this,menu
+Stpo,exit
+Hiiiiiii,greeting
+Hi rori,greeting
+I've done this things before,harder
+Which number my phone number,
+Unit,main menu
+No ide,don't know
+No ide,hint
+No idea,don't know
+🙈🤩😇🙏,spam
+Thank u,thanks
+Do you know programming,faq
+Delete my number,unsubscribe
+See u,exit
+Can I go for break ??,wait
+I wanna fuck,profanity
+Enough of this nw,exit
+Can we move to equations,equations
+Do you know you are an idiot,insult
+3 digit number,number
+3 digit number,answer
+Three digit number,confident answer
+Three digit number,number
+Good evening Rori,greeting
+89 Next,answer
+89 Next,number
+3 digit number,answer
+Three digit number,answer
+This is too simple,harder
+Am not a kid,harder
+Hey Miss Roribcan you ask me some question from Secondary 2,greeting
+Hey Miss Roribcan you ask me some question from Secondary 2,faq
+Hey Miss Roribcan you ask me some question from Secondary 2,main menu
+don't know,hint
+don't know,easier
+𝑴𝒂𝒕𝒉,math
+Rori can you help me to gat value,
+I called but u are not picking up,
+0.3 answer,answer
+Sorry rori was101,answer
+Y is it 6,answer
+Y is it 6,number
+0.3 answer,number
+Why 0.5,more explanation
+Why 0.5,number
+6\nNext,Next
+How is the answer is 11,more explanation
+How comes we have 11,more explanation
+Yes 6,answer
+Yes 6,number
+6\nNext,number
+How is the answer is 11,number
+How comes we have 11,number

mathtext_fastapi/intent_classification.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import numpy as np
+import pandas as pd
+from pathlib import Path
+from sentence_transformers import SentenceTransformer
+from sklearn.linear_model import LogisticRegression
+from joblib import dump, load
+def pickle_model(model):
+    DATA_DIR = Path(__file__).parent.parent / "mathtext_fastapi" / "data" / "intent_classification_model.joblib"
+    dump(model, DATA_DIR)
+def create_intent_classification_model():
+    encoder = SentenceTransformer('all-MiniLM-L6-v2')
+    # path = list(Path.cwd().glob('*.csv'))
+    DATA_DIR = Path(__file__).parent.parent / "mathtext_fastapi" / "data" / "labeled_data.csv"
+    print("DATA_DIR")
+    print(f"{DATA_DIR}")
+    with open(f"{DATA_DIR}",'r', newline='', encoding='utf-8') as f:
+        df = pd.read_csv(f)
+    df = df[df.columns[:2]]
+    df = df.dropna()
+    X_explore = np.array([list(encoder.encode(x)) for x in df['Utterance']])
+    X = np.array([list(encoder.encode(x)) for x in df['Utterance']])
+    y = df['Label']
+    model = LogisticRegression(class_weight='balanced')
+    model.fit(X, y, sample_weight=None)
+    print("MODEL")
+    print(model)
+    pickle_model(model)
+def retrieve_intent_classification_model():
+    DATA_DIR = Path(__file__).parent.parent / "mathtext_fastapi" / "data" / "intent_classification_model.joblib"
+    model = load(DATA_DIR)
+    return model
+def predict_message_intent(message):
+    encoder = SentenceTransformer('all-MiniLM-L6-v2')
+    model = retrieve_intent_classification_model()
+    tokenized_utterance = np.array([list(encoder.encode(message))])
+    predicted_label = model.predict(tokenized_utterance)
+    predicted_probabilities = model.predict_proba(tokenized_utterance)
+    confidence_score = predicted_probabilities.max()
+    return {"type": "intent", "data": predicted_label[0], "confidence": confidence_score}

mathtext_fastapi/nlu.py CHANGED Viewed

@@ -2,6 +2,7 @@ from fuzzywuzzy import fuzz
 from mathtext_fastapi.logging import prepare_message_data_for_logging
 from mathtext.sentiment import sentiment
 from mathtext.text2int import text2int
 import re
@@ -142,6 +143,7 @@ def evaluate_message_with_nlu(message_data):
         }
         message_text = message_data['message_body']
     intent_api_response = run_intent_classification(message_text)
     if intent_api_response['data']:
         return intent_api_response
@@ -149,6 +151,13 @@ def evaluate_message_with_nlu(message_data):
     number_api_resp = text2int(message_text.lower())
     if number_api_resp == 32202:
         sentiment_api_resp = sentiment(message_text)
         nlu_response = build_nlu_response_object(
             'sentiment',

 from mathtext_fastapi.logging import prepare_message_data_for_logging
 from mathtext.sentiment import sentiment
 from mathtext.text2int import text2int
+from mathtext_fastapi.intent_classification import create_intent_classification_model, retrieve_intent_classification_model, predict_message_intent
 import re
         }
         message_text = message_data['message_body']
+    # Run intent classification only for keywords
     intent_api_response = run_intent_classification(message_text)
     if intent_api_response['data']:
         return intent_api_response
     number_api_resp = text2int(message_text.lower())
     if number_api_resp == 32202:
+        # Run intent classification with logistic regression model
+        predicted_label = predict_message_intent(message_text)
+        if predicted_label['confidence'] > 0.01:
+            nlu_response = predicted_label
+            return nlu_response
+        # Run sentiment analysis
         sentiment_api_resp = sentiment(message_text)
         nlu_response = build_nlu_response_object(
             'sentiment',

requirements.txt CHANGED Viewed

@@ -8,6 +8,7 @@ pydantic==1.10.*
 python-Levenshtein
 requests==2.27.*
 sentencepiece==0.1.*
 supabase
 transitions
 uvicorn==0.17.*

 python-Levenshtein
 requests==2.27.*
 sentencepiece==0.1.*
+sentence-transformers
 supabase
 transitions
 uvicorn==0.17.*

scripts/make_request.py CHANGED Viewed

@@ -58,22 +58,23 @@ def run_simulated_request(endpoint, sample_answer, context=None):
     print(request)
-run_simulated_request('intent-classification', 'exit')
-run_simulated_request('sentiment-analysis', 'I reject it')
-run_simulated_request('text2int', 'seven thousand nine hundred fifty seven')
-run_simulated_request('nlu', 'test message')
-run_simulated_request('nlu', 'eight')
-run_simulated_request('nlu', 'is it 8')
-run_simulated_request('nlu', 'can I know how its 0.5')
-run_simulated_request('nlu', 'eight, nine, ten')
-run_simulated_request('nlu', '8, 9, 10')
-run_simulated_request('nlu', '8')
 run_simulated_request('nlu', "I don't know")
-run_simulated_request('nlu', "I don't know eight")
-run_simulated_request('nlu', "I don't 9")
-run_simulated_request('nlu', "0.2")
-run_simulated_request('nlu', 'Today is a wonderful day')
-run_simulated_request('nlu', 'IDK 5?')
 # run_simulated_request('manager', '')
 # run_simulated_request('manager', 'add')
 # run_simulated_request('manager', 'subtract')

     print(request)
+# run_simulated_request('intent-classification', 'exit')
+# run_simulated_request('intent-classification', "I'm not sure")
+# run_simulated_request('sentiment-analysis', 'I reject it')
+# run_simulated_request('text2int', 'seven thousand nine hundred fifty seven')
+# run_simulated_request('nlu', 'test message')
+# run_simulated_request('nlu', 'eight')
+# run_simulated_request('nlu', 'is it 8')
+# run_simulated_request('nlu', 'can I know how its 0.5')
+# run_simulated_request('nlu', 'eight, nine, ten')
+# run_simulated_request('nlu', '8, 9, 10')
+# run_simulated_request('nlu', '8')
 run_simulated_request('nlu', "I don't know")
+# run_simulated_request('nlu', "I don't know eight")
+# run_simulated_request('nlu', "I don't 9")
+# run_simulated_request('nlu', "0.2")
+# run_simulated_request('nlu', 'Today is a wonderful day')
+# run_simulated_request('nlu', 'IDK 5?')
 # run_simulated_request('manager', '')
 # run_simulated_request('manager', 'add')
 # run_simulated_request('manager', 'subtract')