Spaces:

faldeus0092
/

rg-intent-classification-demo

Sleeping

App Files Files Community

faldeus0092 committed on Nov 22, 2023

Commit

d5b718d

•

1 Parent(s): 443d4b8

i hate commit

Browse files

Files changed (5) hide show

app.py +160 -0
flows.py +166 -0
intents.py +68 -0
requirements.txt +3 -0
utils.py +41 -0

app.py ADDED Viewed

	@@ -0,0 +1,160 @@

+#@title Model Loading
+from sentence_transformers import SentenceTransformer, util
+from utils import get_latest_user_input_from_prompt, get_top_intents, create_embedding
+from intents import intents, intents_sentence_similarity_en
+import flows
+import gradio as gr
+import pandas as pd
+# Load the sentence-embedding model once at import time; every handler below reuses it.
+model_en = SentenceTransformer("intfloat/multilingual-e5-base")
+# Encode all intent descriptions up front so a query only needs one encode() call of its own.
+intents_embedding = create_embedding(intents_sentence_similarity_en, model_en)
+def raw_inference(input, state, n_samples, threshold):
+  state = flows.STATE_FLOWS_MAP[state]
+  query_embedding = model_en.encode(input)
+  similarity = util.pytorch_cos_sim(query_embedding, intents_embedding)
+  result = get_top_intents(intents, similarity, n=n_samples, threshold=threshold, flow=state)
+  return result
+def process_csv(files):
+  global df
+  df = pd.read_csv(files, low_memory=False)
+  df = df[df['chatbot_response'].isin(intents)]
+  df = df[["user_message","prompt", "chatbot_response", "state"]]
+  df.dropna(inplace=True)
+  df = df.reset_index()
+  df.drop('index', axis='columns')
+  df_length = len(df.index)
+  chat = get_latest_user_input_from_prompt(df.iloc[1]["prompt"])
+  state = flows.STATE_FLOWS_MAP[df.iloc[1]['state']]
+  label = df.iloc[1]['chatbot_response']
+  accuracy = gr.Markdown("""
+        You can also check accuracy on how well the model predict the intents based on your provided CSV files. This might take 1-2 minutes.
+        """, visible=True)
+  accuracy_button = gr.Button("Calculate Accuracy", visible=True)
+  return (gr.UploadButton("Upload CSV...", file_types=["file"], file_count="single", visible=False),
+          files,
+          gr.Slider(1, df_length, value=1, step=1, visible=True, label="Index", info="Select which index of data to check the intents"),
+          gr.Textbox(label="Input Chat", info="Input in index", visible=True, value=chat, interactive=False),
+          gr.Textbox(label="State", info="State on which the chat currently on. Some state will exclude some intents", visible=True, value=state, interactive=False),
+          gr.Textbox(label="Ground Truth", info="The label in which the IntentClassification predict in the CSV", visible=True, value=label, interactive=False))
+def update_index(index):
+  chat = get_latest_user_input_from_prompt(df.iloc[int(index)]["prompt"])
+  state = df.iloc[int(index)]['state']
+  label = df.iloc[int(index)]['chatbot_response']
+  return gr.Textbox(label="Input Chat", info="Input in index", visible=True, value=chat, interactive=False), gr.Textbox(label="State", info="State on which the chat currently on. Some state will exclude some intents", visible=True, value=state, interactive=False), gr.Textbox(label="Ground Truth", info="The label in which the IntentClassification predict in the CSV", visible=True, value=label, interactive=False)
+def check_accuracy(n_samples, threshold):
+  global df
+  res_list = []
+  for index, row in df.iterrows():
+    # chat = get_history_from_prompt(row["prompt"])
+    chat = get_latest_user_input_from_prompt(row["prompt"])
+    query_embedding = model_en.encode(chat)
+    flow = flows.STATE_FLOWS_MAP[row['state']]
+    similarity = util.pytorch_cos_sim(query_embedding, intents_embedding)
+    result = get_top_intents(intents, similarity, n=n_samples, threshold=threshold, flow=flow)
+    label = row['chatbot_response']
+    isPredictedTrue=0
+    for item in result:
+      if label in item:
+        isPredictedTrue=1
+        break
+    res_list.append({'state': row['state'], 'gt': label, 'isPredictedTrue': isPredictedTrue})
+  res_df = pd.DataFrame(res_list)
+  # dataframe result
+  grouped_data = res_df.groupby('gt')['isPredictedTrue'].agg(['sum', 'count']).reset_index()
+  grouped_data['percentage'] = (grouped_data['sum'] / grouped_data['count']) * 100
+  # accuracy score
+  score = (res_df['isPredictedTrue'] == 1).sum()/res_df['isPredictedTrue'].count() * 100 #raw
+  print(score, grouped_data)
+  return score, grouped_data
+# Colour theme shared by the whole demo.
+theme = gr.themes.Default(
+    primary_hue="indigo",
+    secondary_hue="pink",
+    neutral_hue="slate",
+)
+# UI layout: the left column holds two input tabs (free text / uploaded CSV),
+# the right column holds the retrieval settings and the prediction output.
+with gr.Blocks(title="Intent Classification Demo", theme=theme) as interface:
+  gr.Markdown("""# Demo for Intent Classification""")
+  with gr.Row(equal_height=True):
+    with gr.Column():
+      # Tab 1: classify a message typed by the user for a manually chosen state.
+      with gr.Tab("Input from raw text"):
+        raw_input_text = gr.Textbox(label="Input Chat", info="Input your chat here, the model will predict the intent")
+        # The choices are the keys of flows.STATE_FLOWS_MAP.
+        raw_state = gr.Dropdown(["GeneralState",
+                            "HomeworkState",
+                            "ExerciseState",
+                            "UnderstandState",
+                            "RecommendMaterialState",
+                            "PersonalState",
+                            "AssessKnowledgeState"],
+                            label="State",
+                            info="Select state on which the chat currently on. Some state will exclude some intents")
+        raw_ask_button = gr.Button("Ask")
+      # Tab 2: browse rows of an uploaded CSV export and classify the selected one.
+      with gr.Tab("Input from Big Query data"):
+        gr.Markdown("""
+        ## Guide:
+        Assuming have access to BigQuery, you can query the table `silicon-airlock-153323.chatbot_ai_dwp.fact_chatbot_ai_conversation_raw`, export result as CSV file, and upload here (make sure your query contains these columns: `prompt, user_message, chatbot_response, state`)
+        ```SELECT prompt, user_message, chatbot_response, state FROM `silicon-airlock-153323.chatbot_ai_dwp.fact_chatbot_ai_conversation_raw` WHERE DATE(_PARTITIONTIME) BETWEEN DATE("2023-11-13") AND DATE("2023-11-19") AND service_name = 'learning_companion' LIMIT 1000```
+        Adjust the date according to needs. After that, export as CSV and upload to this gradio
+        example CSV files to use:
+        https://drive.google.com/file/d/1iDLywKP5JxDJXaAzomSUYLZRWvoGqpt5/view?usp=sharing
+        https://drive.google.com/file/d/1Jh_hP7U2JGQXsRo9OponyVSHL_s1Yx8w/view?usp=sharing
+        """)
+        file_output = gr.File()
+        upload_button = gr.UploadButton("Upload CSV...", file_types=["file"], file_count="single")
+        # These four widgets start hidden; process_csv returns visible replacements after an upload.
+        index = gr.Slider(1, 1000, value=5, step=1, visible=False, label="Index", info="Select which index of data to check the intents")
+        input_text = gr.Textbox(label="Input Chat", info="Input in index", visible=False)
+        state = gr.Textbox(label="State", info="State on which the chat currently on. Some state will exclude some intents", visible=False)
+        gt = gr.Textbox(label="Ground Truth", info="The label in which the IntentClassification predict in the CSV", visible=False)
+        ask_button = gr.Button("Ask With CSV")
+        # Moving the slider reloads the three textboxes from the selected CSV row.
+        index.change(fn=update_index, inputs=index, outputs=[input_text, state, gt])
+        # The output order here must match the order of the tuple returned by process_csv.
+        upload_button.upload(process_csv, upload_button, [upload_button, file_output, index, input_text, state, gt])
+        with gr.Column():
+          with gr.Row():
+            accuracy = gr.Markdown("""
+            You can also check accuracy on how well the model predict the intents based on your provided CSV files. This might take 1-2 minutes.
+            """, visible=True)
+            # NOTE(review): this button is visible before any CSV is uploaded, while
+            # check_accuracy reads the global df that only process_csv creates.
+            accuracy_button = gr.Button("Calculate Accuracy", visible=True)
+          accuracy_score = gr.Textbox(label="Accuracy", info="Accuracy result tested on CSV file you uploaded", visible=True)
+          accuracy_table = gr.Dataframe(visible=True)
+    # Right column: settings shared by both tabs plus the prediction output.
+    with gr.Column():
+      n_samples = gr.Slider(1, 10, value=5, step=1, label="N samples", info="Number of samples to be retrieved. Default is 5")
+      threshold = gr.Slider(0.0, 1.0, value=0.75, step=0.01, label="Threshold", info="Threshold of cosine similarity which intent will be considered similar to the input. The higher, the more similar the intent will be. Default is 0.75")
+      answer = gr.JSON(label="Prediction", show_label=True)
+  # Event wiring: both "Ask" buttons run raw_inference and write to the same JSON output;
+  # they differ only in which text/state widgets supply the inputs.
+  accuracy_button.click(fn=check_accuracy, inputs=[n_samples, threshold], outputs=[accuracy_score, accuracy_table])
+  raw_ask_button.click(fn=raw_inference, inputs=[raw_input_text, raw_state, n_samples, threshold], outputs=answer)
+  ask_button.click(fn=raw_inference, inputs=[input_text, state, n_samples, threshold], outputs=answer)
+# Start the app as soon as the module is executed.
+interface.launch(debug=True)

flows.py ADDED Viewed

	@@ -0,0 +1,166 @@

+#@title flows.py
+# Intent names allowed while the conversation is in "GeneralState" (see STATE_FLOWS_MAP).
+# app.py passes this list to get_top_intents as `flow`, which drops every intent not listed here.
+GENERAL_STATE_FLOWS = [
+        "homework",
+        "homework_with_question",
+        "recommend",
+        "recommend_with_topic",
+        "assess_knowledge",
+        "assess_knowledge_with_topic",
+        "explain_drill",
+        "exercise",
+        "exercise_with_topic",
+        "personal",
+        "finish_learning",
+        "provide_topic",
+        "provide_subject",
+        "provide_subtopic",
+        "provide_concept",
+        "general_intent",
+        "explain_answer",
+        "explain_answer_with_topic",
+        "back_to_study",
+        "summarize",
+        "unknown"
+    ]
+# Intent names allowed while the conversation is in "UnderstandState" (see STATE_FLOWS_MAP).
+UNDERSTAND_STATE_FLOWS = [
+        "homework",
+        "homework_with_question",
+        "provide_topic",
+        "provide_subject",
+        "explain_answer",
+        "explain_answer_with_topic",
+        "assess_knowledge",
+        "assess_knowledge_with_topic",
+        "recommend",
+        "recommend_with_topic",
+        "exercise",
+        "exercise_with_topic",
+        "exercise_multiple_question",
+        "exercise_multiple_question_with_topic",
+        "personal",
+        "finish_learning",
+        "back_to_study",
+        "unknown"
+    ]
+# Intent names allowed while the conversation is in "HomeworkState" (see STATE_FLOWS_MAP).
+HOMEWORK_STATE_FLOWS = ["homework_with_question",
+        "explain_answer",
+        "explain_answer_with_topic",
+        "provide_topic",
+        "provide_subject",
+        "homework",
+        "recommend",
+        "recommend_with_topic",
+        "assess_knowledge",
+        "assess_knowledge_with_topic",
+        "exercise",
+        "exercise_with_topic",
+        "exercise_multiple_question",
+        "exercise_multiple_question_with_topic",
+        "similar_question",
+        "personal",
+        "finish_learning",
+        "unknown",
+        "back_to_study",
+]
+# Intent names allowed while the conversation is in "RecommendMaterialState" (see STATE_FLOWS_MAP).
+# NOTE(review): "understand", "understand_with_topic" and "go_back_to_general" have no entry in
+# the `intents` list of intents.py, so get_top_intents never returns them when called with that list.
+RECOMMEND_MATERIAL_FLOWS = [
+        "homework",
+        "homework_with_question",
+        "understand",
+        "understand_with_topic",
+        "recommend",
+        "recommend_with_topic",
+        "provide_topic",
+        "provide_subject",
+        "explain_answer",
+        "assess_knowledge",
+        "assess_knowledge_with_topic",
+        "exercise",
+        "exercise_with_topic",
+        "exercise_multiple_question",
+        "exercise_multiple_question_with_topic",
+        "personal",
+        "finish_learning",
+        "go_back_to_general",
+        "unknown"
+]
+# Intent names allowed while the conversation is in "PersonalState" (see STATE_FLOWS_MAP).
+PERSONAL_STATE_FLOWS = [
+        "homework",
+        "homework_with_question",
+        "recommend",
+        "recommend_with_topic",
+        "assess_knowledge",
+        "assess_knowledge_with_topic",
+        "personal",
+        "exercise",
+        "exercise_with_topic",
+        "explain_answer",
+        "explain_answer_with_topic",
+        "provide_topic",
+        "provide_subject",
+        "back_to_study",
+        "finish_learning",
+        "unknown"
+]
+# Intent names allowed while the conversation is in "ExerciseState" (see STATE_FLOWS_MAP).
+EXERCISE_STATE_FLOWS = [
+        "exercise",
+        "exercise_with_image",
+        "exercise_with_topic",
+        "exercise_multiple_question",
+        "exercise_multiple_question_with_topic",
+        "provide_topic",
+        "provide_subject",
+        "explain_answer",
+        "explain_answer_with_topic",
+        "provide_answer",
+        "similar_question",
+        "assess_knowledge",
+        "assess_knowledge_with_topic",
+        "homework",
+        "homework_with_question",
+        "recommend",
+        "recommend_with_topic",
+        "finish_learning",
+        "personal",
+        "back_to_study",
+        "unknown"
+]
+# Intent names allowed while the conversation is in "AssessKnowledgeState" (see STATE_FLOWS_MAP).
+ASSESS_KNOWLEDGE_STATE_FLOWS = [
+        "assess_knowledge",
+        "assess_knowledge_with_topic",
+        "assess_knowledge_answer",
+        "explain_drill",
+        "provide_topic",
+        "provide_subject",
+        "diagnosis_result",
+        "explain_answer",
+        "explain_answer_with_topic",
+        "homework",
+        "homework_with_question",
+        "recommend",
+        "recommend_with_topic",
+        "exercise",
+        "exercise_with_topic",
+        "exercise_multiple_question",
+        "exercise_multiple_question_with_topic",
+        "general_intent",
+        "finish_learning",
+        "personal",
+        "back_to_study",
+        "unknown"
+]
+# Maps a conversation state name to the list of intents allowed in that state.
+# The keys are the values offered by the state dropdown in app.py and looked up
+# from the `state` column of an uploaded CSV.
+STATE_FLOWS_MAP = {
+    "GeneralState":GENERAL_STATE_FLOWS,
+    "HomeworkState":HOMEWORK_STATE_FLOWS,
+    "ExerciseState":EXERCISE_STATE_FLOWS,
+    "UnderstandState":UNDERSTAND_STATE_FLOWS,
+    "RecommendMaterialState":RECOMMEND_MATERIAL_FLOWS,
+    "PersonalState":PERSONAL_STATE_FLOWS,
+    "AssessKnowledgeState":ASSESS_KNOWLEDGE_STATE_FLOWS,
+}

intents.py ADDED Viewed

	@@ -0,0 +1,68 @@

+#@title intents.py
+# Maps each intent name to the description text that is embedded and compared with the
+# user's message. Most descriptions are an English summary followed by sample phrases
+# (apparently Indonesian) in parentheses.
+# Entry order matters: create_embedding encodes the values in this order and
+# get_top_intents pairs embedding i with entry i of the `intents` list.
+intents_sentence_similarity_en = {
+    "homework":"Intent to do exercise questions or homework. (Bantu Kerjakan PR, Bantu PR,tanya soal, tanya pr)",
+    "homework_with_question":"Curious and asking spesific questions or homework.(Bantu Kerjakan soal ini, Bisa kerjakan soal ini, Beritahu jawabannya)",
+    "explain_answer":"Wants explanation/study about the spesific answer (Belajar Lebih Dalam/Ceritakan lebih lanjut/jelaskan dari awal/jelaskan/jelasin lebih lanjut/belum paham)",
+    "explain_answer_with_topic":"Wants explanation/study about spesific topics (Belajar Lebih Dalam terkait/Ceritakan lebih lanjut bab /jelaskan dari awal materi/jelaskan lebih lanjut pelajaran /belum paham terkait materi)",
+    "core_explanation_repeat":"Repeat the explanation of a question (Jelasin ulang/Jelaskan ulang/Jelaskan lagi)",
+    "core_explanation_exercise":"Example questions from the material explanation. (Contoh Soal/Contoh soal dong/bahas dengan soal)",
+    "core_explanation_video":"Explanation of the material with a video. (Video Penjelasan/Bahas dengan video/mau video penjelasan)",
+    "core_explanation_protips":"Explanation or questions about Tips, tricks, and effective methods (protips/tips dan trik)",
+    "negative_respond":"Did not understand the explanation (aku belum paham, masih belum paham, aku gangerti, aku belum mengerti, masih bingung, aku kesusahan)",
+    "positive_respond":"Understanding the explanation (aku sudah paham, oke paham kak, oke mengerti, oke aman, lanjut kak)",
+    "recommend":"Suggestions or recommendations (Rekomendasi Materi/Suggested Materials/Materi Terkait)",
+    "recommend_with_topic":"Suggestions or recommendations about certain lesson topic (Aku mau materi /lagi pengen materi /mau materi dong kak/Rekomendasi materi kak/Materi terkait)",
+    "assess_knowledge":"Assess skill/knowledge",
+    "assess_knowledge_with_topic":"Assess skill/knowledge from given topics",
+    "assess_knowledge_answer":"Follow up to assess knowledge result. (jawabanku kok salah ya, kenapa bisa salah ya jawabanku, aku gabisa jawab, wah jawabanku bener wkwk, yeay bener semua jawabanku, jawabanku salah 1 aja yeay, cuman bener 1 jawabanku, secara garis besar udah bagus ya hasilku, perlu peningkatan belajar lagi nih)",
+    "exercise":"Want to do exercise questions (Latihan Soal, aku mau soal, aku butuh soal, bantu carikan soal dong, mau soal dong, mau pertanyaan dong, latihan pertanyaan, coba ulang pertanyaan nya, ulangi pertanyaanya karena tadi kurang jelas, minta soal)",
+    "exercise_with_image":"Want to do exercise questions with an image",
+    "exercise_with_topic":"Want to do exercise questions of a topic (carikan soal integral, mau soal fisika, berikan aku soal tata surya, soal geografi, latihan soal ekonomi, soal biologi)",
+    "exercise_multiple_question":"Wants to do exercise with a number of question",
+    "exercise_multiple_question_with_topic":"Wants to do exercise with a number of question with topics",
+    "personal":"Sharing personal story (aku mau curhat, kak lagi sedih)",
+    "general_intent":"Greetings, Asking/desire about study and exams in general. (aku pusing, aku mau belajar kak, besok ulangan, butuh temen belajar, besok ujian, temenin aku, bantu aku, aku butuh bantuan)",
+    "provide_topic":"Lesson material Topic",
+    "provide_subject":"Lesson material Subject (Fisika, Kimia, Matematika, Biologi, Sejarah, Bahasa Indonesia, Geografi, Biologi, Bahasa Inggris)",
+    "provide_subtopic":"Lesson material Subtopic",
+    "provide_concept":"Concept",
+    "finish_learning":"Finished/already learned (Terimakasih /Thank you/No/Sudah paham!/Ok/Tidak)",
+    "back_to_study":"Return to study (Kembali Belajar) /Mulai Belajar?/langsung mulai belajar",
+    "similar_question":"Create similar exercise questions (Buatkan Soal Serupa / Cari Soal Serupa / Soal lagi yang mirip / Latihan soal yang mirip)",
+    "provide_answer":"Answering exercise questions (A, B, C, D, E) or more than one characters (Jawabanya A, B, C, D, E, Menurutku jawabanya A, B, C, D, E)",
+    "diagnosis_result":"See diagnosis or score result (Lihat hasil diagnosis / hasil drill)",
+    "explain_drill":"Explain exercise question drill material (bahas lewat chat/bahas soal nomer 1 ya/bahas soal nomer 2 ya/bahas soal nomer 3 ya/bahas soal nomer 4 ya/bahas soal nomer 5 ya)"
+  }
+intents = ['homework',
+ 'homework_with_question',
+ 'explain_answer',
+ 'explain_answer_with_topic',
+ 'core_explanation_repeat',
+ 'core_explanation_exercise',
+ 'core_explanation_video',
+ 'core_explanation_protips',
+ 'negative_respond',
+ 'positive_respond',
+ 'recommend',
+ 'recommend_with_topic',
+ 'assess_knowledge',
+ 'assess_knowledge_with_topic',
+ 'assess_knowledge_answer',
+ 'exercise',
+ 'exercise_with_image',
+ 'exercise_with_topic',
+ 'exercise_multiple_question',
+ 'exercise_multiple_question_with_topic',
+ 'personal',
+ 'general_intent',
+ 'provide_topic',
+ 'provide_subject',
+ 'provide_subtopic',
+ 'provide_concept',
+ 'finish_learning',
+ 'back_to_study',
+ 'similar_question',
+ 'provide_answer',
+ 'diagnosis_result',
+ 'explain_drill']

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+sentence-transformers
+transformers
+gradio

utils.py ADDED Viewed

	@@ -0,0 +1,41 @@

+#@title Utility Functions
+def get_history_from_prompt(prompt:str):
+  if "Here are previous chats for your reference (only use this if you need further information to infer the intent):" in prompt:
+    history = prompt.split("Here are previous chats for your reference (only use this if you need further information to infer the intent):")
+  else:
+    history = prompt.split("Here are previous chats or summary conversation for your reference (only use this if you need further information to infer the intent):")
+  return history[1].replace("""The Intent:""", '')
+def get_latest_user_input_from_prompt(prompt:str):
+  input = prompt.split("Here is the message you are to classify:")
+  if "Here are previous chats for your reference (only use this if you need further information to infer the intent):" in prompt:
+    input = input[1].split("Here are previous chats for your reference (only use this if you need further information to infer the intent):")
+  else:
+    input = input[1].split("Here are previous chats or summary conversation for your reference (only use this if you need further information to infer the intent)")
+  return input[0]
+# Get the top 5 intents with the highest values
+def get_top_intents(intent_list:list, similarity, n=5, threshold=0.3, flow=None) -> str:
+  result = dict()
+  for i in range(len(intent_list)):
+    if flow:
+      if intent_list[i] in flow:
+        # print("intent {} is ignored, because it's not in the possible intent".format(intent_list[i]))
+        if similarity[0][i].item() > threshold:
+          result[intent_list[i]] = similarity[0][i].item()
+    else:
+        if similarity[0][i].item() > threshold:
+          result[intent_list[i]] = similarity[0][i].item()
+  top_intents = sorted(result.items(), key=lambda item: item[1], reverse=True)[:n]
+  if not top_intents:
+    top_intents.append(('unknown', 1.0))
+  return top_intents
+def create_embedding(intents:dict, model_en):
+  intents_description_en = []
+  for k,v in intents.items():
+    intents_description_en.append(v)
+  intents_embedding = model_en.encode(intents_description_en)
+  return intents_embedding