faldeus0092 committed on
Commit
caa0e13
1 Parent(s): dd0401b

first commit

Browse files
Files changed (13) hide show
  1. .gitignore +4 -0
  2. app.py +218 -0
  3. embeddings.npy +3 -0
  4. flows.py +171 -0
  5. intents.py +109 -0
  6. new_embed_0.npy +3 -0
  7. new_embed_1.npy +3 -0
  8. new_embed_2.npy +3 -0
  9. new_embed_test.npy +3 -0
  10. openai_embeddings.npy +3 -0
  11. prompt.py +14 -0
  12. requirements.txt +3 -0
  13. utility_func.py +58 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ create_openai_embed.py
2
+ asdf
3
+ __pycache__
4
+ env
app.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #@title Model Loading
2
+ from utility_func import get_history_from_prompt, get_latest_user_input_from_prompt, get_top_intents, create_embedding
3
+ from intents import intents, intents_sentence_similarity_en, chatbot_intents
4
+ from prompt import prompt_template
5
+ import flows
6
+ import os
7
+ import gradio as gr
8
+ import pandas as pd
9
+ import langchain
10
+ from langchain import PromptTemplate, LLMChain
11
+ from langchain.chat_models import ChatOpenAI
12
+ from datetime import date
13
+ import numpy as np
14
+ from openai import OpenAI
15
+ import time
16
+
17
# Pre-computed intent embeddings, loaded once at import time and compared
# against every query embedding.
with open('new_embed_2.npy', 'rb') as f:
    # NOTE(review): presumably one row per entry of `intents`, in the same
    # order -- the similarity scores derived from this array are indexed by
    # position alongside `intents`. Verify against how the file was generated.
    openai_intents_embedding = np.load(f)

# NOTE(review): these module-level names are not read anywhere in the visible
# code; classify_intent() builds its own local `llm` / `llm_chain`.
llm = None
llm_chain = None
22
+
23
def cosine_similarity(a, b):
    """Return the cosine similarity between ``a`` and the vector ``b``.

    Args:
        a: A single vector, or a 2-D array holding one vector per row.
        b: The query vector.

    Returns:
        A scalar when ``a`` is a vector, otherwise a 1-D array with one
        similarity per row of ``a``.

    The norm of ``a`` is taken along its last axis.  Calling
    ``np.linalg.norm`` without an axis on a 2-D array yields the norm of the
    whole matrix, so every row used to be divided by the same constant and
    the scores were not cosine similarities (they also shrank as rows were
    added).

    NOTE(review): thresholds calibrated against the old, smaller scores need
    to be re-tuned after this fix.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    return np.dot(a, b) / (np.linalg.norm(a, axis=-1) * np.linalg.norm(b))
25
+
26
def get_embedding(text, api_key, model="text-embedding-ada-002"):
    """Request the OpenAI embedding vector for ``text``.

    A client is created from ``api_key`` for this call, newlines in ``text``
    are replaced by spaces, and the embedding of the first (and only) input
    item in the response is returned.
    """
    openai_client = OpenAI(api_key=api_key)
    single_line = text.replace("\n", " ")
    response = openai_client.embeddings.create(input=[single_line], model=model)
    return response.data[0].embedding
30
+
31
def raw_inference(input, recv_state, n_samples, threshold, api_key):
    """Rank candidate intents for one chat message by embedding similarity.

    Args:
        input: Chat message to classify.  The name shadows the builtin but is
            kept so existing callers are unaffected.
        recv_state: Key into ``flows.STATE_FLOWS_MAP``; selects which intents
            are eligible.
        n_samples: Maximum number of intents to return.
        threshold: Minimum similarity score an intent must exceed.
        api_key: OpenAI API key used to embed the message.

    Returns:
        A tuple of the ``get_top_intents`` result and a ``gr.Button`` update
        that makes the "Ask intent with Language Model" button visible.

    Raises:
        KeyError: If ``recv_state`` is not a key of ``flows.STATE_FLOWS_MAP``.
    """
    state = flows.STATE_FLOWS_MAP[recv_state]
    started = time.time()
    query_embedding = get_embedding(input, api_key)
    similarity = cosine_similarity(openai_intents_embedding, query_embedding)
    result = get_top_intents(intents, similarity, n=n_samples, threshold=threshold, flow=state)
    latency = time.time() - started
    # print() does not interpolate logging-style "%s" arguments; the old call
    # printed the placeholder literally followed by the value.
    print(f"latency: {latency}")
    return result, gr.Button("Ask intent with Language Model", visible=True)
41
+
42
def process_csv(files):
    """Load an uploaded conversation CSV into the module-level ``df``.

    Rows whose ``chatbot_response`` is not listed in ``intents`` are dropped,
    only the ``user_message``, ``prompt``, ``chatbot_response`` and ``state``
    columns are kept, rows with missing values are removed, and the index is
    renumbered from 0.

    Args:
        files: Path (or file object) of the uploaded CSV, as handed over by
            the upload button.

    Returns:
        A 7-tuple of component updates: the (now hidden) upload button, the
        uploaded file, the index slider, and the read-only "Input Chat",
        "State", "Ground Truth" and "History or summary" text boxes filled
        from the first row.

    Raises:
        gr.Error: If no usable row remains after filtering.
    """
    global df
    frame = pd.read_csv(files, low_memory=False)
    frame = frame[frame['chatbot_response'].isin(intents)]
    frame = frame[["user_message", "prompt", "chatbot_response", "state"]]
    # drop=True discards the old index outright.  The previous
    # reset_index() + un-assigned drop('index') left a stray "index" column.
    frame = frame.dropna().reset_index(drop=True)
    if frame.empty:
        # Previously this surfaced as an IndexError from iloc.
        raise gr.Error("No usable rows found in the uploaded CSV.")
    df = frame

    first_row = df.iloc[0]
    chat = get_latest_user_input_from_prompt(first_row["prompt"])
    history = get_history_from_prompt(first_row["prompt"])
    # Show the state *name*, as update_index() does.  The text box value is
    # later used as the key into flows.STATE_FLOWS_MAP by raw_inference(), so
    # placing the resolved flow list here made that lookup fail.
    state = first_row['state']
    label = first_row['chatbot_response']

    # The slider value is used directly as a positional index into df, so its
    # range must be 0 .. len(df) - 1.  The former 1 .. len(df) range could
    # never select the first row and overflowed on the last position.
    last_position = len(df.index) - 1

    return (gr.UploadButton("Upload CSV...", file_types=["file"], file_count="single", visible=False),
            files,
            gr.Slider(0, last_position, value=0, step=1, visible=True, label="Index", info="Select which index of data to check the intents"),
            gr.Textbox(label="Input Chat", info="Input in index", visible=True, value=chat, interactive=False),
            gr.Textbox(label="State", info="State on which the chat currently on. Some state will exclude some intents", visible=True, value=state, interactive=False),
            gr.Textbox(label="Ground Truth", info="The label in which the IntentClassification predict in the CSV", visible=True, value=label, interactive=False),
            gr.Textbox(label="History or summary", info="Chat history or summary, if available", visible=True, value=history, interactive=False))
71
+
72
def update_index(index):
    """Refresh the read-only detail boxes for the selected CSV row.

    ``index`` is converted to ``int`` and used as a positional index into the
    module-level ``df``.  Returns updates for the "Input Chat", "State",
    "Ground Truth" and "History or summary" text boxes, in that order.
    """
    def read_only_box(label, info, value):
        # Every detail box is visible and non-editable.
        return gr.Textbox(label=label, info=info, visible=True, value=value, interactive=False)

    position = int(index)
    chat = get_latest_user_input_from_prompt(df.iloc[position]["prompt"])
    history = get_history_from_prompt(df.iloc[position]["prompt"])
    state = df.iloc[position]['state']
    label = df.iloc[position]['chatbot_response']

    chat_box = read_only_box("Input Chat", "Input in index", chat)
    state_box = read_only_box("State", "State on which the chat currently on. Some state will exclude some intents", state)
    label_box = read_only_box("Ground Truth", "The label in which the IntentClassification predict in the CSV", label)
    history_box = read_only_box("History or summary", "Chat history or summary, if available", history)
    return (chat_box, state_box, label_box, history_box)
81
+
82
def check_accuracy(n_samples, threshold, api_key=None):
    """Measure how often the ground-truth intent appears among the top intents.

    Every row of the module-level ``df`` is embedded, scored against the
    pre-computed intent embeddings and filtered by the row's state flow; the
    row counts as a hit when its ``chatbot_response`` is one of the returned
    intents.

    Args:
        n_samples: Maximum number of intents retrieved per row.
        threshold: Minimum similarity score an intent must exceed.
        api_key: OpenAI API key forwarded to ``get_embedding``.  Optional so
            existing two-argument callers keep working.
            NOTE(review): with ``None`` the OpenAI client presumably falls
            back to its own credential lookup (e.g. ``OPENAI_API_KEY``) --
            confirm for the installed client version.

    Returns:
        ``(score, grouped_data)``: the overall hit percentage and a DataFrame
        with per-intent ``sum``, ``count`` and ``percentage`` columns.

    Raises:
        KeyError: If a row's ``state`` is not a key of
            ``flows.STATE_FLOWS_MAP``.
    """
    global df
    res_list = []
    for _, row in df.iterrows():
        chat = get_latest_user_input_from_prompt(row["prompt"])
        # get_embedding() requires the API key; the former one-argument call
        # raised TypeError on the very first row.
        query_embedding = get_embedding(chat, api_key)
        flow = flows.STATE_FLOWS_MAP[row['state']]
        similarity = cosine_similarity(openai_intents_embedding, query_embedding)
        result = get_top_intents(intents, similarity, n=n_samples, threshold=threshold, flow=flow)

        label = row['chatbot_response']
        # Each result item is an (intent, score) pair.
        is_predicted_true = int(any(label in item for item in result))
        res_list.append({'state': row['state'], 'gt': label, 'isPredictedTrue': is_predicted_true})

    # Explicit columns keep the groupby below valid when there are no rows.
    res_df = pd.DataFrame(res_list, columns=['state', 'gt', 'isPredictedTrue'])

    # dataframe result
    grouped_data = res_df.groupby('gt')['isPredictedTrue'].agg(['sum', 'count']).reset_index()
    grouped_data['percentage'] = (grouped_data['sum'] / grouped_data['count']) * 100

    # accuracy score; guard the 0/0 case for an empty frame
    if res_df.empty:
        score = 0.0
    else:
        score = (res_df['isPredictedTrue'] == 1).sum() / res_df['isPredictedTrue'].count() * 100

    print(score, grouped_data)
    return score, grouped_data
112
+
113
def classify_intent(input_text:str, history:str, answer, model_name, api_key):
    """Ask the chat model to pick one intent among the retrieved candidates.

    Args:
        input_text: Chat message to classify.
        history: Chat history or summary inserted into the prompt.
        answer: Sequence of ``(intent, score)`` pairs as produced by the
            similarity step; may be empty or ``None``.
        model_name: Chat model name passed to ``ChatOpenAI``.
        api_key: OpenAI API key.

    Returns:
        ``(predicted_intent, prompt_result)``: the model's reply and the fully
        rendered prompt that was sent.
    """
    print(f"predicting with llm... date: {date.today()}")
    print(f"model name: {model_name}")
    # temperature is a number; it used to be passed as the string '0.1'.
    llm = ChatOpenAI(model=model_name, temperature=0.1, openai_api_key=api_key)
    prompt = PromptTemplate(template=prompt_template, input_variables=["intents", "INPUT", "chatHistory"])
    llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=False)

    # Candidates without a description in chatbot_intents are skipped.  The
    # similarity step falls back to ('unknown', 1.0), which has no entry there
    # and used to raise KeyError; prompt_template carries its own "unknown"
    # line, so nothing is lost.
    inp_intents = "".join(
        candidate[0] + ": " + chatbot_intents[candidate[0]] + "\n"
        for candidate in (answer or [])
        if candidate[0] in chatbot_intents
    )
    predicted_intent = llm_chain.run({"intents": inp_intents, "INPUT": input_text, "chatHistory": history})
    prompt_result = llm_chain.prompt.format_prompt(intents=inp_intents, INPUT=input_text, chatHistory=history).to_string()
    return predicted_intent, prompt_result
126
+
127
# Colour theme passed to the gr.Blocks interface defined below.
theme = gr.themes.Default(
    primary_hue="indigo",
    secondary_hue="pink",
    neutral_hue="slate",
)
132
+
133
+ with gr.Blocks(title="Intent Classification Demo", theme=theme) as interface:
134
+ gr.Markdown("""# Demo for Intent Classification""")
135
+
136
+ with gr.Row(equal_height=True):
137
+ with gr.Column():
138
+ model_name = gr.Dropdown(["gpt-3.5-turbo",
139
+ "gpt-3.5-turbo-1106",
140
+ "gpt-4",
141
+ "gpt-4-1106-preview"],
142
+ label="Model name",
143
+ info="Select model name for GPT")
144
+ api_key = gr.Textbox(label="OpenAI API Key", info="get it at https://platform.openai.com/account/api-keys",visible=True, lines=1, type="password")
145
+ n_samples = gr.Slider(1, 10, value=10, step=1, label="N samples", info="Number of samples to be retrieved. Default is 5")
146
+ threshold = gr.Slider(0.0, 1.0, value=0.13, step=0.01, label="Threshold", info="Threshold of cosine similarity which intent will be considered similar to the input. The higher, the more similar the intent will be. Default is 0.75")
147
+ with gr.Tab("Input from raw text"):
148
+ raw_input_text = gr.Textbox(label="Input Chat", info="Input your chat here, the model will predict the intent")
149
+ raw_state = gr.Dropdown(["GeneralState",
150
+ "HomeworkState",
151
+ "ExerciseState",
152
+ "UnderstandState",
153
+ "RecommendMaterialState",
154
+ "PersonalState",
155
+ "AssessKnowledgeState"],
156
+ label="State",
157
+ info="Select state on which the chat currently on. Some state will exclude some intents")
158
+ raw_history = gr.Textbox(label="History or summary", info="Chat history or summary, if available", visible=True)
159
+ raw_ask_button = gr.Button("Ask")
160
+ ask_llm_button_raw = gr.Button("Ask intent with Language Model", visible=False)
161
+
162
+ with gr.Tab("Input from Big Query data"):
163
+ gr.Markdown("""
164
+ ## Guide:
165
+
166
+ Assuming have access to BigQuery, you can query the table `silicon-airlock-153323.chatbot_ai_dwp.fact_chatbot_ai_conversation_raw`, export result as CSV file, and upload here (make sure your query contains these columns: `prompt, user_message, chatbot_response, state`)
167
+
168
+ ```SELECT prompt, user_message, chatbot_response, state FROM `silicon-airlock-153323.chatbot_ai_dwp.fact_chatbot_ai_conversation_raw` WHERE DATE(_PARTITIONTIME) BETWEEN DATE("2023-11-13") AND DATE("2023-11-19") AND service_name = 'learning_companion' LIMIT 1000```
169
+
170
+ Adjust the date according to needs. After that, export as CSV and upload to this gradio
171
+
172
+ example CSV files to use:
173
+
174
+ https://drive.google.com/file/d/1iDLywKP5JxDJXaAzomSUYLZRWvoGqpt5/view?usp=sharing
175
+
176
+ https://drive.google.com/file/d/1Jh_hP7U2JGQXsRo9OponyVSHL_s1Yx8w/view?usp=sharing
177
+
178
+ """)
179
+
180
+ file_output = gr.File()
181
+ upload_button = gr.UploadButton("Upload CSV...", file_types=["file"], file_count="single")
182
+
183
+ index = gr.Slider(1, 1000, value=5, step=1, visible=False, label="Index", info="Select which index of data to check the intents")
184
+ input_text = gr.Textbox(label="Input Chat", info="Input in index", visible=False)
185
+ state = gr.Textbox(label="State", info="State on which the chat currently on. Some state will exclude some intents", visible=False)
186
+ history = gr.Textbox(label="History or summary", info="Chat history or summary, if available", visible=False)
187
+ gt = gr.Textbox(label="Ground Truth", info="The label in which the IntentClassification predict in the CSV", visible=False)
188
+ ask_button = gr.Button("Ask With CSV")
189
+ ask_llm_button = gr.Button("Ask intent with Language Model", visible=False)
190
+
191
+
192
+ index.change(fn=update_index, inputs=index, outputs=[input_text, state, gt, history])
193
+ upload_button.upload(process_csv, upload_button, [upload_button, file_output, index, input_text, state, gt, history])
194
+ with gr.Column():
195
+ with gr.Row():
196
+ accuracy = gr.Markdown("""
197
+
198
+ You can also check accuracy on how well the model predict the intents based on your provided CSV files. This might take 1-2 minutes.
199
+
200
+ """, visible=True)
201
+ accuracy_button = gr.Button("Calculate Accuracy", visible=True)
202
+ accuracy_score = gr.Label(label="Accuracy result", visible=True)
203
+ accuracy_table = gr.Dataframe(visible=True)
204
+
205
+ with gr.Column():
206
+ answer = gr.JSON(label="Sentence Similarity Prediction", show_label=True)
207
+ LLM_prediction = gr.Label(label="LLM Prediction Result", visible=True)
208
+ LLM_prompt = gr.Textbox(label="Prompt Used for Language Model", info="Showing prompt used in language model", visible=True)
209
+
210
+ accuracy_button.click(fn=check_accuracy, inputs=[n_samples, threshold], outputs=[accuracy_score, accuracy_table])
211
+ raw_ask_button.click(fn=raw_inference, inputs=[raw_input_text, raw_state, n_samples, threshold, api_key], outputs=[answer, ask_llm_button_raw])
212
+ ask_button.click(fn=raw_inference, inputs=[input_text, state, n_samples, threshold, api_key], outputs=[answer, ask_llm_button])
213
+ ask_llm_button.click(fn=classify_intent, inputs=[input_text, history, answer, model_name, api_key], outputs=[LLM_prediction, LLM_prompt])
214
+ ask_llm_button_raw.click(fn=classify_intent, inputs=[raw_input_text, raw_history, answer, model_name, api_key], outputs=[LLM_prediction, LLM_prompt])
215
+
216
+ # interface.launch(debug=True)
217
+ # interface.launch(share=True, debug=True)
218
+ interface.launch(inline=True)
embeddings.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8acf625b84e53b36e293c99f1bccb7e3cb7024357c4489e8d19000c1d0878846
3
+ size 98432
flows.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #@title flows.py
2
+ GENERAL_STATE_FLOWS = [
3
+ "homework",
4
+ "homework_with_question",
5
+ "recommend",
6
+ "recommend_with_topic",
7
+ "assess_knowledge",
8
+ "assess_knowledge_with_topic",
9
+ "explain_drill",
10
+ "exercise",
11
+ "exercise_with_topic",
12
+ "personal",
13
+ "finish_learning",
14
+ "provide_topic",
15
+ "provide_subject",
16
+ "provide_subtopic",
17
+ "provide_concept",
18
+ "general_intent",
19
+ "explain_answer",
20
+ "explain_answer_with_topic",
21
+ "back_to_study",
22
+ "summarize",
23
+ "unknown"
24
+ ]
25
+
26
+ UNDERSTAND_STATE_FLOWS = [
27
+ "homework",
28
+ "homework_with_question",
29
+ "provide_topic",
30
+ "provide_subject",
31
+ "explain_answer",
32
+ "explain_answer_with_topic",
33
+ "core_explanation_repeat",
34
+ "core_explanation_exercise",
35
+ "core_explanation_exercise_with_topic",
36
+ "core_explanation_video",
37
+ "core_explanation_video_with_topic",
38
+ "core_explanation_protips",
39
+ "negative_respond",
40
+ "positive_respond",
41
+ "assess_knowledge",
42
+ "assess_knowledge_with_topic",
43
+ "recommend",
44
+ "recommend_with_topic",
45
+ "exercise",
46
+ "exercise_with_topic",
47
+ "personal",
48
+ "unknown",
49
+ "general_intent"
50
+ ]
51
+
52
+ HOMEWORK_STATE_FLOWS = ["homework_with_question",
53
+ "explain_answer",
54
+ "explain_answer_with_topic",
55
+ "provide_topic",
56
+ "provide_subject",
57
+ "homework",
58
+ "recommend",
59
+ "recommend_with_topic",
60
+ "assess_knowledge",
61
+ "assess_knowledge_with_topic",
62
+ "exercise",
63
+ "exercise_with_topic",
64
+ "exercise_multiple_question",
65
+ "exercise_multiple_question_with_topic",
66
+ "similar_question",
67
+ "personal",
68
+ "finish_learning",
69
+ "unknown",
70
+ "back_to_study",
71
+ ]
72
+
73
+ RECOMMEND_MATERIAL_FLOWS = [
74
+ "homework",
75
+ "homework_with_question",
76
+ "understand",
77
+ "understand_with_topic",
78
+ "recommend",
79
+ "recommend_with_topic",
80
+ "provide_topic",
81
+ "provide_subject",
82
+ "explain_answer",
83
+ "assess_knowledge",
84
+ "assess_knowledge_with_topic",
85
+ "exercise",
86
+ "exercise_with_topic",
87
+ "exercise_multiple_question",
88
+ "exercise_multiple_question_with_topic",
89
+ "personal",
90
+ "finish_learning",
91
+ "go_back_to_general",
92
+ "unknown"
93
+ ]
94
+
95
+ PERSONAL_STATE_FLOWS = [
96
+ "homework",
97
+ "homework_with_question",
98
+ "recommend",
99
+ "recommend_with_topic",
100
+ "assess_knowledge",
101
+ "assess_knowledge_with_topic",
102
+ "personal",
103
+ "exercise",
104
+ "exercise_with_topic",
105
+ "explain_answer",
106
+ "explain_answer_with_topic",
107
+ "provide_topic",
108
+ "provide_subject",
109
+ "back_to_study",
110
+ "finish_learning",
111
+ "unknown"
112
+ ]
113
+
114
+ EXERCISE_STATE_FLOWS = [
115
+ "exercise",
116
+ "exercise_with_image",
117
+ "exercise_with_topic",
118
+ "exercise_multiple_question",
119
+ "exercise_multiple_question_with_topic",
120
+ "provide_topic",
121
+ "provide_subject",
122
+ "explain_answer",
123
+ "explain_answer_with_topic",
124
+ "provide_answer",
125
+ "similar_question",
126
+ "assess_knowledge",
127
+ "assess_knowledge_with_topic",
128
+ "homework",
129
+ "homework_with_question",
130
+ "recommend",
131
+ "recommend_with_topic",
132
+ "finish_learning",
133
+ "personal",
134
+ "back_to_study",
135
+ "unknown"
136
+ ]
137
+
138
+ ASSESS_KNOWLEDGE_STATE_FLOWS = [
139
+ "assess_knowledge",
140
+ "assess_knowledge_with_topic",
141
+ "assess_knowledge_answer",
142
+ "explain_drill",
143
+ "provide_topic",
144
+ "provide_subject",
145
+ "diagnosis_result",
146
+ "explain_answer",
147
+ "explain_answer_with_topic",
148
+ "homework",
149
+ "homework_with_question",
150
+ "recommend",
151
+ "recommend_with_topic",
152
+ "exercise",
153
+ "exercise_with_topic",
154
+ "exercise_multiple_question",
155
+ "exercise_multiple_question_with_topic",
156
+ "general_intent",
157
+ "finish_learning",
158
+ "personal",
159
+ "back_to_study",
160
+ "unknown"
161
+ ]
162
+
163
+ STATE_FLOWS_MAP = {
164
+ "GeneralState":GENERAL_STATE_FLOWS,
165
+ "HomeworkState":HOMEWORK_STATE_FLOWS,
166
+ "ExerciseState":EXERCISE_STATE_FLOWS,
167
+ "UnderstandState":UNDERSTAND_STATE_FLOWS,
168
+ "RecommendMaterialState":RECOMMEND_MATERIAL_FLOWS,
169
+ "PersonalState":PERSONAL_STATE_FLOWS,
170
+ "AssessKnowledgeState":ASSESS_KNOWLEDGE_STATE_FLOWS,
171
+ }
intents.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ intents_sentence_similarity_en = {
2
+ "homework":"Intent to do exercise questions or homework. (Bantu Kerjakan PR, Bantu PR,tanya soal, tanya pr)",
3
+ "homework_with_question":"Curious and asking spesific questions or homework.(Bantu Kerjakan soal ini, Bisa kerjakan soal ini, Beritahu jawabannya)",
4
+ "explain_answer":"Wants explanation/study about the spesific answer (Belajar Lebih Dalam/Ceritakan lebih lanjut/jelaskan dari awal/jelaskan/jelasin lebih lanjut/belum paham)",
5
+ "explain_answer_with_topic":"Wants explanation/study about spesific topics (Belajar Lebih Dalam terkait/Ceritakan lebih lanjut/jelaskan dari awal/jelaskan lebih lanjut/belum paham)",
6
+ "core_explanation_repeat":"Repeat the explanation of a question (Jelasin ulang/Jelaskan ulang/Jelaskan lagi)",
7
+ "core_explanation_exercise":"Explanation from Example questions. (Contoh Soal/Contoh soal dong/bahas dengan soal)",
8
+ "core_explanation_exercise_with_topic":"Explanation from Example questions. (Contoh Soal/Contoh soal dong/bahas dengan soal)",
9
+ "core_explanation_video":"Example questions with a video. (Video Penjelasan/Bahas dengan video/mau video penjelasan)",
10
+ "core_explanation_video_with_topic":"Example questions with a video. (Video Penjelasan/Bahas dengan video/mau video penjelasan)",
11
+ "core_explanation_protips":"Suggestions or recommendations about Tips and tricks",
12
+ "negative_respond":"Did not understand the explanation (aku belum paham, masih belum paham, aku gangerti, aku belum mengerti, masih bingung, aku kesusahan)",
13
+ "positive_respond":"Understanding the explanation (aku sudah paham, oke paham kak, oke mengerti, oke aman, lanjut kak)",
14
+ "recommend":"Suggestions or recommendations (Rekomendasi Materi/Suggested Materials/Materi Terkait)",
15
+ "recommend_with_topic":"Suggestions or recommendations about certain lesson topic (Aku mau materi /lagi pengen materi /mau materi dong kak/Rekomendasi materi kak/Materi terkait)",
16
+ "assess_knowledge":"Assess/diagnosis skill/knowledge",
17
+ "assess_knowledge_with_topic":"Assess/diagnosis skill/knowledge",
18
+ "assess_knowledge_answer":"Follow up to assess knowledge result. (jawabanku kok salah ya, kenapa bisa salah ya jawabanku, aku gabisa jawab, wah jawabanku bener wkwk, yeay bener semua jawabanku, jawabanku salah 1 aja yeay, cuman bener 1 jawabanku, secara garis besar udah bagus ya hasilku, perlu peningkatan belajar lagi nih)",
19
+ "exercise":"Want to do exercise questions (Latihan Soal, aku mau soal, aku butuh soal, bantu carikan soal dong, mau soal dong, mau pertanyaan dong, latihan pertanyaan, coba ulang pertanyaan nya, ulangi pertanyaanya karena tadi kurang jelas, minta soal)",
20
+ "exercise_with_image":"Want to do exercise questions with an image",
21
+ "exercise_with_topic":"Want to do exercise questions of a topic (carikan soal integral, mau soal fisika, berikan aku soal tata surya, soal geografi, latihan soal ekonomi, soal biologi)",
22
+ "exercise_multiple_question":"Wants to do exercise with a number of question",
23
+ "exercise_multiple_question_with_topic":"Wants to do exercise with a number of question with topics",
24
+ "personal":"Sharing personal story (aku mau curhat, kak lagi sedih)",
25
+ "general_intent":" casual chatting, Greetings (halo kak, selamat pagi), Asking help about study and exams in general (aku pusing, aku stres, aku mau belajar kak, besok ulangan, butuh temen belajar, besok ujian, temenin aku, bantu aku, aku butuh bantuan)",
26
+ "provide_topic":"Lesson material subject/topic/theme/problem/matter/point/issue/area/ (Fisika, Kimia, Matematika, Biologi, Sejarah, Bahasa Indonesia, Geografi, Biologi, Bahasa Inggris)",
27
+ "provide_subject":"Lesson material Subject (Fisika, Kimia, Matematika, Biologi, Sejarah, Bahasa Indonesia, Geografi, Biologi, Bahasa Inggris)",
28
+ "provide_subtopic":"Lesson material Subtopic",
29
+ "provide_concept":"Explain concept",
30
+ "finish_learning":"Finished/already learned (Terimakasih /Thank you/No/Sudah paham!/Ok/Tidak)",
31
+ "back_to_study":"Return to study (Kembali Belajar) /Mulai Belajar?/langsung mulai belajar",
32
+ "similar_question":"Create similar exercise questions (Buatkan Soal Serupa / Cari Soal Serupa / Soal lagi yang mirip / Latihan soal yang mirip)",
33
+ "provide_answer":"Answering exercise questions (A, B, C, D, E) or more than one characters (Jawabanya A, B, C, D, E, Menurutku jawabanya A, B, C, D, E)",
34
+ "diagnosis_result":"See diagnosis or score result (Lihat hasil diagnosis / hasil drill)",
35
+ "explain_drill":"Explain exercise question drill material (bahas lewat chat/bahas soal nomer 1 ya/bahas soal nomer 2 ya/bahas soal nomer 3 ya/bahas soal nomer 4 ya/bahas soal nomer 5 ya)"
36
+ }
37
+
38
+ intents = ['homework',
39
+ 'homework_with_question',
40
+ 'explain_answer',
41
+ 'explain_answer_with_topic',
42
+ 'core_explanation_repeat',
43
+ 'core_explanation_exercise',
44
+ 'core_explanation_exercise_with_topic',
45
+ 'core_explanation_video',
46
+ 'core_explanation_video_with_topic',
47
+ 'core_explanation_protips',
48
+ 'negative_respond',
49
+ 'positive_respond',
50
+ 'recommend',
51
+ 'recommend_with_topic',
52
+ 'assess_knowledge',
53
+ 'assess_knowledge_with_topic',
54
+ 'assess_knowledge_answer',
55
+ 'exercise',
56
+ 'exercise_with_image',
57
+ 'exercise_with_topic',
58
+ 'exercise_multiple_question',
59
+ 'exercise_multiple_question_with_topic',
60
+ 'personal',
61
+ 'general_intent',
62
+ 'provide_topic',
63
+ 'provide_subject',
64
+ 'provide_subtopic',
65
+ 'provide_concept',
66
+ 'finish_learning',
67
+ 'back_to_study',
68
+ 'similar_question',
69
+ 'provide_answer',
70
+ 'diagnosis_result',
71
+ 'explain_drill']
72
+
73
+ chatbot_intents = {
74
+ "homework":"The student needs help to do their homework (PR). The student doesn't provide the question, only the intention. (Bantu Kerjakan PR, Bantu PR,tanya soal, tanya pr, tanya soal lain, tanya soal yang lain, tanya yang lain",
75
+ "homework_with_question":"Help students with their homework (PR), the student provide the specific question about their homework. ('berapa 1+1 ?', 'berapa kecepatan mobil yang menempuh jarak 100km dalam waktu 2 jam ?', 'apa yang dimaksud dengan monokotil dan dikotil ?')",
76
+ "explain_answer":"Student is curious or wants more explanation about the answer (Belajar Lebih Dalam/Ceritakan lebih lanjut/jelaskan dari awal/jelaskan/jelasin lebih lanjut/belum paham)",
77
+ "explain_answer_with_topic":"Student is curious or wants more explanation about specific topics (Belajar Lebih Dalam terkait termodinamika/Ceritakan lebih lanjut bab matematika integral/jelaskan dari awal materi mitosis/jelaskan lebih lanjut pelajaran listrik/belum paham terkait materi past tenses)",
78
+ "core_explanation_repeat":"Student wants repeat explanation (Jelasin ulang/Jelaskan ulang/Jelaskan lagi)",
79
+ "core_explanation_exercise":"Student wants explaination with exercise (Contoh Soal/Contoh soal dong/bahas dengan soal)",
80
+ "core_explanation_exercise_with_topic":"Student wants explaination with exercise (Contoh Soal integral/Contoh soal stikiometri dong/bahas dengan soal fotosintesis dong)",
81
+ "core_explanation_video":"Student wants explanation with video (Video Penjelasan/Bahas dengan video/mau video penjelasan)",
82
+ "core_explanation_video_with_topic":"Student wants explanation with video and provides topics (Video Penjelasan laju reaksi/Bahas dengan video integral/mau video penjelasan fotositesis)",
83
+ "core_explanation_protips":"Student wants tips and tricks (protips/tips dan trik)",
84
+ "negative_respond":"Student respond they dont understand the explanation (aku belum paham, masih belum paham, aku gangerti, aku belum mengerti, masih bingung)",
85
+ "positive_respond":"Student respond they understand the explanation (aku sudah paham, oke paham kak, oke mengerti, oke aman, lanjut kak)",
86
+ "recommend":"The student want learning materials suggestions (Rekomendasi Materi/Suggested Materials/Materi Terkait)",
87
+ "recommend_with_topic":"The student want learning materials suggestions, students provide the topic (Aku mau materi integral/lagi pengen materi aritmatika/mau materi kimia dong kak/Rekomendasi materi tata surya kak/Materi terkait termodinamika)",
88
+ "assess_knowledge":"The student want their skill/knowledge to be assessed (Cek Kemampuan/Uji kemampuan/diagnosis kemampuan/mengasah kemampuan/Belajar Lebih Efektif/Cek pemahamanku)",
89
+ "assess_knowledge_with_topic":"The student want their skill/knowledge to be assessed and student provide topics(Cek Kemampuan integral/Uji kemampuan tata surya/diagnosis kemampuan termodinamika/mengasah kemampuan hukum newton/Cek pemahaman aljabar linear)",
90
+ "assess_knowledge_answer":"The student answer to follow up Diagnosis Kemampuan result. (jawabanku kok salah ya, kenapa bisa salah ya jawabanku, aku gabisa jawab, wah jawabanku bener wkwk, yeay bener semua jawabanku, jawabanku salah 1 aja yeay, cuman bener 1 jawabanku, secara garis besar udah bagus ya hasilku, perlu peningkatan belajar lagi nih)",
91
+ "exercise":"The student want to do exercise (Latihan Soal, aku mau soal, aku butuh soal, bantu carikan soal dong, mau soal dong, mau pertanyaan dong, latihan pertanyaan, coba ulang pertanyaan nya, ulangi pertanyaanya karena tadi kurang jelas, minta soal)",
92
+ "exercise_with_image":"The student want to do exercise with an image (aku mau soal dengan gambar, aku butuh soal pake gambar, bantu carikan soal menggunakan gambar dong, mau soal pake gambar, minta soal yang ada gambarnya)",
93
+ "exercise_with_topic":"The student want to do exercise and students provide topic (carikan soal integral, mau soal fisika, berikan aku soal tata surya, soal geografi, latihan soal ekonomi, soal biologi)",
94
+ "exercise_multiple_question":"The student wants exercise with multiple / more than one question without topics (langsung 2 soal dong, mau 10 soal sekaligus, berikan aku 4 soal)",
95
+ "exercise_multiple_question_with_topic":"The student wants exercise with multiple / more than one question with topics (minta soal 2 fotosintesis kak, mau 10 soal sekaligus aritmatika, berikan aku 4 soal termodinamika fisika)",
96
+ "personal":"The student want to share their personal story (aku mau curhat, kak lagi sedih)",
97
+ "general_intent":"The student intent in general about study. (aku pusing, aku mau belajar kak, besok ulangan, butuh temen belajar, besok ujian, temenin aku, bantu aku, aku butuh bantuan)",
98
+ "provide_topic":"The student provide topics. the topic is more specific hierarchy rather than subject example topic is integral, stoikiometri, termodinamika, mitosis. Usually only one words",
99
+ "provide_subject":"The student provide subjects. example subjects are matematika, fisika, kimia, biologi, sejarah, geografi, fisika. Usually only one words",
100
+ "provide_subtopic":"The student provide subtopic. subtopic is more specific compared to subjects and topics. for example subjects: fisika, topics: energi, subtopics: energi termal. for example subtopics: energi kinetik, aljabar linear, fungsi trigonometri. Usually more than one words",
101
+ "provide_concept":"The student provide concept. concept is more specific compared to subjects, topics, and subtopics. for example subjects: fisika, topics: energi, subtopics: energi termal, concept: mengerti suhu dan kesetimbangan termal. for example concepts: sistem persamaan linear dua variabel, prinsip kekekalan energi termal, hukum termodinamika pertama. Usually more than one words",
102
+ "finish_learning":"Students feel that they have enough/finish about the learning they are doing.",
103
+ "back_to_study":"students wants to back study (Kembali Belajar)",
104
+ "similar_question":"students wants to create variations of questions (Buatkan Soal Serupa / Cari Soal Serupa / Soal lagi yang mirip / Latihan soal yang mirip)",
105
+ "provide_answer":"students answer the questions from chatbot multiple questions. Sometimes students only answer with 1 characters like (A, B, C, D) or more than one characters (Jawabanya B, B. tata surya, D.trigonometri, Menurutku jawabanya C)",
106
+ "diagnosis_result":"students want to see their diagnosis result (Lihat hasil diagnosis / hasil drill)",
107
+ "explain_drill":"students want to know more about the drill material topics/subtopics (bahas lewat chat/bahas soal nomer 1 ya/bahas soal nomer 2 ya/bahas soal nomer 3 ya/bahas soal nomer 4 ya/bahas soal nomer 5 ya)"
108
+ }
109
+
new_embed_0.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f4402154a8ad3b7bc0a488f7854282b336ec3883dce7858502d50491523cd69
3
+ size 393344
new_embed_1.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e57945b6363b37b9e74ac9ce4066c7c844fadb6a557c6babfd57ecd2c732a6a4
3
+ size 393344
new_embed_2.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:284be7423a0f91f70b17196fe8322936d6d758f63a47d3f0470dcbb6403f3abc
3
+ size 417920
new_embed_test.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcb9de6659ffb3014d2ae9693b1bc87b684892623d27f9c521d8f5cc26cf4980
3
+ size 417920
openai_embeddings.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ae474b523b222e95d39016a80eb507b962dc0cebe105744879af1e695d4d456
3
+ size 393344
prompt.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #@title prompt.py
2
+ prompt_template = """
3
+ The given message needs to be mapped to exactly one of the intents described below. Only answer with the intent name.
4
+
5
+ {intents}
6
+ unknown: You don't find the matching intent from the above list
7
+
8
+ Message: {INPUT}
9
+
10
+ Here are previous chats or summary conversation for your reference (only use this if you need further information to infer the intent):
11
+ {chatHistory}
12
+
13
+ Intent:
14
+ """
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ langchain
3
+ openai
utility_func.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #@title Utility Functions
2
def get_history_from_prompt(prompt:str):
    """Extract the chat-history section from a logged classification prompt.

    The history is the text that follows the "previous chats" header (either
    of its two wordings) up to the next occurrence of that header, if any,
    with the ``The Intent:`` marker removed.

    Args:
        prompt: Full prompt text.

    Returns:
        The history text, or an empty string when the prompt contains neither
        header (this case used to raise IndexError).
    """
    chats_header = "Here are previous chats for your reference (only use this if you need further information to infer the intent):"
    summary_header = "Here are previous chats or summary conversation for your reference (only use this if you need further information to infer the intent):"

    header = chats_header if chats_header in prompt else summary_header
    sections = prompt.split(header)
    if len(sections) < 2:
        # No history section present at all.
        return ""
    return sections[1].replace("""The Intent:""", '')
8
+
9
def get_latest_user_input_from_prompt(prompt:str):
    """Extract the message to classify from a logged classification prompt.

    Returns the text between the "Here is the message you are to classify:"
    header and the "previous chats" header (either wording).  Raises
    IndexError when the message header is absent.
    """
    chats_header = "Here are previous chats for your reference (only use this if you need further information to infer the intent):"
    # This wording is matched without its trailing colon.
    summary_header = "Here are previous chats or summary conversation for your reference (only use this if you need further information to infer the intent)"

    pieces = prompt.split("Here is the message you are to classify:")
    terminator = chats_header if chats_header in prompt else summary_header
    return pieces[1].split(terminator)[0]
16
+
17
def get_top_intents(intent_list:list, similarity, n=5, threshold=0.3, flow=None) -> list:
    """Return up to ``n`` ``(intent, score)`` pairs, highest score first.

    Args:
        intent_list: Intent names; position ``i`` corresponds to
            ``similarity[i]``.
        similarity: Indexable collection of scores whose items expose
            ``.item()`` (e.g. a NumPy array).
        n: Maximum number of pairs to return.
        threshold: An intent is kept only if its score is strictly greater.
        flow: Optional collection of eligible intent names.  When it is
            ``None`` or empty, every intent is eligible.

    Returns:
        A list of ``(intent, score)`` tuples sorted by descending score, or
        ``[('unknown', 1.0)]`` when nothing passes the filters.
    """
    result = dict()
    for position, intent in enumerate(intent_list):
        # Intents outside the current flow are not candidates.
        if flow and intent not in flow:
            continue
        score = similarity[position].item()
        if score > threshold:
            result[intent] = score

    top_intents = sorted(result.items(), key=lambda item: item[1], reverse=True)[:n]

    if not top_intents:
        top_intents.append(('unknown', 1.0))
    return top_intents
35
+
36
def create_embedding(intents:dict, model_en):
    """Encode every intent description with ``model_en``.

    The values of ``intents`` are collected into a list, in the mapping's
    iteration order, and passed to ``model_en.encode`` in a single call; the
    return value of that call is handed back unchanged.
    """
    descriptions = list(intents.values())
    return model_en.encode(descriptions)
42
+
43
+ # def get_embedding(text, model="text-embedding-ada-002"):
44
+ # text = text.replace("\n", " ")
45
+ # return client.embeddings.create(input = [text], model=model).data[0].embedding
46
+
47
+ # from openai import OpenAI
48
+ # import numpy as np
49
+ # client = OpenAI()
50
+
51
+ # def create_embedding_openai(intents:dict):
52
+ # intents_description_en = []
53
+ # for k,v in intents.items():
54
+ # intents_description_en.append(v)
55
+ # embeddings = np.zeros((len(intents_description_en), 1536))
56
+ # for i, text in enumerate(intents_description_en):
57
+ # embeddings[i,:] = get_embedding(text)
58
+ # return embeddings