Spaces:

baobuiquang
/

nlqna-chatbot

Paused

App Files Files

baobuiquang committed on Mar 26

Commit

d5b4eea

•

1 Parent(s): 14e2623

update

Browse files

Files changed (1) hide show

app.py +85 -20

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ import numpy as np
 import torch
 import time
 from transformers import AutoTokenizer, AutoModel
-from datetime import datetime
 # pd.options.mode.chained_assignment = None  # default='warn'
 # ===========================
@@ -148,13 +148,78 @@ for i in range(len(preprocessed_df_map)):
 # ========== MAIN ==========
 def chatbot_mechanism(message, history, additional_input_1):
     # Clarify namings
     question = message
     sheet_id = additional_input_1
     # Select the right data
     df = preprocessed_df_map[sheet_id]
     x_list_embeddings = x_list_embeddings_map[sheet_id]
     y_list_embeddings = y_list_embeddings_map[sheet_id]
     # Find the position of the needed cell
     question_embedding = text_to_embedding(question)
     x_sim = similarity(question_embedding, x_list_embeddings)
@@ -173,9 +238,9 @@ def chatbot_mechanism(message, history, additional_input_1):
     # Just add some text to warn users
     eval_text = ""
     eval_text_sub_title = ""
-    if x_score < 0.85 or y_score < 0.85:
         eval_text_sub_title = "Cảnh báo:"
-        eval_text = "⚠️ Đặc trưng trích xuất không rõ ràng ⚠️"
     # Score display
     x_score_display = str(round((x_score - 0.8) / (1.0 - 0.8) * 100, 1))
@@ -186,28 +251,28 @@ def chatbot_mechanism(message, history, additional_input_1):
     # Final print
     final_output_message = f"\
-        <div style='color: gray; font-size: 80%; font-family: courier, monospace;'>\
-            Kết quả:\
-        </div>\
-        <div style='font-weight: bold;'>\
-            {cell_value}\
-        </div>\
         <div style='color: gray; font-size: 80%; font-family: courier, monospace; margin-top: 6px;'>\
             Đặc trưng trích xuất được:\
         </div>\
         • {x_text}<br>\
-        • {y_text}<br>\
-        <div style='color: gray; font-size: 80%; font-family: courier, monospace; margin-top: 6px;'>\
-            Đánh giá:\
-        </div>\
-        Độ tương quan: [x={x_score_display}%, y={y_score_display}%]<br>\
-        <div style='color: gray; font-size: 80%; font-family: courier, monospace; margin-top: 6px;'>\
-            {eval_text_sub_title}\
-        </div>\
-        <div style='color: red; font-weight: bold;'>\
-            {eval_text}\
-        </div>\
     "
     return final_output_message
     # for i in range(len(final_output_message)):
     #     time.sleep(0.1)

 import torch
 import time
 from transformers import AutoTokenizer, AutoModel
+from datetime import datetime, timedelta
 # pd.options.mode.chained_assignment = None  # default='warn'
 # ===========================
 # ========== MAIN ==========
 def chatbot_mechanism(message, history, additional_input_1):
     # Clarify namings
     question = message
     sheet_id = additional_input_1
+    # Lightly preprocess the message to handle unclear cases (e.g. "tháng này")
+    extra_information_for_special_cases = ""
+    extra_information_for_special_cases_flag = False
+    unclear_cases = [
+        # Case 0: -> YEAR
+        ["năm này", "năm hiện tại", "năm nay"],
+        # Case 1: -> MONTH, YEAR
+        ["tháng này", "tháng hiện tại", "tháng nay", "tháng bây giờ", "tháng đang diễn ra", "tháng hiện nay", "tháng hiện giờ"],
+        # Case 2: -> DAY, MONTH, YEAR
+        ["ngày này" , "ngày hiện tại", "ngày hôm nay", "hôm nay", "bây giờ", "hiện tại", "thời điểm này", "thời gian này"],
+        # Case 3: -> YEAR
+        ["năm trước", "năm ngoái", "năm qua", "năm vừa rồi", "năm đã qua"],
+        # Case 4: -> MONTH, YEAR
+        ["tháng trước", "tháng qua", "tháng vừa rồi", "tháng đã qua"],
+        # Case 5: -> DAY, MONTH, YEAR
+        ["hôm qua", "hôm trước", "ngày qua", "ngày trước"],
+        # Case 6: -> YEAR
+        ["năm sau", "năm tới", "năm tiếp theo", "năm kế tiếp", "năm sắp tới"],
+        # Case 7: -> MONTH, YEAR
+        ["tháng sau", "tháng tới", "tháng tiếp theo", "tháng kế tiếp", "tháng sắp tới"],
+        # Case 8: -> DAY, MONTH, YEAR
+        ["ngày mai", "ngày sau", "ngày tới", "ngày tiếp theo", "ngày hôm sau", "ngày kế tiếp", "ngày sắp tới"],
+    ]
+    for i in range(len(unclear_cases)):
+        for u in range(len(unclear_cases[i])):
+            if unclear_cases[i][u] in question:
+                # Flag
+                extra_information_for_special_cases_flag = True
+                # Get the current time data
+                current_time = datetime.now()
+                target_time = datetime.now() # Just pre-define
+                # Handle specific cases
+                if i in [0, 1, 2]:
+                    target_time = current_time # No change
+                elif i == 3:
+                    target_time = current_time - timedelta(days = 365)
+                elif i == 4:
+                    target_time = current_time - timedelta(days = 30)
+                elif i == 5:
+                    target_time = current_time - timedelta(days = 1)
+                elif i == 6:
+                    target_time = current_time + timedelta(days = 365)
+                elif i == 7:
+                    target_time = current_time + timedelta(days = 30)
+                elif i == 8:
+                    target_time = current_time + timedelta(days = 1)
+                # Extract time to day, month, year
+                day = str(target_time.strftime('%d').lstrip(''))
+                month = str(target_time.strftime('%m').lstrip(''))
+                year = str(target_time.strftime('%Y').lstrip(''))
+                # Handle specific cases
+                if i in [0, 3, 6]:
+                    extra_information_for_special_cases = f"Năm {year}"
+                elif i in [1, 4, 7]:
+                    extra_information_for_special_cases = f"Tháng {month} năm {year}"
+                elif i in [2, 5, 8]:
+                    extra_information_for_special_cases = f"Ngày {day} tháng {month} năm {year}"
+    if extra_information_for_special_cases_flag == True:
+        question = extra_information_for_special_cases + " " + question
     # Select the right data
     df = preprocessed_df_map[sheet_id]
     x_list_embeddings = x_list_embeddings_map[sheet_id]
     y_list_embeddings = y_list_embeddings_map[sheet_id]
     # Find the position of the needed cell
     question_embedding = text_to_embedding(question)
     x_sim = similarity(question_embedding, x_list_embeddings)
     # Just add some text to warn users
     eval_text = ""
     eval_text_sub_title = ""
+    if x_score <= 0.87 or y_score <= 0.87:
         eval_text_sub_title = "Cảnh báo:"
+        eval_text = "⚠️"
     # Score display
     x_score_display = str(round((x_score - 0.8) / (1.0 - 0.8) * 100, 1))
     # Final print
     final_output_message = f"\
         <div style='color: gray; font-size: 80%; font-family: courier, monospace; margin-top: 6px;'>\
             Đặc trưng trích xuất được:\
         </div>\
         • {x_text}<br>\
+        • {y_text if extra_information_for_special_cases_flag == False else extra_information_for_special_cases}<br>\
     "
+        # <div style='color: gray; font-size: 80%; font-family: courier, monospace; margin-top: 6px;'>\
+        #     Đánh giá:\
+        # </div>\
+        # Độ tương quan: [x={x_score_display}%, y={y_score_display}%]<br>\
+        # <div style='color: gray; font-size: 80%; font-family: courier, monospace; margin-top: 6px;'>\
+        #     Kết quả:\
+        # </div>\
+        # <div style='font-weight: bold;'>\
+        #     {cell_value}\
+        # </div>\
+        # <div style='color: gray; font-size: 80%; font-family: courier, monospace; margin-top: 6px;'>\
+        #     {eval_text_sub_title}\
+        # </div>\
+        # <div style='color: red; font-weight: bold;'>\
+        #     {eval_text}\
+        # </div>\
     return final_output_message
     # for i in range(len(final_output_message)):
     #     time.sleep(0.1)