Spaces:

JustKiddo
/

IOTraining

Sleeping

App Files Files Community

JustKiddo committed on Dec 12, 2024

Commit

80eee0f

verified ·

1 Parent(s): 7f79d8b

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -9

app.py CHANGED Viewed

@@ -6,17 +6,27 @@ import numpy as np
 from sklearn.metrics.pairwise import cosine_similarity
 import json
 class VietnameseChatbot:
     def __init__(self, model_name='intfloat/multilingual-e5-small'):
         """
         Initialize the Vietnamese chatbot with pre-loaded model and conversation data
         """
-        # Load pre-trained model and tokenizer
-        print("Loading tokenizer...")
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        print("Loading model...")
-        self.model = AutoModel.from_pretrained(model_name, torch_dtype=torch.float16)
         # Load comprehensive conversation dataset
         self.conversation_data = self._load_conversation_data()
@@ -40,7 +50,7 @@ class VietnameseChatbot:
             {"query": "Bạn từ đâu đến?", "response": "Tôi được phát triển bởi một nhóm kỹ sư AI, và tôn chỉ của tôi là hỗ trợ con người."},
             # Small talk
-            {"query": "Bạn thích gì?", "response": "Tôi thích học hỏi và giúp đỡ mọi người. Mỗi cuộc trò chuyện là một cơ hội để tôi phát triển."},
             {"query": "Bạn có thể làm gì?", "response": "Tôi có thể trò chuyện, trả lời câu hỏi, và hỗ trợ bạn trong nhiều tình huống khác nhau."},
             # Weather and time
@@ -56,9 +66,11 @@ class VietnameseChatbot:
             {"query": "Bye", "response": "Tạm biệt! Rất vui được trò chuyện với bạn."},
         ]
     def _precompute_embeddings(self):
         """
         Pre-compute embeddings for all conversation queries
         """
         embeddings = []
         for item in self.conversation_data:
@@ -119,6 +131,14 @@ class VietnameseChatbot:
             print(f"Response generation error: {e}")
             return "Đã xảy ra lỗi. Xin vui lòng thử lại."
 def main():
     st.set_page_config(
         page_title="Trợ Lý AI Tiếng Việt",
@@ -128,8 +148,8 @@ def main():
     st.title("🤖 Trợ Lý AI Tiếng Việt")
     st.caption("Trò chuyện với trợ lý AI được phát triển bằng mô hình đa ngôn ngữ")
-    # Initialize chatbot (this will pre-load models and embeddings)
-    chatbot = VietnameseChatbot()
     # Chat history in session state
     if 'messages' not in st.session_state:

 from sklearn.metrics.pairwise import cosine_similarity
 import json
+@st.cache_resource
+def load_model_and_tokenizer(model_name='intfloat/multilingual-e5-small'):
+    """
+    Cached function to load model and tokenizer
+    This ensures the model is loaded only once and reused
+    """
+    print("Loading tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    print("Loading model...")
+    model = AutoModel.from_pretrained(model_name, torch_dtype=torch.float16)
+    return tokenizer, model
 class VietnameseChatbot:
     def __init__(self, model_name='intfloat/multilingual-e5-small'):
         """
         Initialize the Vietnamese chatbot with pre-loaded model and conversation data
         """
+        # Load pre-trained model and tokenizer using cached function
+        self.tokenizer, self.model = load_model_and_tokenizer(model_name)
         # Load comprehensive conversation dataset
         self.conversation_data = self._load_conversation_data()
             {"query": "Bạn từ đâu đến?", "response": "Tôi được phát triển bởi một nhóm kỹ sư AI, và tôn chỉ của tôi là hỗ trợ con người."},
             # Small talk
+            {"query": "Bạn thích gì?", "response": "Tôi thích học hỏi và giú đỡ mọi người. Mỗi cuộc trò chuyện là một cơ hội để tôi phát triển."},
             {"query": "Bạn có thể làm gì?", "response": "Tôi có thể trò chuyện, trả lời câu hỏi, và hỗ trợ bạn trong nhiều tình huống khác nhau."},
             # Weather and time
             {"query": "Bye", "response": "Tạm biệt! Rất vui được trò chuyện với bạn."},
         ]
+    @st.cache_data
     def _precompute_embeddings(self):
         """
         Pre-compute embeddings for all conversation queries
+        Cached to avoid recomputing on every run
         """
         embeddings = []
         for item in self.conversation_data:
             print(f"Response generation error: {e}")
             return "Đã xảy ra lỗi. Xin vui lòng thử lại."
+@st.cache_resource
+def initialize_chatbot():
+    """
+    Cached function to initialize the chatbot
+    This ensures the chatbot is created only once
+    """
+    return VietnameseChatbot()
 def main():
     st.set_page_config(
         page_title="Trợ Lý AI Tiếng Việt",
     st.title("🤖 Trợ Lý AI Tiếng Việt")
     st.caption("Trò chuyện với trợ lý AI được phát triển bằng mô hình đa ngôn ngữ")
+    # Initialize chatbot using cached initialization
+    chatbot = initialize_chatbot()
     # Chat history in session state
     if 'messages' not in st.session_state: