code5ecure committed on
Commit
12a3d1e
·
verified ·
1 Parent(s): 4dd6073

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -6
app.py CHANGED
@@ -55,7 +55,7 @@ def load_training_data():
55
  def build_rag_index(texts):
56
  global embedder, index
57
  try:
58
- embedder = SentenceTransformer('xmanii/maux-gte-persian', device='cpu') # Use CPU to save memory
59
  embeddings = embedder.encode(texts, convert_to_tensor=True, batch_size=8).cpu().numpy() # Smaller batch size
60
  dimension = embeddings.shape[1]
61
  index = faiss.IndexFlatL2(dimension)
@@ -101,14 +101,17 @@ def chat(message, history):
101
  if embedder and index:
102
  try:
103
  query_emb = embedder.encode(message, convert_to_tensor=True).cpu().numpy()
104
- D, I = index.search(query_emb, k=5) # Increased k for better context
105
  retrieved = [texts[i] for i in I[0] if i >= 0 and i < len(texts)]
106
  context = "\n".join(retrieved)
107
  except Exception as e:
108
  print(f"Error in RAG retrieval: {e}")
109
 
110
- # Prepare prompt with context
111
- prompt = f"Context: {context}\nUser: {message}\nBot:"
 
 
 
112
 
113
  # Tokenize input
114
  inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
@@ -118,9 +121,11 @@ def chat(message, history):
118
  outputs = model.generate(
119
  input_ids=inputs["input_ids"],
120
  attention_mask=inputs["attention_mask"],
121
- max_length=100, # Increased for better responses
122
- num_beams=7, # Increased for better quality
123
  no_repeat_ngram_size=2,
 
 
124
  early_stopping=True,
125
  )
126
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
55
  def build_rag_index(texts):
56
  global embedder, index
57
  try:
58
+ embedder = SentenceTransformer('sentence-transformers/paraphrase-xlm-r-multilingual-v1', device='cpu') # Better for conversational Persian
59
  embeddings = embedder.encode(texts, convert_to_tensor=True, batch_size=8).cpu().numpy() # Smaller batch size
60
  dimension = embeddings.shape[1]
61
  index = faiss.IndexFlatL2(dimension)
 
101
  if embedder and index:
102
  try:
103
  query_emb = embedder.encode(message, convert_to_tensor=True).cpu().numpy()
104
+ D, I = index.search(query_emb, k=10) # Increased k for better context
105
  retrieved = [texts[i] for i in I[0] if i >= 0 and i < len(texts)]
106
  context = "\n".join(retrieved)
107
  except Exception as e:
108
  print(f"Error in RAG retrieval: {e}")
109
 
110
+ # Include conversation history (last 3 exchanges)
111
+ history_context = "\n".join([f"User: {h['user']} -> Bot: {h['bot']}" for h in conversation_history[-3:]]) if conversation_history else ""
112
+
113
+ # Prepare prompt with context and history
114
+ prompt = f"شما یک چت‌بات فارسی مفید و دوستانه هستید. فقط به سؤال کاربر پاسخ کوتاه و مرتبط بدهید و از اطلاعات زمینه فقط برای کمک به پاسخ استفاده کنید:\nContext: {context}\nHistory: {history_context}\nUser: {message}\nBot:"
115
 
116
  # Tokenize input
117
  inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
 
121
  outputs = model.generate(
122
  input_ids=inputs["input_ids"],
123
  attention_mask=inputs["attention_mask"],
124
+ max_length=150,
125
+ num_beams=10,
126
  no_repeat_ngram_size=2,
127
+ temperature=0.8, # Slightly increased for better diversity
128
+ top_p=0.9, # Added for better response quality
129
  early_stopping=True,
130
  )
131
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)