Spaces:

ZEROTSUDIOS
/

chatbot-bipa-api2

Running

App Files Community

ZEROTSUDIOS committed on Jul 16

Commit

e1f7ca9

verified ·

1 Parent(s): 39ce8c5

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -33

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import os
-os.environ["HF_HOME"] = "/tmp/huggingface"  # ✅ custom safe cache dir
 from flask import Flask, request, jsonify
 from flask_cors import CORS
@@ -15,12 +15,12 @@ import re
 import nltk
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
 nltk_data_path = os.path.join(os.path.dirname(__file__), "nltk_data")
 os.makedirs(nltk_data_path, exist_ok=True)
 nltk.data.path.append(nltk_data_path)
 nltk.download('stopwords', download_dir=nltk_data_path, quiet=True)
-nltk.download('punkt', download_dir=nltk_data_path, quiet=True)
 nltk.download('wordnet', download_dir=nltk_data_path, quiet=True)
 logging.basicConfig(level=logging.INFO,
@@ -55,7 +55,7 @@ class BookRecommender:
             return ""
         text = text.lower()
         text = re.sub(r'[^\w\s]', ' ', text)
-        tokens = nltk.word_tokenize(text)
         tokens = [self.lemmatizer.lemmatize(word) for word in tokens if word not in self.stop_words]
         return ' '.join(tokens)
@@ -79,31 +79,6 @@ class BookRecommender:
             logger.error(f"Error loading model: {str(e)}", exc_info=True)
             return False
-    def recommend_books(self, user_query, top_n=5, include_description=True):
-        if self.model is None or self.book_embeddings is None or self.df is None:
-            return []
-        try:
-            processed_query = self.preprocess_text(user_query)
-            user_embedding = self.model.encode([processed_query])
-            similarities = cosine_similarity(user_embedding, self.book_embeddings)[0]
-            similar_books_idx = np.argsort(similarities)[-top_n:][::-1]
-            recommendations = []
-            for i, idx in enumerate(similar_books_idx):
-                book_data = {
-                    'title': self.df.iloc[idx].get('Title', ''),
-                    'author': self.df.iloc[idx].get('Authors', ''),
-                    'category': self.df.iloc[idx].get('Category', ''),
-                    'year': self.df.iloc[idx].get('Publish Date (Year)', ''),
-                    'description': self.df.iloc[idx].get('Description', '')[:197] + "..." if include_description and 'Description' in self.df.columns else '',
-                    'relevance_score': float(similarities[idx]),
-                    'rank': i + 1
-                }
-                recommendations.append(book_data)
-            return recommendations
-        except Exception as e:
-            logger.error(f"Error generating recommendations: {str(e)}", exc_info=True)
-            return []
 def load_ood_thresholds(model_path):
     threshold_path = os.path.join(model_path, "ood_thresholds.json")
     if os.path.exists(threshold_path):
@@ -115,15 +90,11 @@ def load_intent_resources():
     global intent_model, intent_tokenizer, intent_classes, intent_thresholds
     try:
         print("⏳ Loading intent model from Hugging Face Hub (ZEROTSUDIOS/Bipa-Classification)")
-        # Load intent model and tokenizer from HF Hub
         intent_model = AutoModelForSequenceClassification.from_pretrained("ZEROTSUDIOS/Bipa-Classification")
         intent_tokenizer = AutoTokenizer.from_pretrained("ZEROTSUDIOS/Bipa-Classification")
         print("✅ Remote model and tokenizer loaded")
         with open(os.path.join(INTENT_MODEL_PATH, "intent_classes.pkl"), "rb") as f:
             intent_classes = pickle.load(f)
         intent_thresholds = load_ood_thresholds(INTENT_MODEL_PATH)
         return True
     except Exception as e:
@@ -201,7 +172,6 @@ def recommend():
         "low_count": len(low_score)
     })
-# 🛠 Force run on import (for gunicorn compatibility)
 print("⚙️ Initializing models...")
 load_intent_resources()
 recommender = BookRecommender()

 import os
+os.environ["HF_HOME"] = "/tmp/huggingface"
 from flask import Flask, request, jsonify
 from flask_cors import CORS
 import nltk
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import wordpunct_tokenize
 nltk_data_path = os.path.join(os.path.dirname(__file__), "nltk_data")
 os.makedirs(nltk_data_path, exist_ok=True)
 nltk.data.path.append(nltk_data_path)
 nltk.download('stopwords', download_dir=nltk_data_path, quiet=True)
 nltk.download('wordnet', download_dir=nltk_data_path, quiet=True)
 logging.basicConfig(level=logging.INFO,
             return ""
         text = text.lower()
         text = re.sub(r'[^\w\s]', ' ', text)
+        tokens = wordpunct_tokenize(text)
         tokens = [self.lemmatizer.lemmatize(word) for word in tokens if word not in self.stop_words]
         return ' '.join(tokens)
             logger.error(f"Error loading model: {str(e)}", exc_info=True)
             return False
 def load_ood_thresholds(model_path):
     threshold_path = os.path.join(model_path, "ood_thresholds.json")
     if os.path.exists(threshold_path):
     global intent_model, intent_tokenizer, intent_classes, intent_thresholds
     try:
         print("⏳ Loading intent model from Hugging Face Hub (ZEROTSUDIOS/Bipa-Classification)")
         intent_model = AutoModelForSequenceClassification.from_pretrained("ZEROTSUDIOS/Bipa-Classification")
         intent_tokenizer = AutoTokenizer.from_pretrained("ZEROTSUDIOS/Bipa-Classification")
         print("✅ Remote model and tokenizer loaded")
         with open(os.path.join(INTENT_MODEL_PATH, "intent_classes.pkl"), "rb") as f:
             intent_classes = pickle.load(f)
         intent_thresholds = load_ood_thresholds(INTENT_MODEL_PATH)
         return True
     except Exception as e:
         "low_count": len(low_score)
     })
 print("⚙️ Initializing models...")
 load_intent_resources()
 recommender = BookRecommender()