ZEROTSUDIOS committed
Commit e1f7ca9 · verified · 1 Parent(s): 39ce8c5

Update app.py

Files changed (1)
  1. app.py +3 -33
app.py CHANGED
@@ -1,5 +1,5 @@
 import os
-os.environ["HF_HOME"] = "/tmp/huggingface" # ✅ custom safe cache dir
+os.environ["HF_HOME"] = "/tmp/huggingface"
 
 from flask import Flask, request, jsonify
 from flask_cors import CORS
@@ -15,12 +15,12 @@ import re
 import nltk
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import wordpunct_tokenize
 
 nltk_data_path = os.path.join(os.path.dirname(__file__), "nltk_data")
 os.makedirs(nltk_data_path, exist_ok=True)
 nltk.data.path.append(nltk_data_path)
 nltk.download('stopwords', download_dir=nltk_data_path, quiet=True)
-nltk.download('punkt', download_dir=nltk_data_path, quiet=True)
 nltk.download('wordnet', download_dir=nltk_data_path, quiet=True)
 
 logging.basicConfig(level=logging.INFO,
@@ -55,7 +55,7 @@ class BookRecommender:
             return ""
         text = text.lower()
         text = re.sub(r'[^\w\s]', ' ', text)
-        tokens = nltk.word_tokenize(text)
+        tokens = wordpunct_tokenize(text)
         tokens = [self.lemmatizer.lemmatize(word) for word in tokens if word not in self.stop_words]
         return ' '.join(tokens)
 
@@ -79,31 +79,6 @@ class BookRecommender:
         logger.error(f"Error loading model: {str(e)}", exc_info=True)
         return False
 
-    def recommend_books(self, user_query, top_n=5, include_description=True):
-        if self.model is None or self.book_embeddings is None or self.df is None:
-            return []
-        try:
-            processed_query = self.preprocess_text(user_query)
-            user_embedding = self.model.encode([processed_query])
-            similarities = cosine_similarity(user_embedding, self.book_embeddings)[0]
-            similar_books_idx = np.argsort(similarities)[-top_n:][::-1]
-            recommendations = []
-            for i, idx in enumerate(similar_books_idx):
-                book_data = {
-                    'title': self.df.iloc[idx].get('Title', ''),
-                    'author': self.df.iloc[idx].get('Authors', ''),
-                    'category': self.df.iloc[idx].get('Category', ''),
-                    'year': self.df.iloc[idx].get('Publish Date (Year)', ''),
-                    'description': self.df.iloc[idx].get('Description', '')[:197] + "..." if include_description and 'Description' in self.df.columns else '',
-                    'relevance_score': float(similarities[idx]),
-                    'rank': i + 1
-                }
-                recommendations.append(book_data)
-            return recommendations
-        except Exception as e:
-            logger.error(f"Error generating recommendations: {str(e)}", exc_info=True)
-            return []
-
 def load_ood_thresholds(model_path):
     threshold_path = os.path.join(model_path, "ood_thresholds.json")
     if os.path.exists(threshold_path):
@@ -115,15 +90,11 @@ def load_intent_resources():
     global intent_model, intent_tokenizer, intent_classes, intent_thresholds
     try:
         print("⏳ Loading intent model from Hugging Face Hub (ZEROTSUDIOS/Bipa-Classification)")
-
-        # Load intent model and tokenizer from HF Hub
         intent_model = AutoModelForSequenceClassification.from_pretrained("ZEROTSUDIOS/Bipa-Classification")
         intent_tokenizer = AutoTokenizer.from_pretrained("ZEROTSUDIOS/Bipa-Classification")
         print("✅ Remote model and tokenizer loaded")
-
         with open(os.path.join(INTENT_MODEL_PATH, "intent_classes.pkl"), "rb") as f:
             intent_classes = pickle.load(f)
-
         intent_thresholds = load_ood_thresholds(INTENT_MODEL_PATH)
         return True
     except Exception as e:
@@ -201,7 +172,6 @@ def recommend():
         "low_count": len(low_score)
     })
 
-# 🛠 Force run on import (for gunicorn compatibility)
 print("⚙️ Initializing models...")
 load_intent_resources()
 recommender = BookRecommender()
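The substantive change is the tokenizer used in `preprocess_text`: `nltk.word_tokenize` depends on the pretrained `punkt` models (hence the `nltk.download('punkt', ...)` line removed along with it), while `wordpunct_tokenize` is a pure regular-expression tokenizer bundled with NLTK that needs no downloaded data. Because `preprocess_text` already replaces punctuation with spaces via `re.sub(r'[^\w\s]', ' ', text)` before tokenizing, the two tokenizers yield essentially the same tokens on this input, so the swap drops a startup download without changing behavior. A minimal sketch of how the replacement behaves on raw text (the sample sentence is illustrative):

```python
from nltk.tokenize import wordpunct_tokenize

# wordpunct_tokenize splits on the regex \w+|[^\w\s]+, so it works out of
# the box; nltk.word_tokenize would first require the 'punkt' data files.
print(wordpunct_tokenize("Don't split me, please."))
# ['Don', "'", 't', 'split', 'me', ',', 'please', '.']
```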
 
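The `HF_HOME` assignment keeps its value and only loses the inline comment. Setting the variable before `transformers` or `huggingface_hub` is imported points the Hub cache at a writable directory, which matters in containers (Spaces included) where the default home directory may not be writable, since the cache path is resolved from the environment at import time. A sketch of the pattern, assuming `/tmp` is writable in the deployment environment:

```python
import os

# Must be set before importing transformers/huggingface_hub, because the
# cache location is read from HF_HOME when those modules are imported.
os.environ["HF_HOME"] = "/tmp/huggingface"

from transformers import AutoTokenizer  # model files now cache under /tmp/huggingface
```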
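Most of the -33 comes from deleting the `recommend_books` method, which encoded the query, scored it against precomputed book embeddings with cosine similarity, and took the top-n matches via an argsort slice. A standalone sketch of that ranking step, with toy vectors standing in for `model.encode` and the real embedding matrix:

```python
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Toy stand-ins for model.encode([query]) and the book embedding matrix.
user_embedding = np.array([[0.9, 0.1, 0.0]])
book_embeddings = np.array([
    [1.0, 0.0, 0.0],
    [0.0, 1.0, 0.0],
    [0.7, 0.7, 0.0],
])

similarities = cosine_similarity(user_embedding, book_embeddings)[0]
top_n = 2
# np.argsort sorts ascending, so slice off the last top_n indices and
# reverse them to put the most similar book first.
similar_books_idx = np.argsort(similarities)[-top_n:][::-1]
print(similar_books_idx)                # [0 2]
print(similarities[similar_books_idx])  # scores in descending order
```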