ZEROTSUDIOS committed on
Commit
a70684f
·
verified ·
1 Parent(s): a36c8fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +190 -190
app.py CHANGED
@@ -1,190 +1,190 @@
1
- from flask import Flask, request, jsonify
2
- from flask_cors import CORS
3
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
4
- from sentence_transformers import SentenceTransformer
5
- from sklearn.metrics.pairwise import cosine_similarity
6
- import torch
7
- import numpy as np
8
- import pickle
9
- import os
10
- import json
11
- import logging
12
- import re
13
- import nltk
14
- from nltk.corpus import stopwords
15
- from nltk.stem import WordNetLemmatizer
16
-
17
- # Download necessary NLTK resources
18
- nltk.download('stopwords', quiet=True)
19
- nltk.download('punkt', quiet=True)
20
- nltk.download('wordnet', quiet=True)
21
-
22
- # Configure logging
23
- logging.basicConfig(level=logging.INFO,
24
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
25
- logger = logging.getLogger(__name__)
26
-
27
- app = Flask(__name__)
28
- CORS(app)
29
-
30
- # Global variables and constants
31
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
32
- INTENT_MODEL_PATH = os.path.join(BASE_DIR, "model")
33
- RECOMMENDER_MODEL_PATH = os.path.join(BASE_DIR, "recommender_model")
34
-
35
- intent_model = None
36
- intent_tokenizer = None
37
- intent_classes = None
38
- intent_thresholds = None
39
- recommender = None
40
- recommender_model_loaded = False
41
-
42
- class BookRecommender:
43
- def __init__(self, model_name='all-minilm-l6-v2'):
44
- self.model_name = model_name
45
- self.model = None
46
- self.book_embeddings = None
47
- self.df = None
48
- self.stop_words = set(stopwords.words('english'))
49
- self.lemmatizer = WordNetLemmatizer()
50
-
51
- def preprocess_text(self, text):
52
- if not isinstance(text, str):
53
- return ""
54
- text = text.lower()
55
- text = re.sub(r'[^\w\s]', ' ', text)
56
- tokens = nltk.word_tokenize(text)
57
- tokens = [self.lemmatizer.lemmatize(word) for word in tokens if word not in self.stop_words]
58
- return ' '.join(tokens)
59
-
60
- def load_model(self, folder_path=RECOMMENDER_MODEL_PATH):
61
- try:
62
- if not os.path.exists(folder_path):
63
- return False
64
- with open(os.path.join(folder_path, "config.pkl"), 'rb') as f:
65
- config = pickle.load(f)
66
- self.model_name = config['model_name']
67
- self.model = SentenceTransformer(os.path.join(folder_path, "sentence_transformer"))
68
- with open(os.path.join(folder_path, "book_embeddings.pkl"), 'rb') as f:
69
- self.book_embeddings = pickle.load(f)
70
- with open(os.path.join(folder_path, "books_data.pkl"), 'rb') as f:
71
- self.df = pickle.load(f)
72
- return True
73
- except Exception as e:
74
- logger.error(f"Error loading model: {str(e)}", exc_info=True)
75
- return False
76
-
77
- def recommend_books(self, user_query, top_n=5, include_description=True):
78
- if self.model is None or self.book_embeddings is None or self.df is None:
79
- return []
80
- try:
81
- processed_query = self.preprocess_text(user_query)
82
- user_embedding = self.model.encode([processed_query])
83
- similarities = cosine_similarity(user_embedding, self.book_embeddings)[0]
84
- similar_books_idx = np.argsort(similarities)[-top_n:][::-1]
85
- recommendations = []
86
- for i, idx in enumerate(similar_books_idx):
87
- book_data = {
88
- 'title': self.df.iloc[idx].get('Title', ''),
89
- 'author': self.df.iloc[idx].get('Authors', ''),
90
- 'category': self.df.iloc[idx].get('Category', ''),
91
- 'year': self.df.iloc[idx].get('Publish Date (Year)', ''),
92
- 'description': self.df.iloc[idx].get('Description', '')[:197] + "..." if include_description and 'Description' in self.df.columns else '',
93
- 'relevance_score': float(similarities[idx]),
94
- 'rank': i + 1
95
- }
96
- recommendations.append(book_data)
97
- return recommendations
98
- except Exception as e:
99
- logger.error(f"Error generating recommendations: {str(e)}", exc_info=True)
100
- return []
101
-
102
-
103
- def load_ood_thresholds(model_path):
104
- threshold_path = os.path.join(model_path, "ood_thresholds.json")
105
- if os.path.exists(threshold_path):
106
- with open(threshold_path, "r") as f:
107
- return json.load(f)
108
- return {"energy_threshold": 0.0, "msp_threshold": 0.5}
109
-
110
-
111
- def load_intent_resources():
112
- global intent_model, intent_tokenizer, intent_classes, intent_thresholds
113
- try:
114
- intent_model = AutoModelForSequenceClassification.from_pretrained("ZEROTSUDIOS/Bipa-Classification")
115
- intent_tokenizer = AutoTokenizer.from_pretrained("ZEROTSUDIOS/Bipa-Classification")
116
- with open(os.path.join(INTENT_MODEL_PATH, "intent_classes.pkl"), "rb") as f:
117
- intent_classes = pickle.load(f)
118
- intent_thresholds = load_ood_thresholds(INTENT_MODEL_PATH)
119
- return True
120
- except Exception as e:
121
- logger.error(f"Failed to load intent resources: {str(e)}", exc_info=True)
122
- return False
123
-
124
-
125
- def predict_intent(text, method='combined'):
126
- inputs = intent_tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
127
- with torch.no_grad():
128
- outputs = intent_model(**inputs)
129
- logits = outputs.logits
130
- probs = torch.nn.functional.softmax(logits, dim=-1)
131
- max_prob, pred_idx = torch.max(probs, dim=-1)
132
- energy = -torch.logsumexp(logits, dim=-1)
133
- is_ood = False
134
- if method == 'energy':
135
- is_ood = energy.item() > intent_thresholds['energy_threshold']
136
- elif method == 'msp':
137
- is_ood = max_prob.item() < intent_thresholds['msp_threshold']
138
- elif method == 'combined':
139
- is_ood = (energy.item() > intent_thresholds['energy_threshold']) and (max_prob.item() < intent_thresholds['msp_threshold'])
140
- return {
141
- "intent": intent_classes[pred_idx.item()],
142
- "is_ood": is_ood,
143
- "confidence": max_prob.item(),
144
- "energy_score": energy.item()
145
- }
146
-
147
-
148
- @app.route('/api/analyze', methods=['POST'])
149
- def analyze():
150
- if not request.is_json:
151
- return jsonify({"error": "Request must be JSON"}), 400
152
- data = request.get_json()
153
- text = data.get('text')
154
- method = data.get('method', 'combined')
155
- result = predict_intent(text, method)
156
- return jsonify(result)
157
-
158
-
159
- @app.route('/api/recommend', methods=['POST'])
160
- def recommend():
161
- global recommender_model_loaded
162
- if not recommender_model_loaded:
163
- return jsonify({"error": "Recommendation model not loaded."}), 503
164
- data = request.get_json()
165
- query = data.get('query')
166
- top_n = data.get('top_n', 5)
167
- include_description = data.get('include_description', True)
168
- threshold = data.get('threshold', 0.5)
169
- if not query:
170
- return jsonify({"error": "Missing query."}), 400
171
- recommendations = recommender.recommend_books(query, top_n=top_n, include_description=include_description)
172
- high_score = [rec for rec in recommendations if rec['relevance_score'] >= threshold]
173
- low_score = [rec for rec in recommendations if rec['relevance_score'] < threshold]
174
- return jsonify({
175
- "query": query,
176
- "threshold": threshold,
177
- "high_recommendations": high_score,
178
- "low_recommendations": low_score,
179
- "total_count": len(recommendations),
180
- "high_count": len(high_score),
181
- "low_count": len(low_score)
182
- })
183
-
184
-
185
- if __name__ == '__main__':
186
- load_intent_resources()
187
- recommender = BookRecommender()
188
- recommender_model_loaded = recommender.load_model()
189
- port = int(os.environ.get('PORT', 5000))
190
- app.run(host='0.0.0.0', port=port, debug=False, use_reloader=False)
 
1
+ from flask import Flask, request, jsonify
2
+ from flask_cors import CORS
3
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
4
+ from sentence_transformers import SentenceTransformer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ import torch
7
+ import numpy as np
8
+ import pickle
9
+ import os
10
+ import json
11
+ import logging
12
+ import re
13
+ import nltk
14
+ from nltk.corpus import stopwords
15
+ from nltk.stem import WordNetLemmatizer
16
+
17
# Global variables and constants
# Absolute directory of this file; every on-disk resource below is resolved
# against it so lookups do not depend on the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INTENT_MODEL_PATH = os.path.join(BASE_DIR, "model")
RECOMMENDER_MODEL_PATH = os.path.join(BASE_DIR, "recommender_model")

# Register a local NLTK data directory (created if absent) as an extra search
# location. It is appended, so NLTK's own default locations are searched first.
# NOTE(review): nothing downloads 'stopwords', 'punkt' or 'wordnet' any more --
# confirm they are shipped in this directory or installed system-wide.
nltk_data_path = os.path.join(BASE_DIR, "nltk_data")
os.makedirs(nltk_data_path, exist_ok=True)
if nltk_data_path not in nltk.data.path:
    nltk.data.path.append(nltk_data_path)

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)

# Populated by load_intent_resources(); None until that call succeeds.
intent_model = None
intent_tokenizer = None
intent_classes = None
intent_thresholds = None

# Set in the __main__ block; the flag records whether load_model() succeeded.
recommender = None
recommender_model_loaded = False
41
+
42
class BookRecommender:
    """Recommend books by cosine similarity between sentence embeddings.

    The encoder, the pre-computed book embeddings and the book table are all
    read from disk by ``load_model``; until that succeeds ``recommend_books``
    returns an empty list.
    """

    # Longest description handed back to clients, trailing "..." included.
    MAX_DESCRIPTION_CHARS = 200

    def __init__(self, model_name='all-minilm-l6-v2'):
        """Create an empty recommender.

        Args:
            model_name: Name recorded on the instance; ``load_model``
                overwrites it with the value stored in ``config.pkl``.
        """
        self.model_name = model_name
        self.model = None
        self.book_embeddings = None
        self.df = None
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()

    def preprocess_text(self, text):
        """Normalise free text before it is embedded.

        Lower-cases the text, replaces every character that is neither a word
        character nor whitespace with a space, tokenises it, drops English
        stop words and lemmatises what is left.

        Returns:
            The surviving tokens joined by single spaces, or ``""`` when
            ``text`` is not a string.
        """
        if not isinstance(text, str):
            return ""
        text = text.lower()
        text = re.sub(r'[^\w\s]', ' ', text)
        tokens = nltk.word_tokenize(text)
        tokens = [self.lemmatizer.lemmatize(word) for word in tokens if word not in self.stop_words]
        return ' '.join(tokens)

    def load_model(self, folder_path=RECOMMENDER_MODEL_PATH):
        """Load the encoder, book embeddings and book table from ``folder_path``.

        Returns:
            True when every artefact was loaded; False when the folder is
            missing or any step raised (the error is logged, not re-raised).
        """
        try:
            if not os.path.exists(folder_path):
                logger.warning(f"Recommender model folder not found: {folder_path}")
                return False
            # SECURITY: pickle.load executes arbitrary code embedded in the
            # file. These artefacts must only ever come from a trusted source.
            with open(os.path.join(folder_path, "config.pkl"), 'rb') as f:
                config = pickle.load(f)
            self.model_name = config['model_name']
            self.model = SentenceTransformer(os.path.join(folder_path, "sentence_transformer"))
            with open(os.path.join(folder_path, "book_embeddings.pkl"), 'rb') as f:
                self.book_embeddings = pickle.load(f)
            with open(os.path.join(folder_path, "books_data.pkl"), 'rb') as f:
                self.df = pickle.load(f)
            return True
        except Exception as e:
            logger.error(f"Error loading model: {str(e)}", exc_info=True)
            return False

    @staticmethod
    def _json_safe(value):
        """Turn a table cell into a value the JSON encoder can emit."""
        if isinstance(value, np.generic):
            value = value.item()  # NumPy scalar -> plain Python scalar
        if value is None or (isinstance(value, float) and value != value):
            return ''  # None/NaN cell: same fallback as an absent column
        return value

    @classmethod
    def _shorten(cls, text):
        """Clip ``text`` to MAX_DESCRIPTION_CHARS, marking real cuts with "..."."""
        if not isinstance(text, str):
            return ''  # e.g. NaN for a book without a description
        if len(text) <= cls.MAX_DESCRIPTION_CHARS:
            return text
        return text[:cls.MAX_DESCRIPTION_CHARS - 3] + "..."

    def recommend_books(self, user_query, top_n=5, include_description=True):
        """Return the ``top_n`` books most similar to ``user_query``.

        Args:
            user_query: Free-text request; it is passed through
                ``preprocess_text`` before being embedded.
            top_n: Number of books wanted. Coerced with ``int()``; zero or a
                negative value yields an empty list.
            include_description: When truthy and the table has a
                ``Description`` column, a clipped description is included.

        Returns:
            A list of dicts with keys ``title``, ``author``, ``category``,
            ``year``, ``description``, ``relevance_score`` and ``rank``
            (1 = best match), ordered by descending similarity. An empty list
            is returned when the model is not loaded or any step fails (the
            failure is logged).
        """
        if self.model is None or self.book_embeddings is None or self.df is None:
            return []
        try:
            top_n = int(top_n)
            if top_n <= 0:
                # A slice of [-0:] would select *every* book, so stop here.
                return []
            processed_query = self.preprocess_text(user_query)
            user_embedding = self.model.encode([processed_query])
            similarities = cosine_similarity(user_embedding, self.book_embeddings)[0]
            # argsort is ascending: keep the last top_n indices, best first.
            similar_books_idx = np.argsort(similarities)[-top_n:][::-1]
            with_description = bool(include_description) and 'Description' in self.df.columns
            recommendations = []
            for rank, idx in enumerate(similar_books_idx, start=1):
                row = self.df.iloc[idx]
                description = self._shorten(row.get('Description', '')) if with_description else ''
                recommendations.append({
                    'title': self._json_safe(row.get('Title', '')),
                    'author': self._json_safe(row.get('Authors', '')),
                    'category': self._json_safe(row.get('Category', '')),
                    'year': self._json_safe(row.get('Publish Date (Year)', '')),
                    'description': description,
                    'relevance_score': float(similarities[idx]),
                    'rank': rank,
                })
            return recommendations
        except Exception as e:
            logger.error(f"Error generating recommendations: {str(e)}", exc_info=True)
            return []
101
+
102
+
103
def load_ood_thresholds(model_path):
    """Read the out-of-distribution thresholds stored next to the model.

    Looks for ``ood_thresholds.json`` inside ``model_path``. Values found in
    the file override the built-in defaults key by key, so a file that only
    defines one threshold still yields a complete mapping.

    Args:
        model_path: Directory expected to contain ``ood_thresholds.json``.

    Returns:
        A dict that always contains ``energy_threshold`` and
        ``msp_threshold`` (defaults 0.0 and 0.5), plus any extra keys present
        in the file.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    thresholds = {"energy_threshold": 0.0, "msp_threshold": 0.5}
    threshold_path = os.path.join(model_path, "ood_thresholds.json")
    if os.path.exists(threshold_path):
        with open(threshold_path, "r", encoding="utf-8") as f:
            loaded = json.load(f)
        if isinstance(loaded, dict):
            thresholds.update(loaded)
        else:
            # A list/number/string cannot be indexed by threshold name later,
            # so keep the defaults rather than hand back an unusable value.
            logging.getLogger(__name__).warning(
                "Ignoring %s: expected a JSON object", threshold_path
            )
    return thresholds
109
+
110
+
111
def load_intent_resources():
    """Load the intent classifier, tokenizer, class labels and OOD thresholds.

    Everything is loaded into locals first and published to the module
    globals only after every step has succeeded, so a failure part-way
    through never leaves the globals half-initialised.

    Returns:
        True when all four resources were loaded; False otherwise (the error
        is logged and the globals keep their previous values).
    """
    global intent_model, intent_tokenizer, intent_classes, intent_thresholds
    repo_id = "ZEROTSUDIOS/Bipa-Classification"
    try:
        model = AutoModelForSequenceClassification.from_pretrained(repo_id)
        tokenizer = AutoTokenizer.from_pretrained(repo_id)
        # SECURITY: pickle.load executes arbitrary code embedded in the file;
        # intent_classes.pkl must only ever come from a trusted source.
        with open(os.path.join(INTENT_MODEL_PATH, "intent_classes.pkl"), "rb") as f:
            classes = pickle.load(f)
        thresholds = load_ood_thresholds(INTENT_MODEL_PATH)
    except Exception as e:
        logger.error(f"Failed to load intent resources: {str(e)}", exc_info=True)
        return False
    intent_model = model
    intent_tokenizer = tokenizer
    intent_classes = classes
    intent_thresholds = thresholds
    return True
123
+
124
+
125
def predict_intent(text, method='combined'):
    """Classify ``text`` and flag whether it looks out-of-distribution (OOD).

    The text is tokenised (truncated to 512 tokens) and run through the
    module-level intent model without gradient tracking. Two scores are
    derived from the logits: the maximum softmax probability and the energy,
    computed as ``-logsumexp(logits)``.

    Args:
        text: Input handed to the tokenizer.
            NOTE(review): the ``.item()`` calls need single-element tensors,
            so this presumably expects one string, not a batch -- confirm.
        method: ``'energy'`` flags OOD when the energy exceeds
            ``energy_threshold``; ``'msp'`` when the top probability is below
            ``msp_threshold``; ``'combined'`` only when both hold. Any other
            value leaves ``is_ood`` False.

    Returns:
        Dict with ``intent``, ``is_ood``, ``confidence`` and ``energy_score``.
    """
    encoded = intent_tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    with torch.no_grad():
        logits = intent_model(**encoded).logits
        class_probs = torch.nn.functional.softmax(logits, dim=-1)
        top_prob, top_idx = torch.max(class_probs, dim=-1)
        energy = -torch.logsumexp(logits, dim=-1)

    confidence = top_prob.item()
    energy_score = energy.item()

    # Thresholds are looked up only inside the branch that needs them.
    if method == 'combined':
        is_ood = (energy_score > intent_thresholds['energy_threshold']) and (confidence < intent_thresholds['msp_threshold'])
    elif method == 'msp':
        is_ood = confidence < intent_thresholds['msp_threshold']
    elif method == 'energy':
        is_ood = energy_score > intent_thresholds['energy_threshold']
    else:
        is_ood = False

    return {
        "intent": intent_classes[top_idx.item()],
        "is_ood": is_ood,
        "confidence": confidence,
        "energy_score": energy_score,
    }
146
+
147
+
148
@app.route('/api/analyze', methods=['POST'])
def analyze():
    """POST /api/analyze -- classify the intent of a piece of text.

    Expects a JSON object with a non-empty string ``text`` and an optional
    ``method`` (``'energy'``, ``'msp'`` or ``'combined'``; default
    ``'combined'``).

    Returns:
        200 with the dict produced by ``predict_intent``;
        400 when the body is not a JSON object, ``text`` is missing or empty,
        or ``method`` is not one of the supported values;
        503 when the intent resources have not been loaded;
        500 when prediction itself fails (the error is logged).
    """
    if not request.is_json:
        return jsonify({"error": "Request must be JSON"}), 400
    # silent=True: malformed JSON yields None instead of raising, so the
    # client still receives a JSON error body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    text = data.get('text')
    if not isinstance(text, str) or not text.strip():
        return jsonify({"error": "Missing text."}), 400

    method = data.get('method', 'combined')
    if method not in ('energy', 'msp', 'combined'):
        # An unrecognised method would otherwise silently report is_ood=False.
        return jsonify({"error": "method must be 'energy', 'msp' or 'combined'."}), 400

    resources = (intent_model, intent_tokenizer, intent_classes, intent_thresholds)
    if any(resource is None for resource in resources):
        return jsonify({"error": "Intent model not loaded."}), 503

    try:
        result = predict_intent(text, method)
    except Exception as e:
        logger.error(f"Error analyzing text: {str(e)}", exc_info=True)
        return jsonify({"error": "Failed to analyze text."}), 500
    return jsonify(result)
157
+
158
+
159
@app.route('/api/recommend', methods=['POST'])
def recommend():
    """POST /api/recommend -- return book recommendations for a query.

    Expects a JSON object with:
        query: non-empty string (required).
        top_n: positive integer, default 5.
        include_description: passed through to the recommender, default True.
        threshold: number used to split the results, default 0.5.

    Returns:
        200 with the query, the threshold, the recommendations split into
        ``high_recommendations`` (score >= threshold) and
        ``low_recommendations`` (score < threshold), and their counts;
        400 when the body is not a JSON object or a field is invalid;
        503 when the recommendation model has not been loaded.
    """
    if not recommender_model_loaded or recommender is None:
        return jsonify({"error": "Recommendation model not loaded."}), 503

    # silent=True: a missing/invalid JSON body yields None instead of raising.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request must be JSON"}), 400

    query = data.get('query')
    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "Missing query."}), 400

    top_n = data.get('top_n', 5)
    # bool is a subclass of int, so exclude it explicitly.
    if isinstance(top_n, bool) or not isinstance(top_n, int) or top_n < 1:
        return jsonify({"error": "top_n must be a positive integer."}), 400

    threshold = data.get('threshold', 0.5)
    if isinstance(threshold, bool) or not isinstance(threshold, (int, float)):
        return jsonify({"error": "threshold must be a number."}), 400

    include_description = data.get('include_description', True)

    recommendations = recommender.recommend_books(query, top_n=top_n, include_description=include_description)
    high_score = [rec for rec in recommendations if rec['relevance_score'] >= threshold]
    low_score = [rec for rec in recommendations if rec['relevance_score'] < threshold]
    return jsonify({
        "query": query,
        "threshold": threshold,
        "high_recommendations": high_score,
        "low_recommendations": low_score,
        "total_count": len(recommendations),
        "high_count": len(high_score),
        "low_count": len(low_score)
    })
183
+
184
+
185
if __name__ == '__main__':
    # Load both models once, before the server starts accepting requests.
    # A failed load is reported here but does not stop the server.
    if not load_intent_resources():
        logger.warning("Intent resources failed to load; /api/analyze will not work.")
    recommender = BookRecommender()
    recommender_model_loaded = recommender.load_model()
    if not recommender_model_loaded:
        logger.warning("Recommendation model failed to load; /api/recommend will return 503.")
    # PORT comes from the environment when set; 5000 otherwise.
    port = int(os.environ.get('PORT', 5000))
    app.run(host='0.0.0.0', port=port, debug=False, use_reloader=False)