Spaces:

ZEROTSUDIOS
/

chatbot-bipa-api2

Sleeping

App Files Files

xet

Community

chatbot-bipa-api2 / app.py

ZEROTSUDIOS

Update app.py

ba1cce4 verified 3 months ago

raw

history blame contribute delete

8.54 kB

	import os
	# Point the Hugging Face cache at /tmp. This assignment deliberately sits
	# before the transformers / sentence_transformers imports further down.
	# NOTE(review): presumably the app directory is read-only on the host and
	# the libraries read HF_HOME when imported — confirm before moving this line.
	os.environ["HF_HOME"] = "/tmp/huggingface"

	from flask import Flask, request, jsonify
	from flask_cors import CORS
	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	from sentence_transformers import SentenceTransformer
	from sklearn.metrics.pairwise import cosine_similarity
	import torch
	import numpy as np
	import pickle
	import json
	import logging
	import re
	import nltk
	from nltk.corpus import stopwords
	from nltk.stem import WordNetLemmatizer
	from nltk.tokenize import wordpunct_tokenize

	# Keep NLTK corpora in an "nltk_data" folder next to this file and add that
	# folder to NLTK's resource search path. This runs at import time.
	# NOTE(review): requires the application directory to be writable — confirm
	# for the deployment target.
	nltk_data_path = os.path.join(os.path.dirname(__file__), "nltk_data")
	os.makedirs(nltk_data_path, exist_ok=True)
	nltk.data.path.append(nltk_data_path)
	# Fetch the two corpora BookRecommender relies on: the stopword lists and
	# WordNet (used by WordNetLemmatizer). quiet=True silences progress output.
	nltk.download('stopwords', download_dir=nltk_data_path, quiet=True)
	nltk.download('wordnet', download_dir=nltk_data_path, quiet=True)

	# Root logging configuration: INFO and above, with timestamp, logger name
	# and level prepended to every message.
	logging.basicConfig(level=logging.INFO,
	format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
	# Module-level logger used by the loaders and the recommender below.
	logger = logging.getLogger(__name__)

	# Flask application object; CORS(app) is applied with default settings.
	app = Flask(__name__)
	CORS(app)

	# Absolute directory of this file; model artefacts are resolved relative to it.
	BASE_DIR = os.path.dirname(os.path.abspath(__file__))
	# Folder holding intent_classes.pkl and (optionally) ood_thresholds.json.
	INTENT_MODEL_PATH = os.path.join(BASE_DIR, "model")
	# Folder holding the recommender's config, embeddings and book data pickles.
	RECOMMENDER_MODEL_PATH = os.path.join(BASE_DIR, "recommender_model")

	# Module-level state. Everything starts unset and is populated by the
	# initialisation statements at the bottom of the file.
	intent_model = None
	intent_tokenizer = None
	intent_classes = None
	intent_thresholds = None
	recommender = None
	# Result of BookRecommender.load_model(); checked by the health and
	# recommend endpoints.
	recommender_model_loaded = False

	class BookRecommender:
	    """Rank books against a free-text query using sentence embeddings.

	    The query is normalised, embedded with a SentenceTransformer model and
	    compared to pre-computed book embeddings by cosine similarity.
	    ``load_model`` must succeed before ``recommend_books`` returns anything.
	    """

	    # Longest description returned to clients, ellipsis included.
	    _MAX_DESCRIPTION_LENGTH = 200

	    def __init__(self, model_name='all-minilm-l6-v2'):
	        """Create an unloaded recommender.

	        Args:
	            model_name: Label for the embedding model. ``load_model``
	                overwrites it with the value stored in ``config.pkl``.
	        """
	        self.model_name = model_name
	        self.model = None
	        self.book_embeddings = None
	        self.df = None
	        self.stop_words = set(stopwords.words('english'))
	        self.lemmatizer = WordNetLemmatizer()

	    def preprocess_text(self, text):
	        """Lower-case, strip punctuation, drop stopwords and lemmatize.

	        Args:
	            text: Raw query text.

	        Returns:
	            The cleaned tokens joined by single spaces, or ``""`` when
	            ``text`` is not a string.
	        """
	        if not isinstance(text, str):
	            return ""
	        text = text.lower()
	        text = re.sub(r'[^\w\s]', ' ', text)
	        tokens = wordpunct_tokenize(text)
	        tokens = [
	            self.lemmatizer.lemmatize(word)
	            for word in tokens
	            if word not in self.stop_words
	        ]
	        return ' '.join(tokens)

	    def load_model(self, folder_path=RECOMMENDER_MODEL_PATH):
	        """Load the encoder, book embeddings and book table from disk.

	        Args:
	            folder_path: Directory containing ``config.pkl``,
	                ``sentence_transformer/``, ``book_embeddings.pkl`` and
	                ``books_data.pkl``.

	        Returns:
	            True when every artefact loaded, False otherwise (the error is
	            logged, never raised).
	        """
	        try:
	            print("🔄 Loading recommender model...")
	            if not os.path.exists(folder_path):
	                print("❌ Recommender folder not found")
	                return False
	            # SECURITY: pickle.load runs arbitrary code embedded in the file.
	            # These artefacts must only ever come from a trusted build step.
	            with open(os.path.join(folder_path, "config.pkl"), 'rb') as f:
	                config = pickle.load(f)
	            self.model_name = config['model_name']
	            self.model = SentenceTransformer(
	                os.path.join(folder_path, "sentence_transformer")
	            )
	            with open(os.path.join(folder_path, "book_embeddings.pkl"), 'rb') as f:
	                self.book_embeddings = pickle.load(f)
	            with open(os.path.join(folder_path, "books_data.pkl"), 'rb') as f:
	                self.df = pickle.load(f)
	            print("✅ Recommender model loaded")
	            return True
	        except Exception as e:
	            logger.error(f"Error loading model: {str(e)}", exc_info=True)
	            return False

	    @classmethod
	    def _format_description(cls, value):
	        """Return a description capped at ``_MAX_DESCRIPTION_LENGTH`` chars.

	        Non-string values (for example a NaN placeholder for a missing
	        description) become ``''`` instead of raising. The ellipsis is added
	        only when text was actually cut.
	        """
	        if not isinstance(value, str):
	            return ''
	        limit = cls._MAX_DESCRIPTION_LENGTH
	        if len(value) <= limit:
	            return value
	        return value[:limit - 3] + "..."

	    def recommend_books(self, user_query, top_n=5, include_description=True):
	        """Return the ``top_n`` books most similar to ``user_query``.

	        Args:
	            user_query: Free-text description of what the user wants.
	            top_n: Maximum number of results. Values that are not
	                integer-like, or are zero or negative, yield ``[]``.
	            include_description: Whether to add a shortened description
	                (only when the book table has a ``Description`` column).

	        Returns:
	            A list of dicts ordered by descending similarity with keys
	            ``title``, ``author``, ``category``, ``year``, ``description``,
	            ``relevance_score`` and ``rank`` (1-based). Empty when the model
	            is not loaded or an error occurs (the error is logged).
	        """
	        if self.model is None or self.book_embeddings is None or self.df is None:
	            return []
	        try:
	            top_n = int(top_n)
	        except (TypeError, ValueError):
	            logger.error(f"Error generating recommendations: invalid top_n {top_n!r}")
	            return []
	        # A slice such as [-0:] selects the WHOLE array and a negative count
	        # selects an arbitrary tail, so reject non-positive requests up front.
	        if top_n <= 0:
	            return []
	        try:
	            processed_query = self.preprocess_text(user_query)
	            user_embedding = self.model.encode([processed_query])
	            similarities = cosine_similarity(user_embedding, self.book_embeddings)[0]
	            # argsort is ascending: reverse it, then keep the best top_n.
	            ranked_idx = np.argsort(similarities)[::-1][:top_n]
	            with_description = (
	                bool(include_description) and 'Description' in self.df.columns
	            )
	            recommendations = []
	            for rank, idx in enumerate(ranked_idx, start=1):
	                row = self.df.iloc[idx]  # fetch the row once, not per field
	                if with_description:
	                    description = self._format_description(row.get('Description', ''))
	                else:
	                    description = ''
	                recommendations.append({
	                    'title': str(row.get('Title', '')),
	                    'author': str(row.get('Authors', '')),
	                    'category': str(row.get('Category', '')),
	                    'year': str(row.get('Publish Date (Year)', '')),
	                    'description': description,
	                    'relevance_score': float(similarities[idx]),
	                    'rank': int(rank),
	                })
	            return recommendations
	        except Exception as e:
	            logger.error(f"Error generating recommendations: {str(e)}", exc_info=True)
	            return []

	def load_ood_thresholds(model_path):
	    """Read out-of-distribution thresholds for the intent classifier.

	    Looks for ``ood_thresholds.json`` inside ``model_path``. Values found in
	    the file override the built-in defaults; keys absent from the file keep
	    their default, so callers can always rely on both ``energy_threshold``
	    and ``msp_threshold`` being present.

	    Args:
	        model_path: Directory that may contain ``ood_thresholds.json``.

	    Returns:
	        A dict with at least ``energy_threshold`` and ``msp_threshold``.

	    Raises:
	        ValueError: If the file exists but does not hold a JSON object.
	    """
	    thresholds = {"energy_threshold": 0.0, "msp_threshold": 0.5}
	    threshold_path = os.path.join(model_path, "ood_thresholds.json")
	    if not os.path.exists(threshold_path):
	        return thresholds
	    with open(threshold_path, "r", encoding="utf-8") as f:
	        loaded = json.load(f)
	    if not isinstance(loaded, dict):
	        raise ValueError(
	            f"{threshold_path} must contain a JSON object, got {type(loaded).__name__}"
	        )
	    # Overlay file values on the defaults so a partial file cannot cause a
	    # KeyError later when a threshold is looked up.
	    thresholds.update(loaded)
	    return thresholds

	def load_intent_resources():
	    """Load the intent classifier, tokenizer, class labels and thresholds.

	    The model and tokenizer are fetched from the Hugging Face Hub repository
	    ``ZEROTSUDIOS/Bipa-Classification``; the class labels and OOD thresholds
	    are read from ``INTENT_MODEL_PATH``.

	    The four module globals are assigned only after every resource has
	    loaded. A failure part-way through therefore leaves all of them
	    untouched, so the health endpoint never reports a half-initialised model.

	    Returns:
	        True on success, False on any failure (the error is printed and
	        logged, never raised).
	    """
	    global intent_model, intent_tokenizer, intent_classes, intent_thresholds
	    repo_id = "ZEROTSUDIOS/Bipa-Classification"
	    try:
	        print("⏳ Loading intent model from Hugging Face Hub (ZEROTSUDIOS/Bipa-Classification)")
	        model = AutoModelForSequenceClassification.from_pretrained(repo_id)
	        tokenizer = AutoTokenizer.from_pretrained(repo_id)
	        print("✅ Remote model and tokenizer loaded")
	        # SECURITY: pickle.load runs arbitrary code embedded in the file; the
	        # label file must come from a trusted source.
	        with open(os.path.join(INTENT_MODEL_PATH, "intent_classes.pkl"), "rb") as f:
	            classes = pickle.load(f)
	        thresholds = load_ood_thresholds(INTENT_MODEL_PATH)
	    except Exception as e:
	        print("❌ Failed to load intent resources:", e)
	        logger.error(f"Failed to load intent resources: {str(e)}", exc_info=True)
	        return False
	    # Publish everything in one step now that nothing can fail any more.
	    intent_model = model
	    intent_tokenizer = tokenizer
	    intent_classes = classes
	    intent_thresholds = thresholds
	    return True

	def predict_intent(text, method='combined'):
	    """Classify ``text`` and flag whether it looks out-of-distribution.

	    Args:
	        text: Input passed to the intent tokenizer.
	        method: OOD rule to apply. ``'energy'`` flags inputs whose energy
	            score exceeds ``energy_threshold``; ``'msp'`` flags inputs whose
	            top softmax probability is below ``msp_threshold``;
	            ``'combined'`` requires both. Any other value never flags.

	    Returns:
	        A dict with ``intent``, ``is_ood``, ``confidence``,
	        ``energy_score`` and ``class_probabilities`` (label -> probability).
	    """
	    encoded = intent_tokenizer(
	        text,
	        return_tensors="pt",
	        padding=True,
	        truncation=True,
	        max_length=512,
	    )
	    with torch.no_grad():
	        logits = intent_model(**encoded).logits
	        probs = torch.nn.functional.softmax(logits, dim=-1)
	        top_prob, top_idx = torch.max(probs, dim=-1)
	        # Energy score: negative log-sum-exp of the logits.
	        energy = -torch.logsumexp(logits, dim=-1)

	    energy_score = energy.item()
	    confidence = top_prob.item()

	    # Threshold look-ups stay inside helpers so that only the keys required
	    # by the selected method are ever read.
	    def _energy_too_high():
	        return energy_score > intent_thresholds['energy_threshold']

	    def _confidence_too_low():
	        return confidence < intent_thresholds['msp_threshold']

	    if method == 'combined':
	        is_ood = _energy_too_high() and _confidence_too_low()
	    elif method == 'msp':
	        is_ood = _confidence_too_low()
	    elif method == 'energy':
	        is_ood = _energy_too_high()
	    else:
	        is_ood = False

	    per_class = {}
	    for position, probability in enumerate(probs[0].numpy()):
	        per_class[intent_classes[position]] = float(probability)

	    return {
	        "intent": intent_classes[top_idx.item()],
	        "is_ood": is_ood,
	        "confidence": confidence,
	        "energy_score": energy_score,
	        "class_probabilities": per_class,
	    }


	@app.route('/')
	def root():
	    """Answer the index route with a fixed OK status payload."""
	    payload = {"status": "ok"}
	    return jsonify(payload)

	@app.route('/api/health')
	def health():
	    """Report which models are currently available.

	    Returns a JSON object with a fixed ``status`` of ``"running"`` plus one
	    boolean per resource, derived from the module-level model state.
	    """
	    report = {"status": "running"}
	    report["intent_model_loaded"] = intent_model is not None
	    report["tokenizer_loaded"] = intent_tokenizer is not None
	    report["recommender_loaded"] = recommender_model_loaded
	    return jsonify(report)

	@app.route('/api/analyze', methods=['POST'])
	def analyze():
	    """Classify the intent of the posted text.

	    Expects a JSON object with a non-empty string ``text`` and an optional
	    ``method`` (default ``'combined'``) forwarded to ``predict_intent``.

	    Returns:
	        200 with the ``predict_intent`` result;
	        400 when the body is not a JSON object or ``text`` is missing,
	        empty or not a string;
	        503 when the intent resources have not been loaded.
	    """
	    resources = (intent_model, intent_tokenizer, intent_classes, intent_thresholds)
	    if any(resource is None for resource in resources):
	        # Without this guard a failed start-up surfaces as an opaque 500.
	        return jsonify({"error": "Intent model not loaded."}), 503
	    if not request.is_json:
	        return jsonify({"error": "Request must be JSON"}), 400
	    # silent=True turns malformed JSON into None so it gets the same JSON
	    # error response as every other invalid body.
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict):
	        return jsonify({"error": "Request body must be a JSON object"}), 400
	    text = data.get('text')
	    if not isinstance(text, str) or not text.strip():
	        return jsonify({"error": "Missing text."}), 400
	    method = data.get('method', 'combined')
	    result = predict_intent(text, method)
	    return jsonify(result)

	@app.route('/api/recommend', methods=['POST'])
	def recommend():
	    """Return book recommendations for the posted query.

	    Expects a JSON object with:
	        query: non-empty string (required).
	        top_n: positive integer, default 5.
	        include_description: truthy/falsy flag, default True.
	        threshold: number splitting results into high/low, default 0.5.

	    Returns:
	        200 with the query, the threshold, the recommendations split into
	        ``high_recommendations`` (score >= threshold) and
	        ``low_recommendations`` (score < threshold), and their counts;
	        400 for an invalid body or parameter;
	        503 when the recommender is not loaded.
	    """
	    if not recommender_model_loaded:
	        return jsonify({"error": "Recommendation model not loaded."}), 503
	    # silent=True yields None for a missing or malformed JSON body instead of
	    # raising, so every bad request gets a JSON error response.
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict):
	        return jsonify({"error": "Request must be JSON"}), 400
	    query = data.get('query')
	    if not query:
	        return jsonify({"error": "Missing query."}), 400
	    if not isinstance(query, str):
	        return jsonify({"error": "query must be a string."}), 400
	    try:
	        top_n = int(data.get('top_n', 5))
	        threshold = float(data.get('threshold', 0.5))
	    except (TypeError, ValueError, OverflowError):
	        return jsonify({"error": "top_n must be an integer and threshold a number."}), 400
	    if top_n < 1:
	        return jsonify({"error": "top_n must be a positive integer."}), 400
	    include_description = data.get('include_description', True)
	    recommendations = recommender.recommend_books(
	        query, top_n=top_n, include_description=include_description
	    )
	    high_score = [rec for rec in recommendations if rec['relevance_score'] >= threshold]
	    low_score = [rec for rec in recommendations if rec['relevance_score'] < threshold]
	    return jsonify({
	        "query": query,
	        "threshold": threshold,
	        "high_recommendations": high_score,
	        "low_recommendations": low_score,
	        "total_count": len(recommendations),
	        "high_count": len(high_score),
	        "low_count": len(low_score)
	    })

	# Module-level initialisation: runs once when this file is imported, so the
	# models are loaded before any request is served.
	print("⚙️ Initializing models...")
	# The boolean result is not checked here; load failures are printed/logged
	# inside load_intent_resources itself.
	load_intent_resources()
	recommender = BookRecommender()
	# Remember whether loading succeeded; the recommend and health endpoints
	# read this flag.
	recommender_model_loaded = recommender.load_model()
	print("✅ All models attempted to load")