Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes.
- .gitattributes +49 -35
- .gitignore +1 -0
- DockerFile +23 -0
- api.log +0 -0
- app.py +190 -0
- app2.py +460 -0
- app3.py +580 -0
- app4.py +580 -0
- combined_api.log +0 -0
- evaluate_model.py +208 -0
- evaluation_plots/20250515_142829_confidence_distribution.png +0 -0
- evaluation_plots/20250515_142829_energy_distribution.png +0 -0
- evaluation_plots/20250515_142829_intent_distribution.png +0 -0
- evaluation_plots/20250515_142829_ood_by_method.png +0 -0
- hf.py +18 -0
- intent_api.log +504 -0
- model/class_metrics/f1_per_class.html +0 -0
- model/class_metrics/f1_per_class.png +0 -0
- model/class_performance_metrics.png +0 -0
- model/classification_report.csv +14 -0
- model/classification_report.json +75 -0
- model/combined_metrics.html +0 -0
- model/config.json +51 -0
- model/confusion_matrices/cm_epoch_1.png +0 -0
- model/confusion_matrices/cm_epoch_10.png +0 -0
- model/confusion_matrices/cm_epoch_11.png +0 -0
- model/confusion_matrices/cm_epoch_12.png +0 -0
- model/confusion_matrices/cm_epoch_2.png +0 -0
- model/confusion_matrices/cm_epoch_3.png +0 -0
- model/confusion_matrices/cm_epoch_4.png +0 -0
- model/confusion_matrices/cm_epoch_5.png +0 -0
- model/confusion_matrices/cm_epoch_6.png +0 -0
- model/confusion_matrices/cm_epoch_7.png +0 -0
- model/confusion_matrices/cm_epoch_8.png +0 -0
- model/confusion_matrices/cm_epoch_9.png +0 -0
- model/enhanced_training_history.json +0 -0
- model/final_confusion_matrix.png +0 -0
- model/intent_classes.pkl +3 -0
- model/interactive_class_performance.html +0 -0
- model/interactive_confusion_matrix.html +0 -0
- model/interactive_training_metrics.html +0 -0
- model/label_encoder.pkl +3 -0
- model/learning_rate_schedule.html +0 -0
- model/learning_rate_schedule.png +0 -0
- model/ood_thresholds.json +4 -0
- model/sentencepiece.bpe.model +3 -0
- model/special_tokens_map.json +15 -0
- model/test_results/test_run_20250702_143737/classification_report.json +75 -0
- model/test_results/test_run_20250702_143737/confidence_analysis.json +13 -0
- model/test_results/test_run_20250702_143737/confidence_analysis.png +3 -0
.gitattributes
CHANGED
@@ -1,35 +1,49 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+model/test_results/test_run_20250702_143737/confidence_analysis.png filter=lfs diff=lfs merge=lfs -text
+model/test_results/test_run_20250702_143737/confusion_matrix_absolute.png filter=lfs diff=lfs merge=lfs -text
+model/test_results/test_run_20250702_143737/confusion_matrix_normalized.png filter=lfs diff=lfs merge=lfs -text
+model/test_results/test_run_20250702_143737/per_class_metrics.png filter=lfs diff=lfs merge=lfs -text
+model/test_results/test_run_20250702_152814/confidence_analysis.png filter=lfs diff=lfs merge=lfs -text
+model/test_results/test_run_20250702_152814/confusion_matrix_absolute.png filter=lfs diff=lfs merge=lfs -text
+model/test_results/test_run_20250702_152814/confusion_matrix_normalized.png filter=lfs diff=lfs merge=lfs -text
+model/test_results/test_run_20250702_152814/per_class_metrics.png filter=lfs diff=lfs merge=lfs -text
+model/test_results/test_run_20250702_152925/confidence_analysis.png filter=lfs diff=lfs merge=lfs -text
+model/test_results/test_run_20250702_152925/confusion_matrix_absolute.png filter=lfs diff=lfs merge=lfs -text
+model/test_results/test_run_20250702_152925/confusion_matrix_normalized.png filter=lfs diff=lfs merge=lfs -text
+model/test_results/test_run_20250702_152925/per_class_metrics.png filter=lfs diff=lfs merge=lfs -text
+model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+model/training_metrics.png filter=lfs diff=lfs merge=lfs -text
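The added rules extend the stock LFS patterns with exact-path entries for the test-run PNGs, the tokenizer, and the training-metrics plot. As a rough illustration of how these glob patterns select files (a sketch only; Git's real attribute matching has its own path semantics, e.g. for `**`), Python's fnmatch mimics the simple cases:

    from fnmatch import fnmatch

    # Subset of the patterns above; exact-path entries match only themselves.
    lfs_patterns = ["*.pkl", "*.safetensors", "model/tokenizer.json"]

    def tracked_by_lfs(path: str) -> bool:
        # fnmatch approximates Git's wildmatch for these simple patterns
        return any(fnmatch(path, p) for p in lfs_patterns)

    print(tracked_by_lfs("model/intent_classes.pkl"))  # True: *.pkl
    print(tracked_by_lfs("model/tokenizer.json"))      # True: exact path
    print(tracked_by_lfs("model/config.json"))         # False: no rule matches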
.gitignore
ADDED
@@ -0,0 +1 @@
model/model.safetensors
DockerFile
ADDED
@@ -0,0 +1,23 @@
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system packages (nltk needs some)
RUN apt-get update && apt-get install -y \
    build-essential \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy everything to container
COPY . .

# Install dependencies
RUN pip install --upgrade pip
RUN pip install -r requirements.txt

# Download NLTK data
RUN python -c "import nltk; nltk.download('punkt'); nltk.download('stopwords'); nltk.download('wordnet')"

# Start app using gunicorn (exec form; the quotes must not be backslash-escaped)
CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:7860", "app:app"]
api.log
ADDED
File without changes
app.py
ADDED
@@ -0,0 +1,190 @@
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import torch
import numpy as np
import pickle
import os
import json
import logging
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download necessary NLTK resources
nltk.download('stopwords', quiet=True)
nltk.download('punkt', quiet=True)
nltk.download('wordnet', quiet=True)

# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)

# Global variables and constants
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INTENT_MODEL_PATH = os.path.join(BASE_DIR, "model")
RECOMMENDER_MODEL_PATH = os.path.join(BASE_DIR, "recommender_model")

intent_model = None
intent_tokenizer = None
intent_classes = None
intent_thresholds = None
recommender = None
recommender_model_loaded = False

class BookRecommender:
    def __init__(self, model_name='all-minilm-l6-v2'):
        self.model_name = model_name
        self.model = None
        self.book_embeddings = None
        self.df = None
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()

    def preprocess_text(self, text):
        if not isinstance(text, str):
            return ""
        text = text.lower()
        text = re.sub(r'[^\w\s]', ' ', text)
        tokens = nltk.word_tokenize(text)
        tokens = [self.lemmatizer.lemmatize(word) for word in tokens if word not in self.stop_words]
        return ' '.join(tokens)

    def load_model(self, folder_path=RECOMMENDER_MODEL_PATH):
        try:
            if not os.path.exists(folder_path):
                return False
            with open(os.path.join(folder_path, "config.pkl"), 'rb') as f:
                config = pickle.load(f)
                self.model_name = config['model_name']
            self.model = SentenceTransformer(os.path.join(folder_path, "sentence_transformer"))
            with open(os.path.join(folder_path, "book_embeddings.pkl"), 'rb') as f:
                self.book_embeddings = pickle.load(f)
            with open(os.path.join(folder_path, "books_data.pkl"), 'rb') as f:
                self.df = pickle.load(f)
            return True
        except Exception as e:
            logger.error(f"Error loading model: {str(e)}", exc_info=True)
            return False

    def recommend_books(self, user_query, top_n=5, include_description=True):
        if self.model is None or self.book_embeddings is None or self.df is None:
            return []
        try:
            processed_query = self.preprocess_text(user_query)
            user_embedding = self.model.encode([processed_query])
            similarities = cosine_similarity(user_embedding, self.book_embeddings)[0]
            similar_books_idx = np.argsort(similarities)[-top_n:][::-1]
            recommendations = []
            for i, idx in enumerate(similar_books_idx):
                book_data = {
                    'title': self.df.iloc[idx].get('Title', ''),
                    'author': self.df.iloc[idx].get('Authors', ''),
                    'category': self.df.iloc[idx].get('Category', ''),
                    'year': self.df.iloc[idx].get('Publish Date (Year)', ''),
                    'description': self.df.iloc[idx].get('Description', '')[:197] + "..." if include_description and 'Description' in self.df.columns else '',
                    'relevance_score': float(similarities[idx]),
                    'rank': i + 1
                }
                recommendations.append(book_data)
            return recommendations
        except Exception as e:
            logger.error(f"Error generating recommendations: {str(e)}", exc_info=True)
            return []


def load_ood_thresholds(model_path):
    threshold_path = os.path.join(model_path, "ood_thresholds.json")
    if os.path.exists(threshold_path):
        with open(threshold_path, "r") as f:
            return json.load(f)
    return {"energy_threshold": 0.0, "msp_threshold": 0.5}


def load_intent_resources():
    global intent_model, intent_tokenizer, intent_classes, intent_thresholds
    try:
        intent_model = AutoModelForSequenceClassification.from_pretrained("ZEROTSUDIOS/Bipa-Classification")
        intent_tokenizer = AutoTokenizer.from_pretrained("ZEROTSUDIOS/Bipa-Classification")
        with open(os.path.join(INTENT_MODEL_PATH, "intent_classes.pkl"), "rb") as f:
            intent_classes = pickle.load(f)
        intent_thresholds = load_ood_thresholds(INTENT_MODEL_PATH)
        return True
    except Exception as e:
        logger.error(f"Failed to load intent resources: {str(e)}", exc_info=True)
        return False


def predict_intent(text, method='combined'):
    inputs = intent_tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = intent_model(**inputs)
        logits = outputs.logits
    probs = torch.nn.functional.softmax(logits, dim=-1)
    max_prob, pred_idx = torch.max(probs, dim=-1)
    energy = -torch.logsumexp(logits, dim=-1)
    is_ood = False
    if method == 'energy':
        is_ood = energy.item() > intent_thresholds['energy_threshold']
    elif method == 'msp':
        is_ood = max_prob.item() < intent_thresholds['msp_threshold']
    elif method == 'combined':
        is_ood = (energy.item() > intent_thresholds['energy_threshold']) and (max_prob.item() < intent_thresholds['msp_threshold'])
    return {
        "intent": intent_classes[pred_idx.item()],
        "is_ood": is_ood,
        "confidence": max_prob.item(),
        "energy_score": energy.item()
    }


@app.route('/api/analyze', methods=['POST'])
def analyze():
    if not request.is_json:
        return jsonify({"error": "Request must be JSON"}), 400
    data = request.get_json()
    text = data.get('text')
    if not text:  # validate the required 'text' field before predicting
        return jsonify({"error": "Missing 'text' field in request"}), 400
    method = data.get('method', 'combined')
    result = predict_intent(text, method)
    return jsonify(result)


@app.route('/api/recommend', methods=['POST'])
def recommend():
    global recommender_model_loaded
    if not recommender_model_loaded:
        return jsonify({"error": "Recommendation model not loaded."}), 503
    data = request.get_json()
    query = data.get('query')
    top_n = data.get('top_n', 5)
    include_description = data.get('include_description', True)
    threshold = data.get('threshold', 0.5)
    if not query:
        return jsonify({"error": "Missing query."}), 400
    recommendations = recommender.recommend_books(query, top_n=top_n, include_description=include_description)
    high_score = [rec for rec in recommendations if rec['relevance_score'] >= threshold]
    low_score = [rec for rec in recommendations if rec['relevance_score'] < threshold]
    return jsonify({
        "query": query,
        "threshold": threshold,
        "high_recommendations": high_score,
        "low_recommendations": low_score,
        "total_count": len(recommendations),
        "high_count": len(high_score),
        "low_count": len(low_score)
    })


if __name__ == '__main__':
    load_intent_resources()
    recommender = BookRecommender()
    recommender_model_loaded = recommender.load_model()
    port = int(os.environ.get('PORT', 5000))
    app.run(host='0.0.0.0', port=port, debug=False, use_reloader=False)
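For a quick smoke test of the two endpoints above, a minimal client can be written with `requests` (a sketch: it assumes the server is running locally on the default port 5000 from the `__main__` block, and that `requests` is installed):

    import requests

    BASE_URL = "http://localhost:5000"  # assumed local dev address; adjust for the deployed Space

    # Intent analysis: POST JSON with 'text' and an optional OOD 'method'
    resp = requests.post(f"{BASE_URL}/api/analyze",
                         json={"text": "Where can I find books on machine learning?",
                               "method": "combined"})
    print(resp.json())  # {'intent': ..., 'is_ood': ..., 'confidence': ..., 'energy_score': ...}

    # Book recommendations: POST JSON with 'query'; 'threshold' splits high/low scores
    resp = requests.post(f"{BASE_URL}/api/recommend",
                         json={"query": "python programming", "top_n": 3, "threshold": 0.5})
    body = resp.json()
    print(body["high_count"], body["low_count"])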
app2.py
ADDED
@@ -0,0 +1,460 @@
from flask import Flask, request, jsonify
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import pickle
import torch
import numpy as np
import pandas as pd
import os
import json
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import logging
from flask_cors import CORS

# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    handlers=[logging.FileHandler("api.log"),
                              logging.StreamHandler()])
logger = logging.getLogger(__name__)

# Download required NLTK resources (only needs to run once on server startup)
try:
    nltk.data.find('corpora/stopwords')
    nltk.data.find('corpora/wordnet')
    nltk.data.find('tokenizers/punkt')  # punkt lives under tokenizers/, not corpora/
except LookupError:
    nltk.download('stopwords')
    nltk.download('wordnet')
    nltk.download('punkt')

app = Flask(__name__)
CORS(app)  # Enable Cross-Origin Resource Sharing

# Global variables for intent classification
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_SAVE_PATH = os.path.join(BASE_DIR, "model")
intent_model = None
intent_tokenizer = None
intent_classes = None
intent_thresholds = None

class BookRecommender:
    def __init__(self, model_name='all-minilm-l6-v2'):
        """Initialize the book recommender with the specified model."""
        self.model_name = model_name
        self.model = None
        self.book_embeddings = None
        self.df = None
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()
        logger.info(f"BookRecommender initialized with model: {model_name}")

    def preprocess_text(self, text):
        """Advanced text preprocessing with stopword removal and lemmatization."""
        if not isinstance(text, str):
            return ""

        # Convert to lowercase and remove special characters
        text = text.lower()
        text = re.sub(r'[^\w\s]', ' ', text)

        # Tokenize, remove stopwords, and lemmatize
        tokens = nltk.word_tokenize(text)
        tokens = [self.lemmatizer.lemmatize(word) for word in tokens if word not in self.stop_words]

        return ' '.join(tokens)

    def load_model(self, folder_path="recommender_model"):
        """Load a previously saved model and embeddings for inference."""
        try:
            # Check if folder exists
            if not os.path.exists(folder_path):
                logger.error(f"Model folder {folder_path} does not exist.")
                return False

            # Load configuration
            config_path = os.path.join(folder_path, "config.pkl")
            with open(config_path, 'rb') as f:
                config = pickle.load(f)
                self.model_name = config['model_name']
                logger.info(f"Loaded configuration: model_name={self.model_name}")

            # Load the sentence transformer model
            model_path = os.path.join(folder_path, "sentence_transformer")
            self.model = SentenceTransformer(model_path)
            logger.info(f"Model loaded from {model_path}")

            # Load book embeddings
            embeddings_path = os.path.join(folder_path, "book_embeddings.pkl")
            with open(embeddings_path, 'rb') as f:
                self.book_embeddings = pickle.load(f)
            logger.info(f"Embeddings loaded: {len(self.book_embeddings)} book vectors")

            # Load the DataFrame
            df_path = os.path.join(folder_path, "books_data.pkl")
            with open(df_path, 'rb') as f:
                self.df = pickle.load(f)
            logger.info(f"DataFrame loaded: {len(self.df)} books")

            return True

        except Exception as e:
            logger.error(f"Error loading model: {str(e)}", exc_info=True)
            return False

    def recommend_books(self, user_query, top_n=5, include_description=True):
        """Recommend books based on a user query."""
        if self.model is None or self.book_embeddings is None or self.df is None:
            logger.error("Model not initialized. Cannot make recommendations.")
            return []

        logger.info(f"Finding books similar to: '{user_query}'")

        try:
            # Preprocess the query the same way as the book text
            processed_query = self.preprocess_text(user_query)

            # Encode user query
            user_embedding = self.model.encode([processed_query])

            # Compute similarity between query and books
            similarities = cosine_similarity(user_embedding, self.book_embeddings)[0]

            # Get top N most similar books
            similar_books_idx = np.argsort(similarities)[-top_n:][::-1]

            recommendations = []

            for i, idx in enumerate(similar_books_idx):
                book_data = {}

                # Extract book information
                if 'Title' in self.df.columns:
                    book_data['title'] = self.df.iloc[idx]['Title']

                if 'Authors' in self.df.columns:
                    book_data['author'] = self.df.iloc[idx]['Authors']

                if 'Category' in self.df.columns:
                    book_data['category'] = self.df.iloc[idx]['Category']

                if 'Publish Date (Year)' in self.df.columns:
                    book_data['year'] = self.df.iloc[idx]['Publish Date (Year)']

                if include_description and 'Description' in self.df.columns:
                    # Truncate long descriptions
                    description = self.df.iloc[idx]['Description']
                    if len(description) > 200:
                        description = description[:197] + "..."
                    book_data['description'] = description

                # Add similarity score
                book_data['relevance_score'] = float(similarities[idx])
                book_data['rank'] = i + 1

                recommendations.append(book_data)

            logger.info(f"Successfully generated {len(recommendations)} recommendations")
            return recommendations

        except Exception as e:
            logger.error(f"Error generating recommendations: {str(e)}", exc_info=True)
            return []

# Initialize the recommender system
recommender = BookRecommender()
recommender_model_loaded = False

def load_ood_thresholds(model_path):
    """Load the OOD thresholds from the model directory - using JSON instead of pickle."""
    # Look for JSON file instead of pickle
    threshold_path = os.path.join(model_path, "ood_thresholds.json")

    # Check if file exists before attempting to open
    if os.path.exists(threshold_path):
        with open(threshold_path, "r") as f:
            return json.load(f)
    else:
        # Provide default thresholds if file not found
        logger.warning(f"Threshold file not found at {threshold_path}. Using default values.")
        return {
            "energy_threshold": 0.0,  # Replace with your default value
            "msp_threshold": 0.5      # Replace with your default value
        }

def load_intent_resources():
    """Load model, tokenizer, intent classes, and thresholds for intent classification."""
    global intent_model, intent_tokenizer, intent_classes, intent_thresholds

    logger.info(f"Loading intent resources from {MODEL_SAVE_PATH}...")

    # Load model and tokenizer
    intent_model = AutoModelForSequenceClassification.from_pretrained(MODEL_SAVE_PATH)
    intent_tokenizer = AutoTokenizer.from_pretrained(MODEL_SAVE_PATH)

    # Load intent classes
    intent_classes_path = os.path.join(MODEL_SAVE_PATH, "intent_classes.pkl")
    if os.path.exists(intent_classes_path):
        with open(intent_classes_path, "rb") as f:
            intent_classes = pickle.load(f)
    else:
        raise FileNotFoundError(f"Intent classes file not found at {intent_classes_path}")

    # Load OOD thresholds
    intent_thresholds = load_ood_thresholds(MODEL_SAVE_PATH)

    logger.info("Intent resources loaded successfully")
    logger.info(f"Loaded {len(intent_classes)} intent classes")
    logger.info(f"Thresholds: {intent_thresholds}")

def predict_intent_with_enhanced_ood(text, model, tokenizer, intent_classes,
                                     energy_threshold, msp_threshold, method='combined'):
    """
    Predict intent with enhanced out-of-distribution detection and print details to the terminal.
    """
    logger.info("\n========== INTENT PREDICTION DEBUG ==========")
    logger.info(f"Input Text: {text}")
    logger.info(f"Detection Method: {method}")

    # Tokenize input
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Get model outputs
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits

    logger.info(f"Logits: {logits.numpy().tolist()}")

    # Get probabilities
    probs = torch.nn.functional.softmax(logits, dim=-1)
    max_prob, pred_idx = torch.max(probs, dim=-1)

    logger.info(f"Softmax Probabilities: {probs.numpy().tolist()}")
    logger.info(f"Max Probability (Confidence): {max_prob.item():.4f}")
    logger.info(f"Predicted Index: {pred_idx.item()}")

    # Calculate energy score
    energy = -torch.logsumexp(logits, dim=-1)
    logger.info(f"Energy Score: {energy.item():.4f}")

    # OOD detection
    is_ood = False
    if method == 'energy':
        is_ood = energy.item() > energy_threshold
    elif method == 'msp':
        is_ood = max_prob.item() < msp_threshold
    elif method == 'combined':
        is_ood = (energy.item() > energy_threshold) and (max_prob.item() < msp_threshold)

    logger.info(f"OOD Detection -> is_ood: {is_ood}")
    if is_ood:
        logger.info("Prediction marked as OUT-OF-DISTRIBUTION.")
    else:
        logger.info("Prediction marked as IN-DISTRIBUTION.")

    # Get intent label
    predicted_intent = intent_classes[pred_idx.item()] if not is_ood else "unknown"
    logger.info(f"Predicted Intent: {predicted_intent}")
    logger.info("=============================================\n")

    return {
        "intent": predicted_intent,
        "is_ood": is_ood,
        "confidence": max_prob.item(),
        "energy_score": energy.item()
    }

def initialize_models():
    """Load both models on startup."""
    global recommender_model_loaded

    # Load intent classification model
    try:
        load_intent_resources()
        logger.info("Intent classification model loaded successfully!")
    except Exception as e:
        logger.error(f"Failed to load intent model: {str(e)}", exc_info=True)

    # Load book recommender model
    logger.info("Loading recommender model...")
    recommender_model_loaded = recommender.load_model("recommender_model")
    if recommender_model_loaded:
        logger.info("Book recommender model loaded successfully!")
    else:
        logger.error("Failed to load book recommender model.")

@app.route('/api/health', methods=['GET'])
def health_check():
    """Endpoint to check if the API is running and models are loaded."""
    global recommender_model_loaded

    intent_model_loaded = intent_model is not None and intent_tokenizer is not None

    return jsonify({
        "status": "healthy" if (intent_model_loaded and recommender_model_loaded) else "partial",
        "services": {
            "intent_classification": "loaded" if intent_model_loaded else "not loaded",
            "book_recommendations": "loaded" if recommender_model_loaded else "not loaded"
        }
    })

@app.route('/api/analyze', methods=['POST'])
def analyze():
    """Endpoint to predict intent from text."""
    # Check if request contains JSON
    if not request.is_json:
        return jsonify({"error": "Request must be JSON"}), 400

    # Get text from request
    data = request.get_json()
    if 'text' not in data:
        return jsonify({"error": "Missing 'text' field in request"}), 400

    text = data['text']

    # Default to combined method unless specified
    method = data.get('method', 'combined')
    if method not in ['energy', 'msp', 'combined']:
        return jsonify({"error": "Invalid method. Must be 'energy', 'msp', or 'combined'"}), 400

    # Make prediction
    result = predict_intent_with_enhanced_ood(
        text,
        intent_model,
        intent_tokenizer,
        intent_classes,
        intent_thresholds["energy_threshold"],
        intent_thresholds["msp_threshold"],
        method=method
    )

    # Return prediction as JSON
    return jsonify(result)

@app.route('/api/recommend', methods=['POST'])
def recommend():
    """Endpoint to get book recommendations based on a user query."""
    global recommender_model_loaded

    if not recommender_model_loaded:
        return jsonify({
            "error": "Model not loaded",
            "message": "The recommendation model is not properly loaded."
        }), 503

    data = request.get_json()

    if not data:
        return jsonify({
            "error": "Invalid request",
            "message": "No JSON data provided."
        }), 400

    query = data.get('query')
    top_n = data.get('top_n', 5)
    include_description = data.get('include_description', True)

    if not query:
        return jsonify({
            "error": "Missing parameter",
            "message": "Query parameter is required."
        }), 400

    try:
        # Get recommendations
        recommendations = recommender.recommend_books(
            user_query=query,
            top_n=int(top_n),
            include_description=bool(include_description)
        )

        # Clean recommendations to make them JSON serializable
        def clean_np(obj):
            if isinstance(obj, np.integer):
                return int(obj)
            elif isinstance(obj, np.floating):
                return float(obj)
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            elif isinstance(obj, dict):
                return {k: clean_np(v) for k, v in obj.items()}
            elif isinstance(obj, list):
                return [clean_np(i) for i in obj]
            else:
                return obj

        recommendations_clean = clean_np(recommendations)

        return jsonify({
            "query": query,
            "recommendations": recommendations_clean,
            "count": len(recommendations_clean)
        })

    except Exception as e:
        logger.error(f"Error in recommendation endpoint: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred while processing your request: {str(e)}"
        }), 500


@app.route('/api/stats', methods=['GET'])
def get_stats():
    """Endpoint to get statistics about the loaded dataset."""
    global recommender, recommender_model_loaded

    if not recommender_model_loaded:
        return jsonify({
            "error": "Model not loaded",
            "message": "The recommendation model is not properly loaded."
        }), 503

    try:
        stats = {
            "total_books": len(recommender.df) if recommender.df is not None else 0,
            "model_name": recommender.model_name,
            "categories": list(recommender.df['Category'].value_counts().head(10).to_dict().keys())
            if recommender.df is not None and 'Category' in recommender.df.columns else []
        }

        return jsonify(stats)

    except Exception as e:
        logger.error(f"Error in stats endpoint: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred while retrieving stats: {str(e)}"
        }), 500

@app.route('/', methods=['GET'])
def index():
    """Root endpoint providing API information."""
    return jsonify({
        "status": "API is running",
        "services": {
            "intent_analysis": "Available at /api/analyze",
            "book_recommendations": "Available at /api/recommend",
            "statistics": "Available at /api/stats",
            "health_check": "Available at /api/health"
        },
        "version": "1.0.0"
    })

if __name__ == '__main__':
    # Initialize models when the app starts
    initialize_models()

    # Set port from environment variable or default to 5000
    port = int(os.environ.get('PORT', 5000))

    # For development use debug=True, for production use debug=False
    app.run(host='0.0.0.0', port=port, debug=False)

# Example request:
# curl -X POST http://localhost:5000/api/recommend \
#   -H "Content-Type: application/json" \
#   -d '{"query": "programming for beginners", "top_n": 10, "include_description": true}'
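The OOD logic in predict_intent_with_enhanced_ood combines two standard signals: the free energy E(x) = -logsumexp(logits), which tends to be higher for inputs far from the training distribution, and the maximum softmax probability (MSP), which is low when the model is unsure. A small self-contained sketch of the same decision rule, using made-up logits and thresholds (the real thresholds come from ood_thresholds.json):

    import torch

    # Hypothetical logits for one input over 4 intent classes (illustration only)
    logits = torch.tensor([[2.5, 0.3, -1.0, 0.1]])

    probs = torch.softmax(logits, dim=-1)
    max_prob, pred_idx = torch.max(probs, dim=-1)
    energy = -torch.logsumexp(logits, dim=-1)  # free energy score

    # Example thresholds; the served app reads these from ood_thresholds.json
    energy_threshold, msp_threshold = 0.0, 0.5

    # 'combined' flags OOD only when both signals agree, reducing false positives
    is_ood = (energy.item() > energy_threshold) and (max_prob.item() < msp_threshold)
    print(f"class={pred_idx.item()} conf={max_prob.item():.3f} "
          f"energy={energy.item():.3f} is_ood={is_ood}")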
app3.py
ADDED
@@ -0,0 +1,580 @@
from flask import Flask, request, jsonify, render_template
from flask_cors import CORS
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import torch
import numpy as np
import pickle
import os
import json
import logging
import csv
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from datetime import datetime

# Download necessary NLTK resources
nltk.download('stopwords', quiet=True)
nltk.download('punkt', quiet=True)
nltk.download('wordnet', quiet=True)

# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    handlers=[logging.FileHandler("combined_api.log"),
                              logging.StreamHandler()])
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)  # Enable Cross-Origin Resource Sharing

# Global variables and constants
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INTENT_MODEL_PATH = os.path.join(BASE_DIR, "model")
RECOMMENDER_MODEL_PATH = os.path.join(BASE_DIR, "recommender_model")
EVAL_CSV = "model_evaluation.csv"

# Global model variables
intent_model = None
intent_tokenizer = None
intent_classes = None
intent_thresholds = None
recommender = None
recommender_model_loaded = False

#################################################
# Book Recommender System
#################################################

class BookRecommender:
    def __init__(self, model_name='all-minilm-l6-v2'):
        """Initialize the book recommender with the specified model."""
        self.model_name = model_name
        self.model = None
        self.book_embeddings = None
        self.df = None
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()
        logger.info(f"BookRecommender initialized with model: {model_name}")

    def preprocess_text(self, text):
        """Advanced text preprocessing with stopword removal and lemmatization."""
        if not isinstance(text, str):
            return ""

        # Convert to lowercase and remove special characters
        text = text.lower()
        text = re.sub(r'[^\w\s]', ' ', text)

        # Tokenize, remove stopwords, and lemmatize
        tokens = nltk.word_tokenize(text)
        tokens = [self.lemmatizer.lemmatize(word) for word in tokens if word not in self.stop_words]

        return ' '.join(tokens)

    def load_model(self, folder_path=RECOMMENDER_MODEL_PATH):
        """Load a previously saved model and embeddings for inference."""
        try:
            # Check if folder exists
            if not os.path.exists(folder_path):
                logger.error(f"Model folder {folder_path} does not exist.")
                return False

            # Load configuration
            config_path = os.path.join(folder_path, "config.pkl")
            with open(config_path, 'rb') as f:
                config = pickle.load(f)
                self.model_name = config['model_name']
                logger.info(f"Loaded configuration: model_name={self.model_name}")

            # Load the sentence transformer model
            model_path = os.path.join(folder_path, "sentence_transformer")
            self.model = SentenceTransformer(model_path)
            logger.info(f"Model loaded from {model_path}")

            # Load book embeddings
            embeddings_path = os.path.join(folder_path, "book_embeddings.pkl")
            with open(embeddings_path, 'rb') as f:
                self.book_embeddings = pickle.load(f)
            logger.info(f"Embeddings loaded: {len(self.book_embeddings)} book vectors")

            # Load the DataFrame
            df_path = os.path.join(folder_path, "books_data.pkl")
            with open(df_path, 'rb') as f:
                self.df = pickle.load(f)
            logger.info(f"DataFrame loaded: {len(self.df)} books")

            return True

        except Exception as e:
            logger.error(f"Error loading model: {str(e)}", exc_info=True)
            return False

    def recommend_books(self, user_query, top_n=5, include_description=True):
        """Recommend books based on a user query."""
        if self.model is None or self.book_embeddings is None or self.df is None:
            logger.error("Model not initialized. Cannot make recommendations.")
            return []

        logger.info(f"Finding books similar to: '{user_query}'")

        try:
            # Preprocess the query the same way as the book text
            processed_query = self.preprocess_text(user_query)

            # Encode user query
            user_embedding = self.model.encode([processed_query])

            # Compute similarity between query and books
            similarities = cosine_similarity(user_embedding, self.book_embeddings)[0]

            # Get top N most similar books
            similar_books_idx = np.argsort(similarities)[-top_n:][::-1]

            recommendations = []

            for i, idx in enumerate(similar_books_idx):
                book_data = {}

                # Extract book information
                if 'Title' in self.df.columns:
                    book_data['title'] = self.df.iloc[idx]['Title']

                if 'Authors' in self.df.columns:
                    book_data['author'] = self.df.iloc[idx]['Authors']

                if 'Category' in self.df.columns:
                    book_data['category'] = self.df.iloc[idx]['Category']

                if 'Publish Date (Year)' in self.df.columns:
                    book_data['year'] = self.df.iloc[idx]['Publish Date (Year)']

                if include_description and 'Description' in self.df.columns:
                    # Truncate long descriptions
                    description = self.df.iloc[idx]['Description']
                    if len(description) > 200:
                        description = description[:197] + "..."
                    book_data['description'] = description

                # Add similarity score
                book_data['relevance_score'] = float(similarities[idx])
                book_data['rank'] = i + 1

                recommendations.append(book_data)

            logger.info(f"Successfully generated {len(recommendations)} recommendations")
            return recommendations

        except Exception as e:
            logger.error(f"Error generating recommendations: {str(e)}", exc_info=True)
            return []

#################################################
# Intent Classification
#################################################

def setup_evaluation_csv():
    """Set up the CSV file for tracking model performance."""
    if not os.path.exists(EVAL_CSV):
        with open(EVAL_CSV, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow([
                'timestamp',
                'input_text',
                'predicted_intent',
                'is_ood',
                'confidence',
                'energy_score',
                'detection_method'
            ])
        logger.info(f"Created evaluation CSV file: {EVAL_CSV}")

def save_prediction_to_csv(input_text, result, method):
    """Save prediction results to CSV for later analysis."""
    with open(EVAL_CSV, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([
            datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            input_text,
            result['intent'],
            result['is_ood'],
            result['confidence'],
            result['energy_score'],
            method
        ])

def load_ood_thresholds(model_path):
    """Load the OOD thresholds from the model directory."""
    threshold_path = os.path.join(model_path, "ood_thresholds.json")

    if os.path.exists(threshold_path):
        with open(threshold_path, "r") as f:
            return json.load(f)
    else:
        # Provide default thresholds if file not found
        logger.warning(f"Threshold file not found at {threshold_path}. Using default values.")
        return {
            "energy_threshold": 0.0,  # Replace with your default value
            "msp_threshold": 0.5      # Replace with your default value
        }

def load_intent_resources():
    """Load model, tokenizer, intent classes, and thresholds for intent classification."""
    global intent_model, intent_tokenizer, intent_classes, intent_thresholds

    logger.info(f"Loading intent resources from {INTENT_MODEL_PATH}...")

    try:
        # Load model and tokenizer
        intent_model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_PATH)
        intent_tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_PATH)

        # Load intent classes
        intent_classes_path = os.path.join(INTENT_MODEL_PATH, "intent_classes.pkl")
        if os.path.exists(intent_classes_path):
            with open(intent_classes_path, "rb") as f:
                intent_classes = pickle.load(f)
        else:
            raise FileNotFoundError(f"Intent classes file not found at {intent_classes_path}")

        # Load OOD thresholds
        intent_thresholds = load_ood_thresholds(INTENT_MODEL_PATH)

        logger.info("Intent resources loaded successfully")
        logger.info(f"Loaded {len(intent_classes)} intent classes")
        logger.info(f"Thresholds: {intent_thresholds}")
        return True

    except Exception as e:
        logger.error(f"Failed to load intent resources: {str(e)}", exc_info=True)
        return False

def predict_intent_with_enhanced_ood(text, model, tokenizer, intent_classes,
                                     energy_threshold, msp_threshold, method='combined'):
    """
    Predict intent with enhanced out-of-distribution detection and detailed logging.
    """
    logger.info("\n========== INTENT PREDICTION DEBUG ==========")
    logger.info(f"Input Text: {text}")
    logger.info(f"Detection Method: {method}")

    # Tokenize input
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Get model outputs
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits

    logger.info(f"Logits: {logits.numpy().tolist()}")

    # Get probabilities
    probs = torch.nn.functional.softmax(logits, dim=-1)
    max_prob, pred_idx = torch.max(probs, dim=-1)

    logger.info(f"Softmax Probabilities: {probs.numpy().tolist()}")
    logger.info(f"Max Probability (Confidence): {max_prob.item():.4f}")
    logger.info(f"Predicted Index: {pred_idx.item()}")

    # Calculate energy score
    energy = -torch.logsumexp(logits, dim=-1)
    logger.info(f"Energy Score: {energy.item():.4f}")

    # OOD detection
    is_ood = False
    if method == 'energy':
        is_ood = energy.item() > energy_threshold
    elif method == 'msp':
        is_ood = max_prob.item() < msp_threshold
    elif method == 'combined':
        is_ood = (energy.item() > energy_threshold) and (max_prob.item() < msp_threshold)

    logger.info(f"OOD Detection -> is_ood: {is_ood}")
    if is_ood:
        logger.info("Prediction marked as OUT-OF-DISTRIBUTION.")
    else:
        logger.info("Prediction marked as IN-DISTRIBUTION.")

    # Get intent label
    predicted_intent = intent_classes[pred_idx.item()]
    logger.info(f"Predicted Intent: {predicted_intent}")
    logger.info("=============================================\n")

    return {
        "intent": predicted_intent,
        "is_ood": is_ood,
        "confidence": max_prob.item(),
        "energy_score": energy.item(),
        # Add all class probabilities for detailed analysis
        "class_probabilities": {
            intent_classes[i]: float(prob)
            for i, prob in enumerate(probs[0].numpy())
        }
    }

#################################################
# Server Initialization
#################################################

def initialize_models():
    """Load all required models on startup."""
    global recommender, recommender_model_loaded

    # Create evaluation CSV if it doesn't exist
    setup_evaluation_csv()

    # Load intent classification model
    intent_model_loaded = load_intent_resources()
    if intent_model_loaded:
        logger.info("Intent classification model loaded successfully!")
    else:
        logger.error("Failed to load intent model.")

    # Initialize book recommender
    recommender = BookRecommender()
    recommender_model_loaded = recommender.load_model()
    if recommender_model_loaded:
        logger.info("Book recommendation model loaded successfully!")
    else:
        logger.error("Failed to load book recommendation model.")

    return intent_model_loaded and recommender_model_loaded

#################################################
# API Routes
#################################################

@app.route('/api/health', methods=['GET'])
def health_check():
    """Endpoint to check if the API is running and models are loaded."""
    intent_models_loaded = intent_model is not None and intent_tokenizer is not None

    return jsonify({
        "status": "healthy" if (intent_models_loaded and recommender_model_loaded) else "partially_healthy" if (intent_models_loaded or recommender_model_loaded) else "unhealthy",
        "intent_model_loaded": intent_models_loaded,
        "recommender_model_loaded": recommender_model_loaded,
        "available_endpoints": [
            "/api/health",
            "/api/analyze",
            "/api/recommend",
            "/api/stats",
            "/api/download_eval_data"
        ]
    })

#################################################
# Intent Classification Routes
#################################################

@app.route('/api/analyze', methods=['POST'])
def analyze():
    """Endpoint to predict intent from text."""
    # Check if request contains JSON
    if not request.is_json:
        return jsonify({"error": "Request must be JSON"}), 400

    # Get text from request
    data = request.get_json()
    if 'text' not in data:
        return jsonify({"error": "Missing 'text' field in request"}), 400

    text = data['text']

    # Default to combined method unless specified
    method = data.get('method', 'combined')
    if method not in ['energy', 'msp', 'combined']:
        return jsonify({"error": "Invalid method. Must be 'energy', 'msp', or 'combined'"}), 400

    # Make prediction
    result = predict_intent_with_enhanced_ood(
        text,
        intent_model,
        intent_tokenizer,
        intent_classes,
        intent_thresholds["energy_threshold"],
        intent_thresholds["msp_threshold"],
        method=method
    )

    # Save result to CSV for evaluation
    save_prediction_to_csv(text, result, method)

    # Return prediction as JSON
    return jsonify(result)

@app.route('/api/stats', methods=['GET'])
def get_stats():
    """Get statistics about model usage and predictions."""
    try:
        stats = {
            "intent_model_info": {
                "num_intent_classes": len(intent_classes) if intent_classes else 0,
                "model_path": INTENT_MODEL_PATH,
                "thresholds": intent_thresholds
            },
            "recommender_model_info": {
                "model_name": recommender.model_name if recommender else None,
                "num_books": len(recommender.df) if recommender and recommender.df is not None else 0
            },
            "usage": {}
        }

        # Read CSV to generate statistics if it exists
        if os.path.exists(EVAL_CSV):
            with open(EVAL_CSV, 'r') as f:
                reader = csv.DictReader(f)
                rows = list(reader)

            stats["usage"] = {
                "total_queries": len(rows),
                "ood_count": sum(1 for row in rows if row["is_ood"] == "True"),
                "top_intents": {}
            }

            # Count intents for statistical analysis
            intent_counts = {}
            for row in rows:
                intent = row["predicted_intent"]
                if intent not in intent_counts:
                    intent_counts[intent] = 0
                intent_counts[intent] += 1

            # Get top 5 intents
            top_intents = sorted(intent_counts.items(), key=lambda x: x[1], reverse=True)[:5]
            stats["usage"]["top_intents"] = dict(top_intents)

        return jsonify(stats)

    except Exception as e:
        logger.error(f"Error in stats endpoint: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred while retrieving stats: {str(e)}"
        }), 500

@app.route('/api/download_eval_data', methods=['GET'])
def download_eval_data():
    """Return the evaluation data as JSON for analysis."""
    try:
        if not os.path.exists(EVAL_CSV):
            return jsonify({"error": "No evaluation data available yet"}), 404

        with open(EVAL_CSV, 'r') as f:
            reader = csv.DictReader(f)
            rows = list(reader)

        return jsonify({
            "count": len(rows),
            "data": rows
        })

    except Exception as e:
        logger.error(f"Error downloading evaluation data: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred: {str(e)}"
        }), 500

#################################################
# Book Recommender Routes
#################################################

@app.route('/api/recommend', methods=['POST'])
def recommend():
    """Endpoint to get book recommendations based on a user query."""
    global recommender_model_loaded

    if not recommender_model_loaded:
        return jsonify({
            "error": "Model not loaded",
            "message": "The recommendation model is not properly loaded."
        }), 503

    data = request.get_json()

    if not data:
        return jsonify({
            "error": "Invalid request",
            "message": "No JSON data provided."
        }), 400

    query = data.get('query')
    top_n = data.get('top_n', 5)
    include_description = data.get('include_description', True)
    threshold = data.get('threshold', 0.5)  # default threshold

    if not query:
        return jsonify({
            "error": "Missing parameter",
            "message": "Query parameter is required."
        }), 400

    try:
        # Get recommendations
        recommendations = recommender.recommend_books(
|
518 |
+
user_query=query,
|
519 |
+
top_n=int(top_n),
|
520 |
+
include_description=bool(include_description)
|
521 |
+
)
|
522 |
+
|
523 |
+
# Clean recommendations to make it JSON serializable
|
524 |
+
def clean_np(obj):
|
525 |
+
if isinstance(obj, np.integer):
|
526 |
+
return int(obj)
|
527 |
+
elif isinstance(obj, np.floating):
|
528 |
+
return float(obj)
|
529 |
+
elif isinstance(obj, np.ndarray):
|
530 |
+
return obj.tolist()
|
531 |
+
elif isinstance(obj, dict):
|
532 |
+
return {k: clean_np(v) for k, v in obj.items()}
|
533 |
+
elif isinstance(obj, list):
|
534 |
+
return [clean_np(i) for i in obj]
|
535 |
+
else:
|
536 |
+
return obj
|
537 |
+
|
538 |
+
recommendations_clean = clean_np(recommendations)
|
539 |
+
|
540 |
+
# Split based on threshold
|
541 |
+
high_score = [rec for rec in recommendations_clean if rec['relevance_score'] >= threshold]
|
542 |
+
low_score = [rec for rec in recommendations_clean if rec['relevance_score'] < threshold]
|
543 |
+
|
544 |
+
return jsonify({
|
545 |
+
"query": query,
|
546 |
+
"threshold": threshold,
|
547 |
+
"high_recommendations": high_score,
|
548 |
+
"low_recommendations": low_score,
|
549 |
+
"total_count": len(recommendations_clean),
|
550 |
+
"high_count": len(high_score),
|
551 |
+
"low_count": len(low_score)
|
552 |
+
})
|
553 |
+
|
554 |
+
except Exception as e:
|
555 |
+
logger.error(f"Error in recommendation endpoint: {str(e)}", exc_info=True)
|
556 |
+
return jsonify({
|
557 |
+
"error": "Processing error",
|
558 |
+
"message": f"An error occurred while processing your request: {str(e)}"
|
559 |
+
}), 500
|
560 |
+
|
561 |
+
#################################################
|
562 |
+
# Main
|
563 |
+
#################################################
|
564 |
+
|
565 |
+
if __name__ == '__main__':
|
566 |
+
# Initialize models when the app starts
|
567 |
+
models_loaded = initialize_models()
|
568 |
+
|
569 |
+
# Set port from environment variable or default to 5000
|
570 |
+
port = int(os.environ.get('PORT', 5000))
|
571 |
+
|
572 |
+
# For development use debug=True, for production use debug=False
|
573 |
+
app.run(host='0.0.0.0', port=port, debug=False, use_reloader=False)
|
574 |
+
|
575 |
+
|
576 |
+
|
577 |
+
#curl -X POST http://localhost:5000/api/analyze \-H "Content-Type: application/json" \-d '{"text": "cariin buku", "method": "combined"}'
|
578 |
+
|
579 |
+
#curl -X POST http://localhost:5000/api/recommend \-H "Content-Type: application/json" \-d '{"query": "programming for begginers","top_n": 10,"include_description": true}'
|
580 |
+
|
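The two commented curl calls above translate directly to Python. A minimal client sketch, assuming the server is running on the default host and port configured in this file and that the third-party requests package is installed:

import requests

BASE_URL = "http://localhost:5000"  # default host/port from app.run above

# /api/analyze takes a JSON body with 'text' (required) and an optional
# 'method' of 'energy', 'msp', or 'combined'.
resp = requests.post(f"{BASE_URL}/api/analyze",
                     json={"text": "cariin buku", "method": "combined"})
print(resp.json()["intent"], resp.json()["is_ood"])

# /api/recommend takes 'query' (required); 'top_n', 'include_description',
# and 'threshold' are optional and default to 5, True, and 0.5.
resp = requests.post(f"{BASE_URL}/api/recommend",
                     json={"query": "programming for beginners", "top_n": 10})
print(resp.json()["high_count"], resp.json()["low_count"])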
app4.py
ADDED
@@ -0,0 +1,580 @@
from flask import Flask, request, jsonify, render_template
from flask_cors import CORS
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import torch
import numpy as np
import pickle
import os
import json
import logging
import csv
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from datetime import datetime

# Download necessary NLTK resources
nltk.download('stopwords', quiet=True)
nltk.download('punkt', quiet=True)
nltk.download('wordnet', quiet=True)

# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    handlers=[logging.FileHandler("combined_api.log"),
                              logging.StreamHandler()])
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)  # Enable Cross-Origin Resource Sharing

# Global variables and constants
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INTENT_MODEL_PATH = os.path.join(BASE_DIR, "model")
RECOMMENDER_MODEL_PATH = os.path.join(BASE_DIR, "recommender_model")
EVAL_CSV = "model_evaluation.csv"

# Global model variables
intent_model = None
intent_tokenizer = None
intent_classes = None
intent_thresholds = None
recommender = None
recommender_model_loaded = False

#################################################
# Book Recommender System
#################################################

class BookRecommender:
    def __init__(self, model_name='all-minilm-l6-v2'):
        """Initialize the book recommender with the specified model."""
        self.model_name = model_name
        self.model = None
        self.book_embeddings = None
        self.df = None
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()
        logger.info(f"BookRecommender initialized with model: {model_name}")

    def preprocess_text(self, text):
        """Advanced text preprocessing with stopword removal and lemmatization."""
        if not isinstance(text, str):
            return ""

        # Convert to lowercase and remove special characters
        text = text.lower()
        text = re.sub(r'[^\w\s]', ' ', text)

        # Tokenize, remove stopwords, and lemmatize
        tokens = nltk.word_tokenize(text)
        tokens = [self.lemmatizer.lemmatize(word) for word in tokens if word not in self.stop_words]

        return ' '.join(tokens)

    def load_model(self, folder_path=RECOMMENDER_MODEL_PATH):
        """Load a previously saved model and embeddings for inference."""
        try:
            # Check if the folder exists
            if not os.path.exists(folder_path):
                logger.error(f"Model folder {folder_path} does not exist.")
                return False

            # Load configuration
            config_path = os.path.join(folder_path, "config.pkl")
            with open(config_path, 'rb') as f:
                config = pickle.load(f)
                self.model_name = config['model_name']
            logger.info(f"Loaded configuration: model_name={self.model_name}")

            # Load the sentence transformer model
            model_path = os.path.join(folder_path, "sentence_transformer")
            self.model = SentenceTransformer(model_path)
            logger.info(f"Model loaded from {model_path}")

            # Load book embeddings
            embeddings_path = os.path.join(folder_path, "book_embeddings.pkl")
            with open(embeddings_path, 'rb') as f:
                self.book_embeddings = pickle.load(f)
            logger.info(f"Embeddings loaded: {len(self.book_embeddings)} book vectors")

            # Load the DataFrame
            df_path = os.path.join(folder_path, "books_data.pkl")
            with open(df_path, 'rb') as f:
                self.df = pickle.load(f)
            logger.info(f"DataFrame loaded: {len(self.df)} books")

            return True

        except Exception as e:
            logger.error(f"Error loading model: {str(e)}", exc_info=True)
            return False

    def recommend_books(self, user_query, top_n=5, include_description=True):
        """Recommend books based on a user query."""
        if self.model is None or self.book_embeddings is None or self.df is None:
            logger.error("Model not initialized. Cannot make recommendations.")
            return []

        logger.info(f"Finding books similar to: '{user_query}'")

        try:
            # Preprocess the query the same way as the book text
            processed_query = self.preprocess_text(user_query)

            # Encode the user query
            user_embedding = self.model.encode([processed_query])

            # Compute similarity between the query and the books
            similarities = cosine_similarity(user_embedding, self.book_embeddings)[0]

            # Get the top N most similar books
            similar_books_idx = np.argsort(similarities)[-top_n:][::-1]

            recommendations = []

            for i, idx in enumerate(similar_books_idx):
                book_data = {}

                # Extract book information
                if 'Title' in self.df.columns:
                    book_data['title'] = self.df.iloc[idx]['Title']

                if 'Authors' in self.df.columns:
                    book_data['author'] = self.df.iloc[idx]['Authors']

                if 'Category' in self.df.columns:
                    book_data['category'] = self.df.iloc[idx]['Category']

                if 'Publish Date (Year)' in self.df.columns:
                    book_data['year'] = self.df.iloc[idx]['Publish Date (Year)']

                if include_description and 'Description' in self.df.columns:
                    # Truncate long descriptions
                    description = self.df.iloc[idx]['Description']
                    if len(description) > 200:
                        description = description[:197] + "..."
                    book_data['description'] = description

                # Add the similarity score
                book_data['relevance_score'] = float(similarities[idx])
                book_data['rank'] = i + 1

                recommendations.append(book_data)

            logger.info(f"Successfully generated {len(recommendations)} recommendations")
            return recommendations

        except Exception as e:
            logger.error(f"Error generating recommendations: {str(e)}", exc_info=True)
            return []

#################################################
# Intent Classification
#################################################

def setup_evaluation_csv():
    """Set up the CSV file for tracking model performance."""
    if not os.path.exists(EVAL_CSV):
        with open(EVAL_CSV, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow([
                'timestamp',
                'input_text',
                'predicted_intent',
                'is_ood',
                'confidence',
                'energy_score',
                'detection_method'
            ])
        logger.info(f"Created evaluation CSV file: {EVAL_CSV}")

def save_prediction_to_csv(input_text, result, method):
    """Save prediction results to CSV for later analysis."""
    with open(EVAL_CSV, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([
            datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            input_text,
            result['intent'],
            result['is_ood'],
            result['confidence'],
            result['energy_score'],
            method
        ])

def load_ood_thresholds(model_path):
    """Load the OOD thresholds from the model directory."""
    threshold_path = os.path.join(model_path, "ood_thresholds.json")

    if os.path.exists(threshold_path):
        with open(threshold_path, "r") as f:
            return json.load(f)
    else:
        # Provide default thresholds if the file is not found
        logger.warning(f"Threshold file not found at {threshold_path}. Using default values.")
        return {
            "energy_threshold": 0.0,  # Replace with your default value
            "msp_threshold": 0.5      # Replace with your default value
        }

def load_intent_resources():
    """Load model, tokenizer, intent classes, and thresholds for intent classification."""
    global intent_model, intent_tokenizer, intent_classes, intent_thresholds

    logger.info(f"Loading intent resources from {INTENT_MODEL_PATH}...")

    try:
        # Load model and tokenizer
        intent_model = AutoModelForSequenceClassification.from_pretrained("ZEROTSUDIOS/Bipa-Classification")
        intent_tokenizer = AutoTokenizer.from_pretrained("ZEROTSUDIOS/Bipa-Classification")

        # Load intent classes
        intent_classes_path = os.path.join(INTENT_MODEL_PATH, "intent_classes.pkl")
        if os.path.exists(intent_classes_path):
            with open(intent_classes_path, "rb") as f:
                intent_classes = pickle.load(f)
        else:
            raise FileNotFoundError(f"Intent classes file not found at {intent_classes_path}")

        # Load OOD thresholds
        intent_thresholds = load_ood_thresholds(INTENT_MODEL_PATH)

        logger.info("Intent resources loaded successfully")
        logger.info(f"Loaded {len(intent_classes)} intent classes")
        logger.info(f"Thresholds: {intent_thresholds}")
        return True

    except Exception as e:
        logger.error(f"Failed to load intent resources: {str(e)}", exc_info=True)
        return False

def predict_intent_with_enhanced_ood(text, model, tokenizer, intent_classes,
                                     energy_threshold, msp_threshold, method='combined'):
    """
    Predict intent with enhanced out-of-distribution detection and detailed logging.
    """
    logger.info("\n========== INTENT PREDICTION DEBUG ==========")
    logger.info(f"Input Text: {text}")
    logger.info(f"Detection Method: {method}")

    # Tokenize input
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Get model outputs
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits

    logger.info(f"Logits: {logits.numpy().tolist()}")

    # Get probabilities
    probs = torch.nn.functional.softmax(logits, dim=-1)
    max_prob, pred_idx = torch.max(probs, dim=-1)

    logger.info(f"Softmax Probabilities: {probs.numpy().tolist()}")
    logger.info(f"Max Probability (Confidence): {max_prob.item():.4f}")
    logger.info(f"Predicted Index: {pred_idx.item()}")

    # Calculate the energy score
    energy = -torch.logsumexp(logits, dim=-1)
    logger.info(f"Energy Score: {energy.item():.4f}")

    # OOD detection
    is_ood = False
    if method == 'energy':
        is_ood = energy.item() > energy_threshold
    elif method == 'msp':
        is_ood = max_prob.item() < msp_threshold
    elif method == 'combined':
        is_ood = (energy.item() > energy_threshold) and (max_prob.item() < msp_threshold)

    logger.info(f"OOD Detection -> is_ood: {is_ood}")
    if is_ood:
        logger.info("Prediction marked as OUT-OF-DISTRIBUTION.")
    else:
        logger.info("Prediction marked as IN-DISTRIBUTION.")

    # Get the intent label
    predicted_intent = intent_classes[pred_idx.item()]
    logger.info(f"Predicted Intent: {predicted_intent}")
    logger.info("=============================================\n")

    return {
        "intent": predicted_intent,
        "is_ood": is_ood,
        "confidence": max_prob.item(),
        "energy_score": energy.item(),
        # Add all class probabilities for detailed analysis
        "class_probabilities": {
            intent_classes[i]: float(prob)
            for i, prob in enumerate(probs[0].numpy())
        }
    }

#################################################
# Server Initialization
#################################################

def initialize_models():
    """Load all required models on startup."""
    global recommender, recommender_model_loaded

    # Create evaluation CSV if it doesn't exist
    setup_evaluation_csv()

    # Load intent classification model
    intent_model_loaded = load_intent_resources()
    if intent_model_loaded:
        logger.info("Intent classification model loaded successfully!")
    else:
        logger.error("Failed to load intent model.")

    # Initialize book recommender
    recommender = BookRecommender()
    recommender_model_loaded = recommender.load_model()
    if recommender_model_loaded:
        logger.info("Book recommendation model loaded successfully!")
    else:
        logger.error("Failed to load book recommendation model.")

    return intent_model_loaded and recommender_model_loaded

#################################################
# API Routes
#################################################

@app.route('/api/health', methods=['GET'])
def health_check():
    """Endpoint to check if the API is running and models are loaded."""
    intent_models_loaded = intent_model is not None and intent_tokenizer is not None

    return jsonify({
        "status": "healthy" if (intent_models_loaded and recommender_model_loaded) else "partially_healthy" if (intent_models_loaded or recommender_model_loaded) else "unhealthy",
        "intent_model_loaded": intent_models_loaded,
        "recommender_model_loaded": recommender_model_loaded,
        "available_endpoints": [
            "/api/health",
            "/api/analyze",
            "/api/recommend",
            "/api/stats",
            "/api/download_eval_data"
        ]
    })

#################################################
# Intent Classification Routes
#################################################

@app.route('/api/analyze', methods=['POST'])
def analyze():
    """Endpoint to predict intent from text."""
    # Check if request contains JSON
    if not request.is_json:
        return jsonify({"error": "Request must be JSON"}), 400

    # Get text from request
    data = request.get_json()
    if 'text' not in data:
        return jsonify({"error": "Missing 'text' field in request"}), 400

    text = data['text']

    # Default to the combined method unless specified
    method = data.get('method', 'combined')
    if method not in ['energy', 'msp', 'combined']:
        return jsonify({"error": "Invalid method. Must be 'energy', 'msp', or 'combined'"}), 400

    # Make prediction
    result = predict_intent_with_enhanced_ood(
        text,
        intent_model,
        intent_tokenizer,
        intent_classes,
        intent_thresholds["energy_threshold"],
        intent_thresholds["msp_threshold"],
        method=method
    )

    # Save result to CSV for evaluation
    save_prediction_to_csv(text, result, method)

    # Return prediction as JSON
    return jsonify(result)

@app.route('/api/stats', methods=['GET'])
def get_stats():
    """Get statistics about model usage and predictions."""
    try:
        stats = {
            "intent_model_info": {
                "num_intent_classes": len(intent_classes) if intent_classes else 0,
                "model_path": INTENT_MODEL_PATH,
                "thresholds": intent_thresholds
            },
            "recommender_model_info": {
                "model_name": recommender.model_name if recommender else None,
                "num_books": len(recommender.df) if recommender and recommender.df is not None else 0
            },
            "usage": {}
        }

        # Read the CSV to generate usage statistics if it exists
        if os.path.exists(EVAL_CSV):
            with open(EVAL_CSV, 'r') as f:
                reader = csv.DictReader(f)
                rows = list(reader)

            stats["usage"] = {
                "total_queries": len(rows),
                "ood_count": sum(1 for row in rows if row["is_ood"] == "True"),
                "top_intents": {}
            }

            # Count intents for statistical analysis
            intent_counts = {}
            for row in rows:
                intent = row["predicted_intent"]
                if intent not in intent_counts:
                    intent_counts[intent] = 0
                intent_counts[intent] += 1

            # Get top 5 intents
            top_intents = sorted(intent_counts.items(), key=lambda x: x[1], reverse=True)[:5]
            stats["usage"]["top_intents"] = dict(top_intents)

        return jsonify(stats)

    except Exception as e:
        logger.error(f"Error in stats endpoint: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred while retrieving stats: {str(e)}"
        }), 500

@app.route('/api/download_eval_data', methods=['GET'])
def download_eval_data():
    """Return the evaluation data as JSON for analysis."""
    try:
        if not os.path.exists(EVAL_CSV):
            return jsonify({"error": "No evaluation data available yet"}), 404

        with open(EVAL_CSV, 'r') as f:
            reader = csv.DictReader(f)
            rows = list(reader)

        return jsonify({
            "count": len(rows),
            "data": rows
        })

    except Exception as e:
        logger.error(f"Error downloading evaluation data: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred: {str(e)}"
        }), 500

#################################################
# Book Recommender Routes
#################################################

@app.route('/api/recommend', methods=['POST'])
def recommend():
    """Endpoint to get book recommendations based on a user query."""
    global recommender_model_loaded

    if not recommender_model_loaded:
        return jsonify({
            "error": "Model not loaded",
            "message": "The recommendation model is not properly loaded."
        }), 503

    data = request.get_json()

    if not data:
        return jsonify({
            "error": "Invalid request",
            "message": "No JSON data provided."
        }), 400

    query = data.get('query')
    top_n = data.get('top_n', 5)
    include_description = data.get('include_description', True)
    threshold = data.get('threshold', 0.5)  # default relevance threshold

    if not query:
        return jsonify({
            "error": "Missing parameter",
            "message": "Query parameter is required."
        }), 400

    try:
        # Get recommendations
        recommendations = recommender.recommend_books(
            user_query=query,
            top_n=int(top_n),
            include_description=bool(include_description)
        )

        # Clean recommendations so they are JSON serializable
        def clean_np(obj):
            if isinstance(obj, np.integer):
                return int(obj)
            elif isinstance(obj, np.floating):
                return float(obj)
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            elif isinstance(obj, dict):
                return {k: clean_np(v) for k, v in obj.items()}
            elif isinstance(obj, list):
                return [clean_np(i) for i in obj]
            else:
                return obj

        recommendations_clean = clean_np(recommendations)

        # Split based on the relevance threshold
        high_score = [rec for rec in recommendations_clean if rec['relevance_score'] >= threshold]
        low_score = [rec for rec in recommendations_clean if rec['relevance_score'] < threshold]

        return jsonify({
            "query": query,
            "threshold": threshold,
            "high_recommendations": high_score,
            "low_recommendations": low_score,
            "total_count": len(recommendations_clean),
            "high_count": len(high_score),
            "low_count": len(low_score)
        })

    except Exception as e:
        logger.error(f"Error in recommendation endpoint: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred while processing your request: {str(e)}"
        }), 500

#################################################
# Main
#################################################

if __name__ == '__main__':
    # Initialize models when the app starts
    models_loaded = initialize_models()

    # Set port from environment variable or default to 5000
    port = int(os.environ.get('PORT', 5000))

    # Use debug=True for development; debug=False for production
    app.run(host='0.0.0.0', port=port, debug=False, use_reloader=False)


# curl -X POST http://localhost:5000/api/analyze -H "Content-Type: application/json" -d '{"text": "cariin buku", "method": "combined"}'
# curl -X POST http://localhost:5000/api/recommend -H "Content-Type: application/json" -d '{"query": "programming for beginners", "top_n": 10, "include_description": true}'
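predict_intent_with_enhanced_ood combines two detectors: an energy score, energy(x) = -logsumexp(logits), which grows as the model becomes less certain, and the maximum softmax probability (MSP). A standalone sketch of just that decision rule, using the threshold values recorded in intent_api.log below; the logits here are illustrative, not real model outputs:

import torch

# Thresholds as logged in intent_api.log; logits are an illustrative example.
energy_threshold = -5.720269680023193
msp_threshold = 0.8734092712402344

logits = torch.tensor([[5.4, 0.1, -0.8, -0.7, -0.8, 0.1, -0.8, -0.4, -1.2]])

max_prob = torch.softmax(logits, dim=-1).max().item()   # MSP score
energy = (-torch.logsumexp(logits, dim=-1)).item()      # energy score

# The 'combined' method flags OOD only when BOTH detectors agree:
# high energy (above the energy threshold) AND low MSP (below the MSP threshold).
is_ood = (energy > energy_threshold) and (max_prob < msp_threshold)
print(f"energy={energy:.4f} msp={max_prob:.4f} is_ood={is_ood}")

For these example logits the energy is high (about -5.42) but the MSP is also high (about 0.98), so the combined rule keeps the input in-distribution, matching the "cara_pinjam" entry in the log below.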
combined_api.log
ADDED
The diff for this file is too large to render.
See raw diff
evaluate_model.py
ADDED
@@ -0,0 +1,208 @@
"""
Intent Classification Model Evaluator

This script analyzes the CSV data produced by the Flask app to generate
evaluation metrics and visualizations.
"""

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import argparse
from datetime import datetime


def load_evaluation_data(csv_path="model_evaluation.csv"):
    """Load the CSV data and do basic preprocessing."""
    if not os.path.exists(csv_path):
        print(f"Error: File {csv_path} not found")
        return None

    # Load the data
    df = pd.read_csv(csv_path)

    # Convert the string boolean to an actual boolean
    df['is_ood'] = df['is_ood'].apply(lambda x: str(x).lower() == 'true')

    # Convert timestamp to datetime
    df['timestamp'] = pd.to_datetime(df['timestamp'])

    print(f"Loaded {len(df)} evaluation records")
    return df


def generate_basic_stats(df):
    """Generate basic statistics from the evaluation data."""
    if df is None or len(df) == 0:
        return "No data available for analysis"

    stats = {
        "total_queries": len(df),
        "unique_queries": df['input_text'].nunique(),
        "in_distribution_count": (~df['is_ood']).sum(),
        "out_of_distribution_count": df['is_ood'].sum(),
        "ood_percentage": df['is_ood'].mean() * 100,
        "avg_confidence": df['confidence'].mean(),
        "avg_energy_score": df['energy_score'].mean(),
        "top_intents": df['predicted_intent'].value_counts().head(10).to_dict()
    }

    # Calculate metrics grouped by detection method
    method_stats = df.groupby('detection_method').agg({
        'is_ood': ['mean', 'count'],
        'confidence': ['mean', 'std'],
        'energy_score': ['mean', 'std']
    })

    return stats, method_stats


def plot_distributions(df, output_dir="evaluation_plots"):
    """Create plots for analyzing the model performance."""
    if df is None or len(df) == 0:
        print("No data available for plotting")
        return

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Plot 1: Confidence Distribution
    plt.figure(figsize=(10, 6))
    plt.hist(df[~df['is_ood']]['confidence'], bins=20, alpha=0.7, label='In-Distribution')
    plt.hist(df[df['is_ood']]['confidence'], bins=20, alpha=0.7, label='Out-of-Distribution')
    plt.xlabel('Confidence Score')
    plt.ylabel('Count')
    plt.title('Confidence Score Distribution')
    plt.legend()
    plt.savefig(os.path.join(output_dir, f"{timestamp}_confidence_distribution.png"))

    # Plot 2: Energy Score Distribution
    plt.figure(figsize=(10, 6))
    plt.hist(df[~df['is_ood']]['energy_score'], bins=20, alpha=0.7, label='In-Distribution')
    plt.hist(df[df['is_ood']]['energy_score'], bins=20, alpha=0.7, label='Out-of-Distribution')
    plt.xlabel('Energy Score')
    plt.ylabel('Count')
    plt.title('Energy Score Distribution')
    plt.legend()
    plt.savefig(os.path.join(output_dir, f"{timestamp}_energy_distribution.png"))

    # Plot 3: Intent Distribution (Top 10)
    intent_counts = df['predicted_intent'].value_counts().head(10)
    plt.figure(figsize=(12, 6))
    intent_counts.plot(kind='bar')
    plt.xlabel('Intent')
    plt.ylabel('Count')
    plt.title('Top 10 Predicted Intents')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f"{timestamp}_intent_distribution.png"))

    # Plot 4: OOD Detection Method Comparison
    plt.figure(figsize=(10, 6))
    method_ood = df.groupby('detection_method')['is_ood'].mean() * 100
    method_ood.plot(kind='bar')
    plt.xlabel('Detection Method')
    plt.ylabel('OOD Percentage')
    plt.title('OOD Detection Rate by Method')
    plt.savefig(os.path.join(output_dir, f"{timestamp}_ood_by_method.png"))

    print(f"Plots saved to {output_dir} directory")


def analyze_inputs(df):
    """Analyze input texts for patterns."""
    if df is None or len(df) == 0:
        return "No data available for analysis"

    # Basic text statistics
    df['text_length'] = df['input_text'].apply(len)
    df['word_count'] = df['input_text'].apply(lambda x: len(x.split()))

    text_stats = {
        "avg_text_length": df['text_length'].mean(),
        "avg_word_count": df['word_count'].mean(),
        "max_text_length": df['text_length'].max(),
        "min_text_length": df['text_length'].min()
    }

    # Analyze the correlation between text length and predictions
    length_vs_ood = df.groupby(pd.cut(df['text_length'], 10))['is_ood'].mean()
    length_vs_confidence = df.groupby(pd.cut(df['text_length'], 10))['confidence'].mean()

    print("\nInput Text Analysis:")
    print(f"Average text length: {text_stats['avg_text_length']:.1f} characters")
    print(f"Average word count: {text_stats['avg_word_count']:.1f} words")

    return text_stats, length_vs_ood, length_vs_confidence


def suggest_thresholds(df):
    """Analyze the data to suggest optimal thresholds for OOD detection."""
    if df is None or len(df) == 0 or len(df['is_ood'].unique()) < 2:
        return "Insufficient data for threshold analysis - need both OOD and non-OOD examples"

    # Simple suggestion based on average values
    suggested_energy = np.mean([
        df[df['is_ood']]['energy_score'].mean(),
        df[~df['is_ood']]['energy_score'].mean()
    ])

    suggested_msp = np.mean([
        df[df['is_ood']]['confidence'].mean(),
        df[~df['is_ood']]['confidence'].mean()
    ])

    print("\nThreshold Suggestions:")
    print(f"Current data suggests an energy threshold around: {suggested_energy:.4f}")
    print(f"Current data suggests an MSP threshold around: {suggested_msp:.4f}")
    print("Note: These are rough estimates. For proper threshold tuning,")
    print("you should use a dedicated validation set and ROC curve analysis.")

    return suggested_energy, suggested_msp


def main():
    parser = argparse.ArgumentParser(description="Analyze intent classification evaluation data")
    parser.add_argument('--csv', default='model_evaluation.csv', help='Path to the evaluation CSV file')
    parser.add_argument('--plots', default='evaluation_plots', help='Directory to save plots')
    parser.add_argument('--no-plots', action='store_true', help='Skip generating plots')
    args = parser.parse_args()

    print(f"Loading data from {args.csv}...")
    df = load_evaluation_data(args.csv)

    if df is not None and len(df) > 0:
        print("\n===== BASIC STATISTICS =====")
        stats, method_stats = generate_basic_stats(df)
        print(f"Total queries: {stats['total_queries']}")
        print(f"In-distribution queries: {stats['in_distribution_count']} ({100 - stats['ood_percentage']:.1f}%)")
        print(f"Out-of-distribution queries: {stats['out_of_distribution_count']} ({stats['ood_percentage']:.1f}%)")
        print(f"Average confidence score: {stats['avg_confidence']:.4f}")
        print(f"Average energy score: {stats['avg_energy_score']:.4f}")

        print("\nTop predicted intents:")
        for intent, count in list(stats['top_intents'].items())[:5]:
            print(f"  - {intent}: {count}")

        print("\n===== DETECTION METHOD COMPARISON =====")
        print(method_stats)

        # Analyze input texts
        analyze_inputs(df)

        # Suggest threshold values
        suggest_thresholds(df)

        # Generate plots if not disabled
        if not args.no_plots:
            plot_distributions(df, args.plots)

    print("\nAnalysis complete!")


if __name__ == "__main__":
    main()
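suggest_thresholds only averages the two group means; the script's own note recommends ROC analysis instead. A sketch of what that could look like with scikit-learn, assuming a validation DataFrame with a trustworthy ground-truth is_ood column and the same column layout load_evaluation_data produces (the helper names here are hypothetical, not part of the script):

import numpy as np
from sklearn.metrics import roc_curve

def tune_energy_threshold(df):
    """Pick the energy threshold maximizing Youden's J statistic (TPR - FPR)."""
    # Higher energy indicates OOD, so energy_score serves directly as the
    # positive-class score.
    fpr, tpr, thresholds = roc_curve(df['is_ood'], df['energy_score'])
    return thresholds[np.argmax(tpr - fpr)]

def tune_msp_threshold(df):
    """Same idea for MSP: lower confidence indicates OOD, so negate the score."""
    fpr, tpr, thresholds = roc_curve(df['is_ood'], -df['confidence'])
    return -thresholds[np.argmax(tpr - fpr)]

Either helper could be run on the frame returned by load_evaluation_data, e.g. tune_energy_threshold(load_evaluation_data('model_evaluation.csv')), once the logged predictions have been manually labeled.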
evaluation_plots/20250515_142829_confidence_distribution.png
ADDED
evaluation_plots/20250515_142829_energy_distribution.png
ADDED
evaluation_plots/20250515_142829_intent_distribution.png
ADDED
evaluation_plots/20250515_142829_ood_by_method.png
ADDED
hf.py
ADDED
@@ -0,0 +1,18 @@
from huggingface_hub import login, create_repo, upload_folder

# 1. 🔐 Log in using your HF token (get it from https://huggingface.co/settings/tokens)
login("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")  # <--- replace with your own token

# 2. 📁 Create the repo on Hugging Face (if it does not exist yet). Rename the repo as desired.
repo_name = "Bipa-Classification"  # any name, as long as it is unique in your account
create_repo(repo_name, private=False)

# 3. 🚀 Upload the model folder
upload_folder(
    folder_path="./model",                # this path is relative to your working dir
    path_in_repo="",                      # upload everything into the root of the repo
    repo_id="ZEROTSUDIOS/" + repo_name,   # <--- replace with your username
    repo_type="model"
)

print("✅ Upload completed!")
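Note that hf.py hardcodes the account token in the source, so the token ships with the upload. A safer pattern reads it from the environment; a minimal sketch, assuming the token has been exported as HF_TOKEN (an environment variable name chosen here, not one the script defines):

import os
from huggingface_hub import login

# export HF_TOKEN=hf_... before running, so no credential is committed.
login(os.environ["HF_TOKEN"])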
intent_api.log
ADDED
@@ -0,0 +1,504 @@
1 |
+
2025-05-10 20:51:10,272 - __main__ - INFO - Created evaluation CSV file: model_evaluation.csv
|
2 |
+
2025-05-10 20:51:10,275 - __main__ - INFO - Loading intent resources from C:\xampp\htdocs\ChatbotPerpusBipa\py\model...
|
3 |
+
2025-05-10 20:52:12,106 - __main__ - INFO - Intent resources loaded successfully
|
4 |
+
2025-05-10 20:52:12,108 - __main__ - INFO - Loaded 9 intent classes
|
5 |
+
2025-05-10 20:52:12,113 - __main__ - INFO - Thresholds: {'energy_threshold': -5.720269680023193, 'msp_threshold': 0.8734092712402344}
|
6 |
+
2025-05-10 20:52:12,114 - __main__ - INFO - Intent classification model loaded successfully!
|
7 |
+
2025-05-10 20:52:13,087 - werkzeug - INFO - [31m[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.[0m
|
8 |
+
* Running on all addresses (0.0.0.0)
|
9 |
+
* Running on http://127.0.0.1:5000
|
10 |
+
* Running on http://192.168.1.9:5000
|
11 |
+
2025-05-10 20:52:13,088 - werkzeug - INFO - [33mPress CTRL+C to quit[0m
|
12 |
+
2025-05-10 20:52:13,148 - werkzeug - INFO - * Restarting with stat
|
13 |
+
2025-05-10 20:52:43,569 - __main__ - INFO - Loading intent resources from C:\xampp\htdocs\ChatbotPerpusBipa\py\model...
|
14 |
+
2025-05-10 20:53:35,798 - __main__ - INFO - Loading intent resources from C:\xampp\htdocs\ChatbotPerpusBipa\py\model...
|
15 |
+
2025-05-10 20:53:38,986 - __main__ - INFO - Intent resources loaded successfully
|
16 |
+
2025-05-10 20:53:38,986 - __main__ - INFO - Loaded 9 intent classes
|
17 |
+
2025-05-10 20:53:38,986 - __main__ - INFO - Thresholds: {'energy_threshold': -5.720269680023193, 'msp_threshold': 0.8734092712402344}
|
18 |
+
2025-05-10 20:53:38,986 - __main__ - INFO - Intent classification model loaded successfully!
|
19 |
+
2025-05-10 20:53:39,220 - werkzeug - INFO - [31m[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.[0m
|
20 |
+
* Running on all addresses (0.0.0.0)
|
21 |
+
* Running on http://127.0.0.1:5000
|
22 |
+
* Running on http://192.168.1.9:5000
|
23 |
+
2025-05-10 20:53:39,220 - werkzeug - INFO - [33mPress CTRL+C to quit[0m
|
24 |
+
2025-05-10 20:53:39,236 - werkzeug - INFO - * Restarting with stat
|
25 |
+
2025-05-10 20:53:49,002 - __main__ - INFO - Loading intent resources from C:\xampp\htdocs\ChatbotPerpusBipa\py\model...
|
26 |
+
2025-05-10 21:03:09,865 - __main__ - INFO - Loading intent resources from C:\xampp\htdocs\ChatbotPerpusBipa\py\model...
|
27 |
+
2025-05-10 21:03:13,006 - __main__ - INFO - Intent resources loaded successfully
|
28 |
+
2025-05-10 21:03:13,006 - __main__ - INFO - Loaded 9 intent classes
|
29 |
+
2025-05-10 21:03:13,006 - __main__ - INFO - Thresholds: {'energy_threshold': -5.720269680023193, 'msp_threshold': 0.8734092712402344}
|
30 |
+
2025-05-10 21:03:13,006 - __main__ - INFO - Intent classification model loaded successfully!
|
31 |
+
2025-05-10 21:03:13,037 - werkzeug - INFO - [31m[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.[0m
|
32 |
+
* Running on all addresses (0.0.0.0)
|
33 |
+
* Running on http://127.0.0.1:5000
|
34 |
+
* Running on http://192.168.1.9:5000
|
35 |
+
2025-05-10 21:03:13,037 - werkzeug - INFO - [33mPress CTRL+C to quit[0m
|
36 |
+
2025-05-10 21:04:34,421 - __main__ - INFO -
|
37 |
+
========== INTENT PREDICTION DEBUG ==========
|
38 |
+
2025-05-10 21:04:34,421 - __main__ - INFO - Input Text: hi
|
39 |
+
2025-05-10 21:04:34,421 - __main__ - INFO - Detection Method: combined
|
40 |
+
2025-05-10 21:04:43,102 - __main__ - INFO - Logits: [[-1.3176556825637817, -1.1946855783462524, -0.858184278011322, -1.1091588735580444, -1.0160118341445923, 1.3968093395233154, 5.099667072296143, -1.0186958312988281, -0.7325793504714966]]
|
41 |
+
2025-05-10 21:04:43,618 - __main__ - INFO - Softmax Probabilities: [[0.0015701063675805926, 0.0017755558947101235, 0.0024858498945832253, 0.0019340959843248129, 0.0021229088306427, 0.023703157901763916, 0.9614725708961487, 0.0021172184497117996, 0.0028185418341308832]]
|
42 |
+
2025-05-10 21:04:43,633 - __main__ - INFO - Max Probability (Confidence): 0.9615
|
43 |
+
2025-05-10 21:04:43,633 - __main__ - INFO - Predicted Index: 6
|
44 |
+
2025-05-10 21:04:44,086 - __main__ - INFO - Energy Score: -5.1390
|
45 |
+
2025-05-10 21:04:44,086 - __main__ - INFO - OOD Detection -> is_ood: False
|
46 |
+
2025-05-10 21:04:44,086 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
|
47 |
+
2025-05-10 21:04:44,086 - __main__ - INFO - Predicted Intent: greeting
|
48 |
+
2025-05-10 21:04:44,086 - __main__ - INFO - =============================================
|
49 |
+
|
50 |
+
2025-05-10 21:04:44,086 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:04:44] "POST /api/analyze HTTP/1.1" 200 -
|
51 |
+
2025-05-10 21:10:56,497 - __main__ - INFO -
|
52 |
+
========== INTENT PREDICTION DEBUG ==========
|
53 |
+
2025-05-10 21:10:56,498 - __main__ - INFO - Input Text: selamat pagi, saya inginn tau cara meminjam buku boleh? makasih
|
54 |
+
2025-05-10 21:10:56,499 - __main__ - INFO - Detection Method: combined
|
55 |
+
2025-05-10 21:10:58,421 - __main__ - INFO - Logits: [[5.399814605712891, 0.087623231112957, -0.8307769298553467, -0.6626364588737488, -0.7578122615814209, 0.10144484788179398, -0.7953941226005554, -0.3702244460582733, -1.1888351440429688]]
|
56 |
+
2025-05-10 21:10:58,423 - __main__ - INFO - Softmax Probabilities: [[0.9776320457458496, 0.0048208096995949745, 0.001924260170198977, 0.002276598708704114, 0.0020699123851954937, 0.0048879035748541355, 0.0019935655873268843, 0.0030498558189719915, 0.0013451204868033528]]
|
57 |
+
2025-05-10 21:10:58,424 - __main__ - INFO - Max Probability (Confidence): 0.9776
|
58 |
+
2025-05-10 21:10:58,427 - __main__ - INFO - Predicted Index: 0
|
59 |
+
2025-05-10 21:10:58,430 - __main__ - INFO - Energy Score: -5.4224
|
60 |
+
2025-05-10 21:10:58,432 - __main__ - INFO - OOD Detection -> is_ood: False
|
61 |
+
2025-05-10 21:10:58,437 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
|
62 |
+
2025-05-10 21:10:58,440 - __main__ - INFO - Predicted Intent: cara_pinjam
|
63 |
+
2025-05-10 21:10:58,442 - __main__ - INFO - =============================================
|
64 |
+
|
65 |
+
2025-05-10 21:10:58,449 - test - ERROR - Exception on /api/analyze [POST]
|
66 |
+
Traceback (most recent call last):
|
67 |
+
File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\flask\app.py", line 1473, in wsgi_app
|
68 |
+
response = self.full_dispatch_request()
|
69 |
+
File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\flask\app.py", line 882, in full_dispatch_request
|
70 |
+
rv = self.handle_user_exception(e)
|
71 |
+
File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\flask_cors\extension.py", line 176, in wrapped_function
|
72 |
+
return cors_after_request(app.make_response(f(*args, **kwargs)))
|
73 |
+
File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\flask\app.py", line 880, in full_dispatch_request
|
74 |
+
rv = self.dispatch_request()
|
75 |
+
File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\flask\app.py", line 865, in dispatch_request
|
76 |
+
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return]
|
77 |
+
File "C:\xampp\htdocs\ChatbotPerpusBipa\py\test.py", line 234, in analyze
|
78 |
+
save_prediction_to_csv(text, result, method)
|
79 |
+
File "C:\xampp\htdocs\ChatbotPerpusBipa\py\test.py", line 52, in save_prediction_to_csv
|
80 |
+
with open(EVAL_CSV, 'a', newline='') as f:
|
81 |
+
PermissionError: [Errno 13] Permission denied: 'model_evaluation.csv'
|
82 |
+
2025-05-10 21:11:03,159 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:11:03] "[35m[1mPOST /api/analyze HTTP/1.1[0m" 500 -
|
83 |
+
2025-05-10 21:11:30,805 - __main__ - INFO -
|
84 |
+
========== INTENT PREDICTION DEBUG ==========
|
85 |
+
2025-05-10 21:11:30,806 - __main__ - INFO - Input Text: selamat pagi, saya inginn tau cara meminjam buku boleh? makasih
|
86 |
+
2025-05-10 21:11:30,807 - __main__ - INFO - Detection Method: combined
|
87 |
+
2025-05-10 21:11:32,361 - __main__ - INFO - Logits: [[5.399814605712891, 0.087623231112957, -0.8307769298553467, -0.6626364588737488, -0.7578122615814209, 0.10144484788179398, -0.7953941226005554, -0.3702244460582733, -1.1888351440429688]]
|
88 |
+
2025-05-10 21:11:32,365 - __main__ - INFO - Softmax Probabilities: [[0.9776320457458496, 0.0048208096995949745, 0.001924260170198977, 0.002276598708704114, 0.0020699123851954937, 0.0048879035748541355, 0.0019935655873268843, 0.0030498558189719915, 0.0013451204868033528]]
|
89 |
+
2025-05-10 21:11:32,370 - __main__ - INFO - Max Probability (Confidence): 0.9776
|
90 |
+
2025-05-10 21:11:32,373 - __main__ - INFO - Predicted Index: 0
|
91 |
+
2025-05-10 21:11:32,379 - __main__ - INFO - Energy Score: -5.4224
|
92 |
+
2025-05-10 21:11:32,387 - __main__ - INFO - OOD Detection -> is_ood: False
|
93 |
+
2025-05-10 21:11:32,390 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
|
94 |
+
2025-05-10 21:11:32,393 - __main__ - INFO - Predicted Intent: cara_pinjam
|
95 |
+
2025-05-10 21:11:32,397 - __main__ - INFO - =============================================
|
96 |
+
|
97 |
+
2025-05-10 21:11:32,412 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:11:32] "POST /api/analyze HTTP/1.1" 200 -
|
98 |
+
2025-05-10 21:20:06,316 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:20:06,316 - __main__ - INFO - Input Text: hello nigga
2025-05-10 21:20:06,316 - __main__ - INFO - Detection Method: combined
2025-05-10 21:20:07,046 - __main__ - INFO - Logits: [[-1.059308648109436, -1.1259849071502686, -0.7732775807380676, -1.1819493770599365, -1.1710785627365112, 1.4521418809890747, 5.11907958984375, -1.0589956045150757, -1.0377000570297241]]
2025-05-10 21:20:07,046 - __main__ - INFO - Softmax Probabilities: [[0.0019936026073992252, 0.0018650107085704803, 0.0026537510566413403, 0.0017635031836107373, 0.001782778650522232, 0.02456674538552761, 0.9613432288169861, 0.001994226360693574, 0.002037149854004383]]
2025-05-10 21:20:07,046 - __main__ - INFO - Max Probability (Confidence): 0.9613
2025-05-10 21:20:07,046 - __main__ - INFO - Predicted Index: 6
2025-05-10 21:20:07,046 - __main__ - INFO - Energy Score: -5.1585
2025-05-10 21:20:07,046 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:20:07,046 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:20:07,062 - __main__ - INFO - Predicted Intent: greeting
2025-05-10 21:20:07,062 - __main__ - INFO - =============================================

2025-05-10 21:20:07,062 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:20:07] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:20:12,732 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:20:12,732 - __main__ - INFO - Input Text: jawir
2025-05-10 21:20:12,732 - __main__ - INFO - Detection Method: combined
2025-05-10 21:20:13,156 - __main__ - INFO - Logits: [[-1.6685775518417358, -1.273998737335205, 2.2453248500823975, 3.137083053588867, -0.7543082237243652, -1.4736096858978271, 2.1617510318756104, -1.6372283697128296, -1.1733094453811646]]
2025-05-10 21:20:13,156 - __main__ - INFO - Softmax Probabilities: [[0.004400658421218395, 0.006529518403112888, 0.2204468846321106, 0.537761390209198, 0.010979431681334972, 0.0053479960188269615, 0.20277215540409088, 0.004540801048278809, 0.007221210282295942]]
2025-05-10 21:20:13,156 - __main__ - INFO - Max Probability (Confidence): 0.5378
2025-05-10 21:20:13,156 - __main__ - INFO - Predicted Index: 3
2025-05-10 21:20:13,156 - __main__ - INFO - Energy Score: -3.7574
2025-05-10 21:20:13,156 - __main__ - INFO - OOD Detection -> is_ood: True
2025-05-10 21:20:13,156 - __main__ - INFO - Prediction marked as OUT-OF-DISTRIBUTION.
2025-05-10 21:20:13,156 - __main__ - INFO - Predicted Intent: unknown
2025-05-10 21:20:13,171 - __main__ - INFO - =============================================

2025-05-10 21:20:13,249 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:20:13] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:23:46,590 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:23:46,592 - __main__ - INFO - Input Text: aku pergi
2025-05-10 21:23:46,593 - __main__ - INFO - Detection Method: combined
2025-05-10 21:23:49,019 - __main__ - INFO - Logits: [[-0.9422286748886108, -0.8617181777954102, -1.3514978885650635, 0.15106375515460968, -1.3091967105865479, 4.960031509399414, 0.3698965013027191, -0.15400801599025726, -0.9507330656051636]]
2025-05-10 21:23:49,113 - __main__ - INFO - Softmax Probabilities: [[0.0026371763087809086, 0.002858277875930071, 0.0017514426726847887, 0.007869554683566093, 0.0018271200824528933, 0.9648464918136597, 0.009794626384973526, 0.005800415761768818, 0.0026148436591029167]]
2025-05-10 21:23:49,128 - __main__ - INFO - Max Probability (Confidence): 0.9648
2025-05-10 21:23:49,128 - __main__ - INFO - Predicted Index: 5
2025-05-10 21:23:49,128 - __main__ - INFO - Energy Score: -4.9958
2025-05-10 21:23:49,144 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:23:49,144 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:23:49,144 - __main__ - INFO - Predicted Intent: goodbye
2025-05-10 21:23:49,159 - __main__ - INFO - =============================================

2025-05-10 21:23:49,175 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:23:49] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:23:55,322 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:23:55,322 - __main__ - INFO - Input Text: ga mau
2025-05-10 21:23:55,322 - __main__ - INFO - Detection Method: combined
2025-05-10 21:23:55,676 - __main__ - INFO - Logits: [[-0.9056248068809509, -0.013884905725717545, 0.027644459158182144, 5.6327104568481445, -0.8719817996025085, -0.5354712009429932, -0.7891335487365723, -0.8269177079200745, -0.5340971946716309]]
2025-05-10 21:23:55,676 - __main__ - INFO - Softmax Probabilities: [[0.0014219597214832902, 0.003468685783445835, 0.0036157723516225815, 0.9827662706375122, 0.0014706128276884556, 0.0020589372143149376, 0.0015976395225152373, 0.0015384004218503833, 0.002061767503619194]]
2025-05-10 21:23:55,676 - __main__ - INFO - Max Probability (Confidence): 0.9828
2025-05-10 21:23:55,676 - __main__ - INFO - Predicted Index: 3
2025-05-10 21:23:55,676 - __main__ - INFO - Energy Score: -5.6501
2025-05-10 21:23:55,676 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:23:55,676 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:23:55,676 - __main__ - INFO - Predicted Intent: denied
2025-05-10 21:23:55,676 - __main__ - INFO - =============================================

2025-05-10 21:23:55,676 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:23:55] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:24:02,159 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:24:02,159 - __main__ - INFO - Input Text: cariin buku :)
2025-05-10 21:24:02,159 - __main__ - INFO - Detection Method: combined
2025-05-10 21:24:02,801 - __main__ - INFO - Logits: [[-0.4540617763996124, 5.4417572021484375, -0.9955655336380005, -0.5665326714515686, 0.22001151740550995, -0.4966876804828644, -0.975710391998291, -0.5621989965438843, -1.544423222541809]]
2025-05-10 21:24:02,817 - __main__ - INFO - Softmax Probabilities: [[0.002697325311601162, 0.9805168509483337, 0.0015695001929998398, 0.002410393673926592, 0.005292730871587992, 0.002584765199571848, 0.001600974122993648, 0.002420861506834626, 0.0009065577760338783]]
2025-05-10 21:24:02,817 - __main__ - INFO - Max Probability (Confidence): 0.9805
2025-05-10 21:24:02,817 - __main__ - INFO - Predicted Index: 1
2025-05-10 21:24:02,817 - __main__ - INFO - Energy Score: -5.4614
2025-05-10 21:24:02,817 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:24:02,817 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:24:02,817 - __main__ - INFO - Predicted Intent: cari_buku
2025-05-10 21:24:02,817 - __main__ - INFO - =============================================

2025-05-10 21:24:02,832 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:24:02] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:24:08,582 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:24:08] "POST /api/recommend HTTP/1.1" 404 -
2025-05-10 21:24:24,640 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:24:24,655 - __main__ - INFO - Input Text: engga
2025-05-10 21:24:24,655 - __main__ - INFO - Detection Method: combined
2025-05-10 21:24:24,971 - __main__ - INFO - Logits: [[-1.0960450172424316, -0.19882100820541382, 1.0277540683746338, 5.673742771148682, -0.9991940855979919, -0.5354593992233276, -1.1831791400909424, -0.9452515244483948, -0.6324564814567566]]
2025-05-10 21:24:24,971 - __main__ - INFO - Softmax Probabilities: [[0.0011242710752412677, 0.002757594920694828, 0.009402111172676086, 0.9793829321861267, 0.0012386050075292587, 0.001969383331015706, 0.0010304549941793084, 0.0013072536094114184, 0.0017873314209282398]]
2025-05-10 21:24:24,971 - __main__ - INFO - Max Probability (Confidence): 0.9794
2025-05-10 21:24:24,971 - __main__ - INFO - Predicted Index: 3
2025-05-10 21:24:24,986 - __main__ - INFO - Energy Score: -5.6946
2025-05-10 21:24:24,986 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:24:24,986 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:24:24,986 - __main__ - INFO - Predicted Intent: denied
2025-05-10 21:24:24,986 - __main__ - INFO - =============================================

2025-05-10 21:24:25,002 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:24:25] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:24:35,923 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:24:35,923 - __main__ - INFO - Input Text: kali ini gimana cara jadi anggota
2025-05-10 21:24:35,939 - __main__ - INFO - Detection Method: combined
2025-05-10 21:24:36,611 - __main__ - INFO - Logits: [[0.7106897234916687, -1.2784496545791626, -1.4926022291183472, -0.25034889578819275, -1.2656588554382324, -0.8868540525436401, 0.061691418290138245, -1.14139723777771, 5.139155864715576]]
2025-05-10 21:24:36,627 - __main__ - INFO - Softmax Probabilities: [[0.011566980741918087, 0.0015825150767341256, 0.0012774458155035973, 0.0044243172742426395, 0.0016028864774852991, 0.0023410762660205364, 0.0060445452108979225, 0.0018149681854993105, 0.969345211982727]]
2025-05-10 21:24:36,627 - __main__ - INFO - Max Probability (Confidence): 0.9693
2025-05-10 21:24:36,627 - __main__ - INFO - Predicted Index: 8
2025-05-10 21:24:36,627 - __main__ - INFO - Energy Score: -5.1703
2025-05-10 21:24:36,627 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:24:36,627 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:24:36,627 - __main__ - INFO - Predicted Intent: keanggotaan
2025-05-10 21:24:36,643 - __main__ - INFO - =============================================

2025-05-10 21:24:36,643 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:24:36] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:24:40,108 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:24:40,108 - __main__ - INFO - Input Text: apakah gw anggota
2025-05-10 21:24:40,108 - __main__ - INFO - Detection Method: combined
2025-05-10 21:24:40,612 - __main__ - INFO - Logits: [[-1.2273778915405273, -1.939422369003296, -1.204856276512146, -0.7936017513275146, -0.8146175742149353, -0.17088937759399414, 1.614134669303894, -1.2518943548202515, 4.8576788902282715]]
2025-05-10 21:24:40,613 - __main__ - INFO - Softmax Probabilities: [[0.002146817045286298, 0.0010533147724345326, 0.002195715205743909, 0.0033126971684396267, 0.003243803745135665, 0.006174789741635323, 0.036800041794776917, 0.002094824332743883, 0.9429781436920166]]
2025-05-10 21:24:40,672 - __main__ - INFO - Max Probability (Confidence): 0.9430
2025-05-10 21:24:40,672 - __main__ - INFO - Predicted Index: 8
2025-05-10 21:24:40,672 - __main__ - INFO - Energy Score: -4.9164
2025-05-10 21:24:40,672 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:24:40,672 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:24:40,672 - __main__ - INFO - Predicted Intent: keanggotaan
2025-05-10 21:24:40,672 - __main__ - INFO - =============================================

2025-05-10 21:24:40,687 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:24:40] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:28:41,246 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:28:41,246 - __main__ - INFO - Input Text: apa iyah?
2025-05-10 21:28:41,262 - __main__ - INFO - Detection Method: combined
2025-05-10 21:28:41,778 - __main__ - INFO - Logits: [[-0.7390039563179016, -2.113006353378296, 3.0566437244415283, 3.1234726905822754, -1.570651650428772, -1.8655962944030762, 1.214770793914795, -1.274828314781189, -0.047212935984134674]]
2025-05-10 21:28:41,778 - __main__ - INFO - Softmax Probabilities: [[0.009639445692300797, 0.0024396663065999746, 0.42902329564094543, 0.45867419242858887, 0.004196353256702423, 0.0031244901474565268, 0.06800887733697891, 0.005640873685479164, 0.01925276406109333]]
2025-05-10 21:28:41,778 - __main__ - INFO - Max Probability (Confidence): 0.4587
2025-05-10 21:28:41,794 - __main__ - INFO - Predicted Index: 3
2025-05-10 21:28:41,794 - __main__ - INFO - Energy Score: -3.9029
2025-05-10 21:28:41,794 - __main__ - INFO - OOD Detection -> is_ood: True
2025-05-10 21:28:41,794 - __main__ - INFO - Prediction marked as OUT-OF-DISTRIBUTION.
2025-05-10 21:28:41,794 - __main__ - INFO - Predicted Intent: unknown
2025-05-10 21:28:41,794 - __main__ - INFO - =============================================

2025-05-10 21:28:41,794 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:28:41] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:28:50,002 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:28:50,006 - __main__ - INFO - Input Text: yaudah deh iya
2025-05-10 21:28:50,010 - __main__ - INFO - Detection Method: combined
2025-05-10 21:28:50,508 - __main__ - INFO - Logits: [[-0.8829267621040344, -1.275931477546692, 5.552294731140137, 0.9029282331466675, -0.8707864880561829, -0.06326564401388168, -1.2235711812973022, -1.3446561098098755, -1.2907097339630127]]
2025-05-10 21:28:50,509 - __main__ - INFO - Softmax Probabilities: [[0.0015714645851403475, 0.001060779090039432, 0.9796836376190186, 0.009373282082378864, 0.0015906589105725288, 0.003566801082342863, 0.0011178012937307358, 0.0009903260506689548, 0.0010452179703861475]]
2025-05-10 21:28:50,514 - __main__ - INFO - Max Probability (Confidence): 0.9797
2025-05-10 21:28:50,514 - __main__ - INFO - Predicted Index: 2
2025-05-10 21:28:50,514 - __main__ - INFO - Energy Score: -5.5728
2025-05-10 21:28:50,514 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:28:50,514 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:28:50,514 - __main__ - INFO - Predicted Intent: confirm
2025-05-10 21:28:50,514 - __main__ - INFO - =============================================

2025-05-10 21:28:50,534 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:28:50] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:28:58,131 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:28:58,131 - __main__ - INFO - Input Text: apa bedanya cari buku sama pinjam buku
2025-05-10 21:28:58,131 - __main__ - INFO - Detection Method: combined
2025-05-10 21:28:58,711 - __main__ - INFO - Logits: [[5.502951622009277, 1.039902925491333, -1.1021047830581665, -0.5494447946548462, -0.45617687702178955, -0.8856024742126465, -1.2792505025863647, -0.7736234068870544, -0.5327207446098328]]
2025-05-10 21:28:58,711 - __main__ - INFO - Softmax Probabilities: [[0.9757035970687866, 0.01124709565192461, 0.0013206215808168054, 0.002295068232342601, 0.0025194245390594006, 0.0016398499719798565, 0.0011062286794185638, 0.0018341547111049294, 0.00233377399854362]]
2025-05-10 21:28:58,711 - __main__ - INFO - Max Probability (Confidence): 0.9757
2025-05-10 21:28:58,711 - __main__ - INFO - Predicted Index: 0
2025-05-10 21:28:58,711 - __main__ - INFO - Energy Score: -5.5275
2025-05-10 21:28:58,711 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:28:58,727 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:28:58,727 - __main__ - INFO - Predicted Intent: cara_pinjam
2025-05-10 21:28:58,727 - __main__ - INFO - =============================================

2025-05-10 21:28:58,885 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:28:58] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:37:55,693 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:38:01,389 - __main__ - INFO - Input Text: apa yang anda tahu tentang benda benda di perpustakaan?
2025-05-10 21:38:01,391 - __main__ - INFO - Detection Method: combined
2025-05-10 21:38:04,009 - __main__ - INFO - Logits: [[-0.7697362899780273, 1.3467382192611694, -1.27949059009552, -1.299091100692749, 5.202969551086426, -1.3397959470748901, -0.9824981689453125, -0.4959643483161926, -0.7740484476089478]]
2025-05-10 21:38:04,025 - __main__ - INFO - Softmax Probabilities: [[0.002458558650687337, 0.020410509780049324, 0.0014767165994271636, 0.0014480534009635448, 0.9651476740837097, 0.0013902944047003984, 0.0019873722922056913, 0.003232794813811779, 0.0024479799903929234]]
2025-05-10 21:38:04,041 - __main__ - INFO - Max Probability (Confidence): 0.9651
2025-05-10 21:38:04,041 - __main__ - INFO - Predicted Index: 4
2025-05-10 21:38:04,056 - __main__ - INFO - Energy Score: -5.2384
2025-05-10 21:38:04,056 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:38:04,056 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:38:04,056 - __main__ - INFO - Predicted Intent: fasilitas
2025-05-10 21:38:04,056 - __main__ - INFO - =============================================

2025-05-10 21:38:04,119 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:38:04] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:38:14,947 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:38:14,947 - __main__ - INFO - Input Text: apa yang anda tahu tentang benda benda yang ada di perpustakaan?
2025-05-10 21:38:14,947 - __main__ - INFO - Detection Method: combined
2025-05-10 21:38:15,790 - __main__ - INFO - Logits: [[-0.7680988907814026, 1.2449392080307007, -1.196992039680481, -1.266596794128418, 5.14818000793457, -1.350130319595337, -1.160045862197876, -0.40871661901474, -0.8047224283218384]]
2025-05-10 21:38:15,790 - __main__ - INFO - Softmax Probabilities: [[0.002601428423076868, 0.019474362954497337, 0.0016941269859671593, 0.0015802178531885147, 0.9652040600776672, 0.001453579985536635, 0.0017578894039615989, 0.003726400900632143, 0.002507878467440605]]
2025-05-10 21:38:15,790 - __main__ - INFO - Max Probability (Confidence): 0.9652
2025-05-10 21:38:15,806 - __main__ - INFO - Predicted Index: 4
2025-05-10 21:38:15,806 - __main__ - INFO - Energy Score: -5.1836
2025-05-10 21:38:15,806 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:38:15,806 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:38:15,806 - __main__ - INFO - Predicted Intent: fasilitas
2025-05-10 21:38:15,806 - __main__ - INFO - =============================================

2025-05-10 21:38:15,821 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:38:15] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:38:26,471 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:38:26,475 - __main__ - INFO - Input Text: apa yang anda tahu tentang fasility yang ada di perpustakaan?
2025-05-10 21:38:26,477 - __main__ - INFO - Detection Method: combined
2025-05-10 21:38:27,385 - __main__ - INFO - Logits: [[-1.6402337551116943, 0.28838950395584106, -1.0334101915359497, -1.377264380455017, 5.436436653137207, -1.3821498155593872, -1.1186164617538452, -0.2397802472114563, -0.14053963124752045]]
2025-05-10 21:38:27,399 - __main__ - INFO - Softmax Probabilities: [[0.0008288080571219325, 0.005702228285372257, 0.0015205274103209376, 0.0010781027376651764, 0.9813252687454224, 0.0010728489141911268, 0.0013963347300887108, 0.00336250732652843, 0.003713324898853898]]
2025-05-10 21:38:27,414 - __main__ - INFO - Max Probability (Confidence): 0.9813
2025-05-10 21:38:27,414 - __main__ - INFO - Predicted Index: 4
2025-05-10 21:38:27,414 - __main__ - INFO - Energy Score: -5.4553
2025-05-10 21:38:27,414 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:38:27,414 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:38:27,414 - __main__ - INFO - Predicted Intent: fasilitas
2025-05-10 21:38:27,414 - __main__ - INFO - =============================================

2025-05-10 21:38:27,414 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:38:27] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:38:40,887 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:38:40,888 - __main__ - INFO - Input Text: apa yang anda tahu tentang fasilitas yang ada di perpustakaan?
2025-05-10 21:38:40,891 - __main__ - INFO - Detection Method: combined
2025-05-10 21:38:41,756 - __main__ - INFO - Logits: [[-1.6494208574295044, 0.12875649333000183, -0.980239748954773, -1.2830331325531006, 5.473063945770264, -1.3103773593902588, -0.9920451045036316, -0.4207743704319, -0.007997849956154823]]
2025-05-10 21:38:41,756 - __main__ - INFO - Softmax Probabilities: [[0.0007925480604171753, 0.004691137932240963, 0.00154755893163383, 0.0011432621395215392, 0.9823843240737915, 0.0011124236043542624, 0.0015293973265215755, 0.002707820851355791, 0.0040915366262197495]]
2025-05-10 21:38:41,756 - __main__ - INFO - Max Probability (Confidence): 0.9824
2025-05-10 21:38:41,756 - __main__ - INFO - Predicted Index: 4
2025-05-10 21:38:41,756 - __main__ - INFO - Energy Score: -5.4908
2025-05-10 21:38:41,756 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:38:41,772 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:38:41,855 - __main__ - INFO - Predicted Intent: fasilitas
2025-05-10 21:38:41,855 - __main__ - INFO - =============================================

2025-05-10 21:38:41,868 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:38:41] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:38:51,135 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:38:51,135 - __main__ - INFO - Input Text: fasilitas yang ada di perpustakaan?
2025-05-10 21:38:51,135 - __main__ - INFO - Detection Method: combined
2025-05-10 21:38:51,684 - __main__ - INFO - Logits: [[-1.697698712348938, -0.4591014087200165, -0.9102374315261841, -1.1577955484390259, 5.437514305114746, -1.2155272960662842, -0.505920946598053, -0.38150474429130554, -0.2966429889202118]]
2025-05-10 21:38:51,684 - __main__ - INFO - Softmax Probabilities: [[0.0007834106218069792, 0.00270336982794106, 0.0017217874992638826, 0.0013442077906802297, 0.9834970235824585, 0.00126880151219666, 0.0025797162670642138, 0.0029214955866336823, 0.0031802428420633078]]
2025-05-10 21:38:51,699 - __main__ - INFO - Max Probability (Confidence): 0.9835
2025-05-10 21:38:51,699 - __main__ - INFO - Predicted Index: 4
2025-05-10 21:38:51,699 - __main__ - INFO - Energy Score: -5.4542
2025-05-10 21:38:51,699 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:38:51,699 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:38:51,699 - __main__ - INFO - Predicted Intent: fasilitas
2025-05-10 21:38:51,699 - __main__ - INFO - =============================================

2025-05-10 21:38:51,715 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:38:51] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:38:57,269 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:38:57,273 - __main__ - INFO - Input Text: fasilitas?
2025-05-10 21:38:57,277 - __main__ - INFO - Detection Method: combined
2025-05-10 21:38:57,554 - __main__ - INFO - Logits: [[-2.5640785694122314, -1.0151931047439575, -0.9696947932243347, -1.25716233253479, 3.5866546630859375, -0.20148932933807373, 2.6498327255249023, -0.7469539642333984, -0.534983217716217]]
2025-05-10 21:38:57,554 - __main__ - INFO - Softmax Probabilities: [[0.0014459670055657625, 0.0068050408735871315, 0.007121811155229807, 0.005342504940927029, 0.6782468557357788, 0.01535386499017477, 0.26578542590141296, 0.008898678235709667, 0.010999760590493679]]
2025-05-10 21:38:57,554 - __main__ - INFO - Max Probability (Confidence): 0.6782
2025-05-10 21:38:57,554 - __main__ - INFO - Predicted Index: 4
2025-05-10 21:38:57,570 - __main__ - INFO - Energy Score: -3.9749
2025-05-10 21:38:57,570 - __main__ - INFO - OOD Detection -> is_ood: True
2025-05-10 21:38:57,570 - __main__ - INFO - Prediction marked as OUT-OF-DISTRIBUTION.
2025-05-10 21:38:57,570 - __main__ - INFO - Predicted Intent: unknown
2025-05-10 21:38:57,586 - __main__ - INFO - =============================================

2025-05-10 21:38:57,586 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:38:57] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:57:53,426 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:57:53,426 - __main__ - INFO - Input Text: apa yang anda tahu tentang benda benda yang ada di perpustakaan?
2025-05-10 21:57:53,426 - __main__ - INFO - Detection Method: combined
2025-05-10 21:57:54,536 - __main__ - INFO - Logits: [[-0.7680988907814026, 1.2449392080307007, -1.196992039680481, -1.266596794128418, 5.14818000793457, -1.350130319595337, -1.160045862197876, -0.40871661901474, -0.8047224283218384]]
2025-05-10 21:57:54,536 - __main__ - INFO - Softmax Probabilities: [[0.002601428423076868, 0.019474362954497337, 0.0016941269859671593, 0.0015802178531885147, 0.9652040600776672, 0.001453579985536635, 0.0017578894039615989, 0.003726400900632143, 0.002507878467440605]]
2025-05-10 21:57:54,536 - __main__ - INFO - Max Probability (Confidence): 0.9652
2025-05-10 21:57:54,536 - __main__ - INFO - Predicted Index: 4
2025-05-10 21:57:54,536 - __main__ - INFO - Energy Score: -5.1836
2025-05-10 21:57:54,551 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:57:54,551 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:57:54,567 - __main__ - INFO - Predicted Intent: fasilitas
2025-05-10 21:57:54,598 - __main__ - INFO - =============================================

2025-05-10 21:57:54,676 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:57:54] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:58:09,650 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:58:09,650 - __main__ - INFO - Input Text: selamat pagi dan selamat malam dan selamat siang
2025-05-10 21:58:09,650 - __main__ - INFO - Detection Method: combined
2025-05-10 21:58:10,353 - __main__ - INFO - Logits: [[-1.015725016593933, -1.7672958374023438, -0.09375888109207153, -0.000339341553626582, -0.7917280793190002, 0.4557315409183502, 3.838515281677246, -0.13580496609210968, -1.4231810569763184]]
2025-05-10 21:58:10,353 - __main__ - INFO - Softmax Probabilities: [[0.006958352401852608, 0.003281734185293317, 0.017494892701506615, 0.01920803263783455, 0.00870536733418703, 0.030307628214359283, 0.8926397562026978, 0.016774550080299377, 0.0046296752989292145]]
2025-05-10 21:58:10,353 - __main__ - INFO - Max Probability (Confidence): 0.8926
2025-05-10 21:58:10,353 - __main__ - INFO - Predicted Index: 6
2025-05-10 21:58:10,368 - __main__ - INFO - Energy Score: -3.9521
2025-05-10 21:58:10,368 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:58:10,368 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:58:10,368 - __main__ - INFO - Predicted Intent: greeting
2025-05-10 21:58:10,368 - __main__ - INFO - =============================================

2025-05-10 21:58:10,368 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:58:10] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:58:16,363 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:58:16,363 - __main__ - INFO - Input Text: lo jawa
2025-05-10 21:58:16,363 - __main__ - INFO - Detection Method: combined
2025-05-10 21:58:16,774 - __main__ - INFO - Logits: [[-1.2190250158309937, -1.6057887077331543, 2.1177775859832764, 3.414398670196533, -1.2115315198898315, -1.0860720872879028, 1.689135193824768, -2.435594320297241, -0.16704648733139038]]
2025-05-10 21:58:16,775 - __main__ - INFO - Softmax Probabilities: [[0.006397695280611515, 0.004345645196735859, 0.17996107041835785, 0.6581031084060669, 0.0064458162523806095, 0.007307425606995821, 0.11722534894943237, 0.001895284280180931, 0.01831859163939953]]
2025-05-10 21:58:16,802 - __main__ - INFO - Max Probability (Confidence): 0.6581
2025-05-10 21:58:16,802 - __main__ - INFO - Predicted Index: 3
2025-05-10 21:58:16,818 - __main__ - INFO - Energy Score: -3.8328
2025-05-10 21:58:16,818 - __main__ - INFO - OOD Detection -> is_ood: True
2025-05-10 21:58:16,818 - __main__ - INFO - Prediction marked as OUT-OF-DISTRIBUTION.
2025-05-10 21:58:16,818 - __main__ - INFO - Predicted Intent: unknown
2025-05-10 21:58:16,834 - __main__ - INFO - =============================================

2025-05-10 21:58:16,849 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:58:16] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:58:29,883 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:58:29,883 - __main__ - INFO - Input Text: Assalamualaikum wr wb
2025-05-10 21:58:29,883 - __main__ - INFO - Detection Method: combined
2025-05-10 21:58:32,791 - __main__ - INFO - Logits: [[-1.131690263748169, -1.5006943941116333, -0.6151072978973389, -1.1938962936401367, -1.1617226600646973, 1.5072420835494995, 5.034933567047119, -1.0742135047912598, -0.8630368709564209]]
2025-05-10 21:58:33,003 - __main__ - INFO - Softmax Probabilities: [[0.0020071538165211678, 0.0013877918245270848, 0.0033645734656602144, 0.0018861013231799006, 0.0019477707101032138, 0.028096651658415794, 0.956558346748352, 0.002125898841768503, 0.0026257631834596395]]
2025-05-10 21:58:33,034 - __main__ - INFO - Max Probability (Confidence): 0.9566
2025-05-10 21:58:33,034 - __main__ - INFO - Predicted Index: 6
2025-05-10 21:58:33,034 - __main__ - INFO - Energy Score: -5.0793
2025-05-10 21:58:33,034 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:58:33,034 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:58:33,034 - __main__ - INFO - Predicted Intent: greeting
2025-05-10 21:58:33,049 - __main__ - INFO - =============================================

2025-05-10 21:58:33,113 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:58:33] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:58:40,238 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:58:40,238 - __main__ - INFO - Input Text: ass
2025-05-10 21:58:40,238 - __main__ - INFO - Detection Method: combined
2025-05-10 21:58:40,584 - __main__ - INFO - Logits: [[-1.3392796516418457, -1.524475336074829, 0.16145886480808258, -0.7817472815513611, -1.299397349357605, 1.4473503828048706, 5.0309271812438965, -1.4974311590194702, -0.9901137948036194]]
2025-05-10 21:58:40,584 - __main__ - INFO - Softmax Probabilities: [[0.0016345757758244872, 0.0013582368846982718, 0.007331073749810457, 0.002854553982615471, 0.0017010837327688932, 0.026523033156991005, 0.9548842906951904, 0.0013954705791547894, 0.0023176397662609816]]
2025-05-10 21:58:40,584 - __main__ - INFO - Max Probability (Confidence): 0.9549
2025-05-10 21:58:40,584 - __main__ - INFO - Predicted Index: 6
2025-05-10 21:58:40,584 - __main__ - INFO - Energy Score: -5.0771
2025-05-10 21:58:40,599 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:58:40,631 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:58:40,631 - __main__ - INFO - Predicted Intent: greeting
2025-05-10 21:58:40,631 - __main__ - INFO - =============================================

2025-05-10 21:58:40,646 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:58:40] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:59:07,034 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:59:07,034 - __main__ - INFO - Input Text: gausah kocak besok aja bye
2025-05-10 21:59:07,034 - __main__ - INFO - Detection Method: combined
2025-05-10 21:59:07,812 - __main__ - INFO - Logits: [[-0.6060627102851868, -0.5750446915626526, -1.0750855207443237, 0.9511569738388062, -1.388703465461731, 4.919146537780762, -0.24468335509300232, -0.4451393187046051, -0.9716767072677612]]
2025-05-10 21:59:07,812 - __main__ - INFO - Softmax Probabilities: [[0.003815301228314638, 0.003935500048100948, 0.002386903390288353, 0.01810593344271183, 0.001744345179758966, 0.9574074745178223, 0.005476133432239294, 0.004481433890759945, 0.0026469440199434757]]
2025-05-10 21:59:07,828 - __main__ - INFO - Max Probability (Confidence): 0.9574
2025-05-10 21:59:07,828 - __main__ - INFO - Predicted Index: 5
2025-05-10 21:59:07,828 - __main__ - INFO - Energy Score: -4.9627
2025-05-10 21:59:07,828 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:59:07,828 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:59:07,828 - __main__ - INFO - Predicted Intent: goodbye
2025-05-10 21:59:07,828 - __main__ - INFO - =============================================

2025-05-10 21:59:07,843 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:59:07] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:59:11,113 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:59:11,113 - __main__ - INFO - Input Text: gausah
2025-05-10 21:59:11,113 - __main__ - INFO - Detection Method: combined
2025-05-10 21:59:11,479 - __main__ - INFO - Logits: [[-1.1790672540664673, -0.19112896919250488, 0.6465687155723572, 5.715721130371094, -0.954384982585907, -0.07604362815618515, -1.0976141691207886, -1.0053229331970215, -0.6094058752059937]]
2025-05-10 21:59:11,480 - __main__ - INFO - Softmax Probabilities: [[0.0009947115322574973, 0.0026714885607361794, 0.006173915695399046, 0.9818962812423706, 0.0012453041272237897, 0.0029973271302878857, 0.0010791246313601732, 0.0011834590695798397, 0.0017583195585757494]]
2025-05-10 21:59:11,484 - __main__ - INFO - Max Probability (Confidence): 0.9819
2025-05-10 21:59:11,484 - __main__ - INFO - Predicted Index: 3
2025-05-10 21:59:11,484 - __main__ - INFO - Energy Score: -5.7340
2025-05-10 21:59:11,484 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:59:11,484 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:59:11,484 - __main__ - INFO - Predicted Intent: denied
2025-05-10 21:59:11,484 - __main__ - INFO - =============================================

2025-05-10 21:59:11,533 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:59:11] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:59:20,905 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:59:20,905 - __main__ - INFO - Input Text: aku datang besok
2025-05-10 21:59:20,905 - __main__ - INFO - Detection Method: combined
2025-05-10 21:59:21,327 - __main__ - INFO - Logits: [[-0.8137130737304688, -0.8092074394226074, -1.8744394779205322, 0.2625807821750641, -1.8174302577972412, 4.685948371887207, 0.7935030460357666, 0.8845048546791077, -1.1589834690093994]]
2025-05-10 21:59:21,327 - __main__ - INFO - Softmax Probabilities: [[0.0038252437952905893, 0.00384251750074327, 0.0013243157882243395, 0.011222448199987411, 0.0014020069502294064, 0.935689389705658, 0.019083769991993904, 0.02090189978480339, 0.002708383370190859]]
2025-05-10 21:59:21,327 - __main__ - INFO - Max Probability (Confidence): 0.9357
2025-05-10 21:59:21,327 - __main__ - INFO - Predicted Index: 5
2025-05-10 21:59:21,327 - __main__ - INFO - Energy Score: -4.7524
2025-05-10 21:59:21,327 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 21:59:21,327 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 21:59:21,327 - __main__ - INFO - Predicted Intent: goodbye
2025-05-10 21:59:21,343 - __main__ - INFO - =============================================

2025-05-10 21:59:21,343 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:59:21] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 21:59:28,988 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 21:59:28,988 - __main__ - INFO - Input Text: aku akan tiba besok pagi
2025-05-10 21:59:28,993 - __main__ - INFO - Detection Method: combined
2025-05-10 21:59:29,531 - __main__ - INFO - Logits: [[-1.0576566457748413, -0.4939669370651245, -1.9942526817321777, 0.07859884947538376, -1.6343517303466797, 3.7366526126861572, 0.27028337121009827, 2.3993427753448486, -1.3766415119171143]]
2025-05-10 21:59:29,531 - __main__ - INFO - Softmax Probabilities: [[0.00610245019197464, 0.01072288304567337, 0.0023919143714010715, 0.019009629264473915, 0.0034280631225556135, 0.7373034358024597, 0.02302614599466324, 0.193579763174057, 0.004435788374394178]]
2025-05-10 21:59:29,531 - __main__ - INFO - Max Probability (Confidence): 0.7373
2025-05-10 21:59:29,531 - __main__ - INFO - Predicted Index: 5
2025-05-10 21:59:29,531 - __main__ - INFO - Energy Score: -4.0414
2025-05-10 21:59:29,547 - __main__ - INFO - OOD Detection -> is_ood: True
2025-05-10 21:59:29,547 - __main__ - INFO - Prediction marked as OUT-OF-DISTRIBUTION.
2025-05-10 21:59:29,547 - __main__ - INFO - Predicted Intent: unknown
2025-05-10 21:59:29,547 - __main__ - INFO - =============================================

2025-05-10 21:59:29,563 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:59:29] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 22:02:08,601 - __main__ - INFO -
========== INTENT PREDICTION DEBUG ==========
2025-05-10 22:02:08,601 - __main__ - INFO - Input Text: cariin buku bang
2025-05-10 22:02:08,601 - __main__ - INFO - Detection Method: combined
2025-05-10 22:02:09,038 - __main__ - INFO - Logits: [[-0.49238744378089905, 5.4453630447387695, -1.1035208702087402, -0.4974009394645691, 0.0320424884557724, -0.42038029432296753, -0.6259087324142456, -0.6300820112228394, -1.6206642389297485]]
2025-05-10 22:02:09,038 - __main__ - INFO - Softmax Probabilities: [[0.0025876371655613184, 0.9809244871139526, 0.0014044019626453519, 0.0025746964383870363, 0.00437180045992136, 0.0027808379381895065, 0.002264204667881131, 0.002254775259643793, 0.00083733448991552]]
2025-05-10 22:02:09,038 - __main__ - INFO - Max Probability (Confidence): 0.9809
2025-05-10 22:02:09,038 - __main__ - INFO - Predicted Index: 1
2025-05-10 22:02:09,053 - __main__ - INFO - Energy Score: -5.4646
2025-05-10 22:02:09,053 - __main__ - INFO - OOD Detection -> is_ood: False
2025-05-10 22:02:09,053 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
2025-05-10 22:02:09,053 - __main__ - INFO - Predicted Intent: cari_buku
2025-05-10 22:02:09,053 - __main__ - INFO - =============================================

2025-05-10 22:02:09,069 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 22:02:09] "POST /api/analyze HTTP/1.1" 200 -
2025-05-10 22:02:12,836 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 22:02:12] "POST /api/recommend HTTP/1.1" 404 -
model/class_metrics/f1_per_class.html
ADDED
The diff for this file is too large to render.
See raw diff
model/class_metrics/f1_per_class.png
ADDED
model/class_performance_metrics.png
ADDED
model/classification_report.csv
ADDED
@@ -0,0 +1,14 @@
+,precision,recall,f1-score,support
+cara_pinjam,0.984,1.0,0.992,179.0
+cari_buku,0.989,0.983,0.986,180.0
+confirm,0.975,0.975,0.975,120.0
+denied,0.986,0.98,0.983,147.0
+fasilitas,0.989,0.983,0.986,178.0
+goodbye,0.993,0.978,0.985,136.0
+greeting,0.944,0.992,0.967,118.0
+jam_layanan,0.994,1.0,0.997,178.0
+keanggotaan,0.994,0.994,0.994,179.0
+peraturan,1.0,0.971,0.985,174.0
+accuracy,0.986,0.986,0.986,0.986
+macro avg,0.985,0.986,0.985,1589.0
+weighted avg,0.986,0.986,0.986,1589.0
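This CSV is the standard scikit-learn classification report, rounded to three decimals. A sketch of how such a file is typically produced (the project's own evaluation script is not shown here, so the label arrays below are illustrative stand-ins):

```python
# Illustrative sketch: export a classification report like the CSV above.
import pandas as pd
from sklearn.metrics import classification_report

# Dummy labels; the real report is computed over the held-out test split.
y_true = ["cara_pinjam", "cari_buku", "cari_buku", "confirm"]
y_pred = ["cara_pinjam", "cari_buku", "confirm", "confirm"]

report = classification_report(y_true, y_pred, output_dict=True, zero_division=0)
pd.DataFrame(report).transpose().round(3).to_csv("classification_report.csv")
```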
model/classification_report.json
ADDED
@@ -0,0 +1,75 @@
+{
+  "0": {
+    "precision": 0.9944444444444445,
+    "recall": 1.0,
+    "f1-score": 0.9972144846796658,
+    "support": 179.0
+  },
+  "1": {
+    "precision": 0.9888888888888889,
+    "recall": 0.9888888888888889,
+    "f1-score": 0.9888888888888889,
+    "support": 180.0
+  },
+  "2": {
+    "precision": 0.975,
+    "recall": 0.975,
+    "f1-score": 0.975,
+    "support": 120.0
+  },
+  "3": {
+    "precision": 0.9863013698630136,
+    "recall": 0.9795918367346939,
+    "f1-score": 0.9829351535836177,
+    "support": 147.0
+  },
+  "4": {
+    "precision": 0.9943181818181818,
+    "recall": 0.9831460674157303,
+    "f1-score": 0.9887005649717514,
+    "support": 178.0
+  },
+  "5": {
+    "precision": 0.9925373134328358,
+    "recall": 0.9779411764705882,
+    "f1-score": 0.9851851851851852,
+    "support": 136.0
+  },
+  "6": {
+    "precision": 0.9435483870967742,
+    "recall": 0.9915254237288136,
+    "f1-score": 0.9669421487603306,
+    "support": 118.0
+  },
+  "7": {
+    "precision": 0.994413407821229,
+    "recall": 1.0,
+    "f1-score": 0.9971988795518207,
+    "support": 178.0
+  },
+  "8": {
+    "precision": 0.9944444444444445,
+    "recall": 1.0,
+    "f1-score": 0.9972144846796658,
+    "support": 179.0
+  },
+  "9": {
+    "precision": 1.0,
+    "recall": 0.9770114942528736,
+    "f1-score": 0.9883720930232558,
+    "support": 174.0
+  },
+  "accuracy": 0.9880427942101951,
+  "macro avg": {
+    "precision": 0.9863896437809814,
+    "recall": 0.9873104887491589,
+    "f1-score": 0.9867651883324182,
+    "support": 1589.0
+  },
+  "weighted avg": {
+    "precision": 0.9882412962498892,
+    "recall": 0.9880427942101951,
+    "f1-score": 0.988071401599911,
+    "support": 1589.0
+  }
+}
model/combined_metrics.html
ADDED
The diff for this file is too large to render.
See raw diff
model/config.json
ADDED
@@ -0,0 +1,51 @@
+{
+  "architectures": [
+    "XLMRobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8",
+    "9": "LABEL_9"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8,
+    "LABEL_9": 9
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "xlm-roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.4",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 250002
+}
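The config identifies the checkpoint as a 10-class XLMRobertaForSequenceClassification; the generic LABEL_0..LABEL_9 ids map back to intent names via the pickled encoders below. A minimal loading sketch, assuming the uploaded files are used as a local model directory (the path and example sentence are assumptions, not the project's code):

```python
# Minimal sketch: load the uploaded checkpoint and score one utterance.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("model")
model = AutoModelForSequenceClassification.from_pretrained("model")
model.eval()

inputs = tokenizer("cariin buku bang", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits  # shape (1, 10): one score per intent class
```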
model/confusion_matrices/cm_epoch_1.png
ADDED
model/confusion_matrices/cm_epoch_10.png
ADDED
model/confusion_matrices/cm_epoch_11.png
ADDED
model/confusion_matrices/cm_epoch_12.png
ADDED
model/confusion_matrices/cm_epoch_2.png
ADDED
model/confusion_matrices/cm_epoch_3.png
ADDED
model/confusion_matrices/cm_epoch_4.png
ADDED
model/confusion_matrices/cm_epoch_5.png
ADDED
model/confusion_matrices/cm_epoch_6.png
ADDED
model/confusion_matrices/cm_epoch_7.png
ADDED
model/confusion_matrices/cm_epoch_8.png
ADDED
model/confusion_matrices/cm_epoch_9.png
ADDED
model/enhanced_training_history.json
ADDED
The diff for this file is too large to render.
See raw diff
model/final_confusion_matrix.png
ADDED
model/intent_classes.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7f6662dc15d44fb1eb878094ca3e51afbd73b93d0a13c0dc16ae5196667fedf
+size 267
model/interactive_class_performance.html
ADDED
The diff for this file is too large to render.
See raw diff
model/interactive_confusion_matrix.html
ADDED
The diff for this file is too large to render.
See raw diff
model/interactive_training_metrics.html
ADDED
The diff for this file is too large to render.
See raw diff
model/label_encoder.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10f038f671b716874251fcc9d9076b5096dbfff121e75cfde29bc91769479756
+size 361
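label_encoder.pkl presumably holds the fitted scikit-learn LabelEncoder that turns a predicted class index into an intent name, as seen in the "Predicted Index" / "Predicted Intent" pairs in intent_api.log. A hedged usage sketch:

```python
# Sketch, assuming the pickle is a fitted sklearn.preprocessing.LabelEncoder.
import pickle

with open("model/label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

predicted_index = 1
intent = label_encoder.inverse_transform([predicted_index])[0]  # e.g. "cari_buku"
```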
model/learning_rate_schedule.html
ADDED
The diff for this file is too large to render.
See raw diff
model/learning_rate_schedule.png
ADDED
model/ood_thresholds.json
ADDED
@@ -0,0 +1,4 @@
+{
+  "energy_threshold": -6.981417655944824,
+  "msp_threshold": 0.8888697624206543
+}
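These thresholds pair with the "combined" detection method seen in intent_api.log. The exact combination rule lives in the app code, which is not shown in this diff; one reading consistent with the logged examples ("jawir" with confidence 0.5378 is flagged OOD, "selamat pagi..." with confidence 0.8926 is not) is to flag OOD only when both signals agree:

```python
# Hypothetical decision rule; the app's actual logic is not in this diff.
import json
import math

with open("model/ood_thresholds.json") as f:
    thr = json.load(f)

def is_ood_combined(logits: list[float], thr: dict) -> bool:
    """Flag OOD when both the MSP and the energy signal look out-of-distribution."""
    m = max(logits)
    log_sum_exp = m + math.log(sum(math.exp(x - m) for x in logits))
    energy = -log_sum_exp            # higher (less negative) energy = more OOD-like
    msp = math.exp(m - log_sum_exp)  # max softmax probability
    return msp < thr["msp_threshold"] and energy > thr["energy_threshold"]
```

An OR-combination would contradict the log (every in-distribution example above has an energy score above -6.98 and is still accepted), so the AND reading is used here; treat it as an inference, not documented behavior.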
model/sentencepiece.bpe.model
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
model/special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}
model/test_results/test_run_20250702_143737/classification_report.json
ADDED
@@ -0,0 +1,75 @@
+{
+  "cara_pinjam": {
+    "precision": 0.9534883720930233,
+    "recall": 1.0,
+    "f1-score": 0.9761904761904762,
+    "support": 41.0
+  },
+  "cari_buku": {
+    "precision": 1.0,
+    "recall": 1.0,
+    "f1-score": 1.0,
+    "support": 41.0
+  },
+  "confirm": {
+    "precision": 0.9714285714285714,
+    "recall": 0.8292682926829268,
+    "f1-score": 0.8947368421052632,
+    "support": 41.0
+  },
+  "denied": {
+    "precision": 0.9069767441860465,
+    "recall": 0.9512195121951219,
+    "f1-score": 0.9285714285714286,
+    "support": 41.0
+  },
+  "fasilitas": {
+    "precision": 1.0,
+    "recall": 1.0,
+    "f1-score": 1.0,
+    "support": 41.0
+  },
+  "goodbye": {
+    "precision": 0.9473684210526315,
+    "recall": 0.8780487804878049,
+    "f1-score": 0.9113924050632911,
+    "support": 41.0
+  },
+  "greeting": {
+    "precision": 0.8604651162790697,
+    "recall": 0.9024390243902439,
+    "f1-score": 0.8809523809523809,
+    "support": 41.0
+  },
+  "jam_layanan": {
+    "precision": 0.9318181818181818,
+    "recall": 1.0,
+    "f1-score": 0.9647058823529412,
+    "support": 41.0
+  },
+  "keanggotaan": {
+    "precision": 1.0,
+    "recall": 1.0,
+    "f1-score": 1.0,
+    "support": 41.0
+  },
+  "peraturan": {
+    "precision": 1.0,
+    "recall": 1.0,
+    "f1-score": 1.0,
+    "support": 41.0
+  },
+  "accuracy": 0.9560975609756097,
+  "macro avg": {
+    "precision": 0.9571545406857525,
+    "recall": 0.9560975609756097,
+    "f1-score": 0.9556549415235782,
+    "support": 410.0
+  },
+  "weighted avg": {
+    "precision": 0.9571545406857526,
+    "recall": 0.9560975609756097,
+    "f1-score": 0.955654941523578,
+    "support": 410.0
+  }
+}
model/test_results/test_run_20250702_143737/confidence_analysis.json
ADDED
@@ -0,0 +1,13 @@
+{
+  "mean_confidence": 0.9509811997413635,
+  "std_confidence": 0.06421167403459549,
+  "min_confidence": 0.48898056149482727,
+  "max_confidence": 0.9849911332130432,
+  "median_confidence": 0.9713054299354553,
+  "q25_confidence": 0.9562950134277344,
+  "q75_confidence": 0.979069173336029,
+  "mean_confidence_correct": 0.9571561217308044,
+  "mean_confidence_incorrect": 0.816504180431366,
+  "std_confidence_correct": 0.048875272274017334,
+  "std_confidence_incorrect": 0.15158796310424805
+}
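These statistics summarize the max-softmax confidence over the test-run predictions, split by correctness. A sketch of how they can be computed (the array names are illustrative stand-ins for the test-run outputs):

```python
# Sketch: compute the summary statistics stored in confidence_analysis.json.
import numpy as np

confidences = np.array([0.97, 0.93, 0.49, 0.98])  # max softmax prob per prediction
correct = np.array([True, True, False, True])     # prediction == gold label

stats = {
    "mean_confidence": float(confidences.mean()),
    "std_confidence": float(confidences.std()),
    "min_confidence": float(confidences.min()),
    "max_confidence": float(confidences.max()),
    "median_confidence": float(np.median(confidences)),
    "q25_confidence": float(np.percentile(confidences, 25)),
    "q75_confidence": float(np.percentile(confidences, 75)),
    "mean_confidence_correct": float(confidences[correct].mean()),
    "mean_confidence_incorrect": float(confidences[~correct].mean()),
}
```

The gap between mean_confidence_correct (0.957) and mean_confidence_incorrect (0.817) in the stored file is what makes an MSP cutoff near 0.889 (see model/ood_thresholds.json above) a usable rejection signal.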
model/test_results/test_run_20250702_143737/confidence_analysis.png
ADDED
Git LFS Details