ZEROTSUDIOS committed on
Commit
c3637f5
·
verified ·
1 Parent(s): 9d1da50

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +49 -35
  2. .gitignore +1 -0
  3. DockerFile +23 -0
  4. api.log +0 -0
  5. app.py +190 -0
  6. app2.py +460 -0
  7. app3.py +580 -0
  8. app4.py +580 -0
  9. combined_api.log +0 -0
  10. evaluate_model.py +208 -0
  11. evaluation_plots/20250515_142829_confidence_distribution.png +0 -0
  12. evaluation_plots/20250515_142829_energy_distribution.png +0 -0
  13. evaluation_plots/20250515_142829_intent_distribution.png +0 -0
  14. evaluation_plots/20250515_142829_ood_by_method.png +0 -0
  15. hf.py +18 -0
  16. intent_api.log +504 -0
  17. model/class_metrics/f1_per_class.html +0 -0
  18. model/class_metrics/f1_per_class.png +0 -0
  19. model/class_performance_metrics.png +0 -0
  20. model/classification_report.csv +14 -0
  21. model/classification_report.json +75 -0
  22. model/combined_metrics.html +0 -0
  23. model/config.json +51 -0
  24. model/confusion_matrices/cm_epoch_1.png +0 -0
  25. model/confusion_matrices/cm_epoch_10.png +0 -0
  26. model/confusion_matrices/cm_epoch_11.png +0 -0
  27. model/confusion_matrices/cm_epoch_12.png +0 -0
  28. model/confusion_matrices/cm_epoch_2.png +0 -0
  29. model/confusion_matrices/cm_epoch_3.png +0 -0
  30. model/confusion_matrices/cm_epoch_4.png +0 -0
  31. model/confusion_matrices/cm_epoch_5.png +0 -0
  32. model/confusion_matrices/cm_epoch_6.png +0 -0
  33. model/confusion_matrices/cm_epoch_7.png +0 -0
  34. model/confusion_matrices/cm_epoch_8.png +0 -0
  35. model/confusion_matrices/cm_epoch_9.png +0 -0
  36. model/enhanced_training_history.json +0 -0
  37. model/final_confusion_matrix.png +0 -0
  38. model/intent_classes.pkl +3 -0
  39. model/interactive_class_performance.html +0 -0
  40. model/interactive_confusion_matrix.html +0 -0
  41. model/interactive_training_metrics.html +0 -0
  42. model/label_encoder.pkl +3 -0
  43. model/learning_rate_schedule.html +0 -0
  44. model/learning_rate_schedule.png +0 -0
  45. model/ood_thresholds.json +4 -0
  46. model/sentencepiece.bpe.model +3 -0
  47. model/special_tokens_map.json +15 -0
  48. model/test_results/test_run_20250702_143737/classification_report.json +75 -0
  49. model/test_results/test_run_20250702_143737/confidence_analysis.json +13 -0
  50. model/test_results/test_run_20250702_143737/confidence_analysis.png +3 -0
.gitattributes CHANGED
@@ -1,35 +1,49 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model/test_results/test_run_20250702_143737/confidence_analysis.png filter=lfs diff=lfs merge=lfs -text
37
+ model/test_results/test_run_20250702_143737/confusion_matrix_absolute.png filter=lfs diff=lfs merge=lfs -text
38
+ model/test_results/test_run_20250702_143737/confusion_matrix_normalized.png filter=lfs diff=lfs merge=lfs -text
39
+ model/test_results/test_run_20250702_143737/per_class_metrics.png filter=lfs diff=lfs merge=lfs -text
40
+ model/test_results/test_run_20250702_152814/confidence_analysis.png filter=lfs diff=lfs merge=lfs -text
41
+ model/test_results/test_run_20250702_152814/confusion_matrix_absolute.png filter=lfs diff=lfs merge=lfs -text
42
+ model/test_results/test_run_20250702_152814/confusion_matrix_normalized.png filter=lfs diff=lfs merge=lfs -text
43
+ model/test_results/test_run_20250702_152814/per_class_metrics.png filter=lfs diff=lfs merge=lfs -text
44
+ model/test_results/test_run_20250702_152925/confidence_analysis.png filter=lfs diff=lfs merge=lfs -text
45
+ model/test_results/test_run_20250702_152925/confusion_matrix_absolute.png filter=lfs diff=lfs merge=lfs -text
46
+ model/test_results/test_run_20250702_152925/confusion_matrix_normalized.png filter=lfs diff=lfs merge=lfs -text
47
+ model/test_results/test_run_20250702_152925/per_class_metrics.png filter=lfs diff=lfs merge=lfs -text
48
+ model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
49
+ model/training_metrics.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ model/model.safetensors
DockerFile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system packages (compiler toolchain and git, needed to build/install
# some Python dependencies)
RUN apt-get update && apt-get install -y \
    build-essential \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy everything to container
COPY . .

# Install dependencies
RUN pip install --upgrade pip
RUN pip install -r requirements.txt

# Download NLTK data
RUN python -c "import nltk; nltk.download('punkt'); nltk.download('stopwords'); nltk.download('wordnet')"

# Start app using gunicorn.
# The exec form of CMD must be a valid JSON array with plain double quotes;
# backslash-escaped quotes make Docker treat the line as a shell command,
# which then fails to start the server.
CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:7860", "app:app"]
api.log ADDED
File without changes
app.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ from flask_cors import CORS
3
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
4
+ from sentence_transformers import SentenceTransformer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ import torch
7
+ import numpy as np
8
+ import pickle
9
+ import os
10
+ import json
11
+ import logging
12
+ import re
13
+ import nltk
14
+ from nltk.corpus import stopwords
15
+ from nltk.stem import WordNetLemmatizer
16
+
17
# Download the NLTK resources used by the text preprocessing in this module
# (stopword list, tokenizer data, WordNet). quiet=True suppresses progress output.
nltk.download('stopwords', quiet=True)
nltk.download('punkt', quiet=True)
nltk.download('wordnet', quiet=True)

# Configure logging: INFO level, timestamped records.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Flask application with CORS enabled for all routes.
app = Flask(__name__)
CORS(app)

# Global variables and constants
# Model directories are resolved relative to this file, not the working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INTENT_MODEL_PATH = os.path.join(BASE_DIR, "model")
RECOMMENDER_MODEL_PATH = os.path.join(BASE_DIR, "recommender_model")

# Populated by load_intent_resources(); None until then.
intent_model = None
intent_tokenizer = None
intent_classes = None
intent_thresholds = None
# Populated in the __main__ block; the flag records whether load_model() succeeded.
recommender = None
recommender_model_loaded = False
41
+
42
class BookRecommender:
    """Recommend books by cosine similarity between a query and stored book embeddings.

    A new instance holds no model; call load_model() to populate ``model``,
    ``book_embeddings`` and ``df`` before calling recommend_books().
    """

    # Descriptions longer than this are cut to (limit - 3) characters plus "...".
    _DESCRIPTION_LIMIT = 200

    def __init__(self, model_name='all-minilm-l6-v2'):
        """Create an empty recommender and prepare the stopword set and lemmatizer."""
        self.model_name = model_name
        self.model = None
        self.book_embeddings = None
        self.df = None
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()

    def preprocess_text(self, text):
        """Lowercase, strip punctuation, drop English stopwords and lemmatize.

        Returns an empty string when ``text`` is not a ``str``.
        """
        if not isinstance(text, str):
            return ""
        text = text.lower()
        # Replace every character that is neither a word character nor whitespace.
        text = re.sub(r'[^\w\s]', ' ', text)
        tokens = nltk.word_tokenize(text)
        tokens = [self.lemmatizer.lemmatize(word) for word in tokens if word not in self.stop_words]
        return ' '.join(tokens)

    def load_model(self, folder_path=RECOMMENDER_MODEL_PATH):
        """Load config, sentence-transformer model, embeddings and book table.

        Returns True on success; False when the folder is missing or any
        artifact fails to load (the error is logged).
        """
        try:
            if not os.path.exists(folder_path):
                return False
            # SECURITY NOTE: pickle.load executes arbitrary code from the file.
            # Only load artifacts produced by a trusted training pipeline.
            with open(os.path.join(folder_path, "config.pkl"), 'rb') as f:
                config = pickle.load(f)
            self.model_name = config['model_name']
            self.model = SentenceTransformer(os.path.join(folder_path, "sentence_transformer"))
            with open(os.path.join(folder_path, "book_embeddings.pkl"), 'rb') as f:
                self.book_embeddings = pickle.load(f)
            with open(os.path.join(folder_path, "books_data.pkl"), 'rb') as f:
                self.df = pickle.load(f)
            return True
        except Exception as e:
            logger.error(f"Error loading model: {str(e)}", exc_info=True)
            return False

    @staticmethod
    def _json_safe(value):
        """Convert numpy scalars to built-in Python values so they can be JSON-serialized."""
        return value.item() if isinstance(value, np.generic) else value

    def _short_description(self, description):
        """Return the description, truncated with "..." only when it exceeds the limit.

        Non-string values (for example a missing/NaN cell) become an empty string
        instead of raising and discarding every recommendation.
        """
        if not isinstance(description, str):
            return ''
        if len(description) > self._DESCRIPTION_LIMIT:
            return description[:self._DESCRIPTION_LIMIT - 3] + "..."
        return description

    def recommend_books(self, user_query, top_n=5, include_description=True):
        """Return up to ``top_n`` books most similar to ``user_query``.

        Each result is a dict with keys ``title``, ``author``, ``category``,
        ``year``, ``description``, ``relevance_score`` and ``rank`` (1-based).
        Returns an empty list when the model is not loaded, when ``top_n`` is
        not a positive integer, or when any error occurs (the error is logged).
        """
        if self.model is None or self.book_embeddings is None or self.df is None:
            return []
        try:
            top_n = int(top_n)
            if top_n < 1:
                # A slice of [-0:] would return every book, so stop here.
                return []
            processed_query = self.preprocess_text(user_query)
            user_embedding = self.model.encode([processed_query])
            similarities = cosine_similarity(user_embedding, self.book_embeddings)[0]
            # argsort is ascending: take the last top_n indices and reverse them.
            similar_books_idx = np.argsort(similarities)[-top_n:][::-1]
            with_description = include_description and 'Description' in self.df.columns
            recommendations = []
            for rank, idx in enumerate(similar_books_idx, start=1):
                row = self.df.iloc[idx]
                recommendations.append({
                    'title': self._json_safe(row.get('Title', '')),
                    'author': self._json_safe(row.get('Authors', '')),
                    'category': self._json_safe(row.get('Category', '')),
                    'year': self._json_safe(row.get('Publish Date (Year)', '')),
                    'description': self._short_description(row.get('Description', '')) if with_description else '',
                    'relevance_score': float(similarities[idx]),
                    'rank': rank
                })
            return recommendations
        except Exception as e:
            logger.error(f"Error generating recommendations: {str(e)}", exc_info=True)
            return []
101
+
102
+
103
def load_ood_thresholds(model_path):
    """Read ``ood_thresholds.json`` from ``model_path``.

    Returns the parsed JSON content when the file exists; otherwise returns
    the built-in defaults (energy threshold 0.0, MSP threshold 0.5).
    """
    threshold_file = os.path.join(model_path, "ood_thresholds.json")
    if not os.path.exists(threshold_file):
        return {"energy_threshold": 0.0, "msp_threshold": 0.5}
    with open(threshold_file, "r") as handle:
        return json.load(handle)
109
+
110
+
111
def load_intent_resources():
    """Populate the module-level intent model, tokenizer, class list and thresholds.

    The model and tokenizer are loaded via ``from_pretrained`` with the
    "ZEROTSUDIOS/Bipa-Classification" identifier; the class list and OOD
    thresholds are read from INTENT_MODEL_PATH.

    Returns True on success, False (after logging the error) on any failure.
    """
    global intent_model, intent_tokenizer, intent_classes, intent_thresholds
    classes_file = os.path.join(INTENT_MODEL_PATH, "intent_classes.pkl")
    try:
        intent_model = AutoModelForSequenceClassification.from_pretrained("ZEROTSUDIOS/Bipa-Classification")
        intent_tokenizer = AutoTokenizer.from_pretrained("ZEROTSUDIOS/Bipa-Classification")
        with open(classes_file, "rb") as handle:
            intent_classes = pickle.load(handle)
        intent_thresholds = load_ood_thresholds(INTENT_MODEL_PATH)
    except Exception as e:
        logger.error(f"Failed to load intent resources: {str(e)}", exc_info=True)
        return False
    return True
123
+
124
+
125
def predict_intent(text, method='combined'):
    """Classify ``text`` and flag it as out-of-distribution (OOD) per ``method``.

    ``method`` selects the OOD rule: 'energy' (energy score above its
    threshold), 'msp' (max softmax probability below its threshold) or
    'combined' (both conditions). Any other value leaves ``is_ood`` False.

    Returns a dict with ``intent``, ``is_ood``, ``confidence`` and
    ``energy_score``. The predicted intent label is returned even when the
    input is flagged as OOD.
    """
    encoded = intent_tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        logits = intent_model(**encoded).logits

    probabilities = torch.nn.functional.softmax(logits, dim=-1)
    max_prob, pred_idx = torch.max(probabilities, dim=-1)
    # Energy score: negative log-sum-exp of the logits.
    energy = -torch.logsumexp(logits, dim=-1)

    if method == 'energy':
        is_ood = energy.item() > intent_thresholds['energy_threshold']
    elif method == 'msp':
        is_ood = max_prob.item() < intent_thresholds['msp_threshold']
    elif method == 'combined':
        is_ood = (energy.item() > intent_thresholds['energy_threshold']) and (max_prob.item() < intent_thresholds['msp_threshold'])
    else:
        is_ood = False

    prediction = {
        "intent": intent_classes[pred_idx.item()],
        "is_ood": is_ood,
        "confidence": max_prob.item(),
        "energy_score": energy.item()
    }
    return prediction
146
+
147
+
148
@app.route('/api/analyze', methods=['POST'])
def analyze():
    """Predict the intent of the ``text`` field of a JSON request.

    Request JSON: ``text`` (required, non-empty string) and ``method``
    (optional: 'energy', 'msp' or 'combined'; default 'combined').

    Responses: 200 with the prediction dict, 400 for a malformed request,
    503 when the intent resources have not been loaded.
    """
    if not request.is_json:
        return jsonify({"error": "Request must be JSON"}), 400

    # silent=True yields None on unparsable JSON instead of raising.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    text = data.get('text')
    if not isinstance(text, str) or not text.strip():
        return jsonify({"error": "Missing 'text' field in request"}), 400

    method = data.get('method', 'combined')
    if method not in ('energy', 'msp', 'combined'):
        return jsonify({"error": "Invalid method. Must be 'energy', 'msp', or 'combined'"}), 400

    # Without this guard an unloaded model surfaces as an opaque 500.
    if intent_model is None or intent_tokenizer is None \
            or intent_classes is None or intent_thresholds is None:
        return jsonify({"error": "Intent model not loaded."}), 503

    result = predict_intent(text, method)
    return jsonify(result)
157
+
158
+
159
@app.route('/api/recommend', methods=['POST'])
def recommend():
    """Return book recommendations for ``query``, split by relevance threshold.

    Request JSON: ``query`` (required), ``top_n`` (int, default 5),
    ``include_description`` (default True), ``threshold`` (number, default 0.5).

    Responses: 200 with recommendations partitioned into ``high_recommendations``
    (score >= threshold) and ``low_recommendations``; 400 for a malformed
    request; 503 when the recommender is not loaded.
    """
    if not recommender_model_loaded:
        return jsonify({"error": "Recommendation model not loaded."}), 503

    # silent=True yields None for a missing/invalid JSON body instead of raising.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    query = data.get('query')
    if not query:
        return jsonify({"error": "Missing query."}), 400

    # Reject non-numeric values here; otherwise they raise later as a 500.
    try:
        top_n = int(data.get('top_n', 5))
        threshold = float(data.get('threshold', 0.5))
    except (TypeError, ValueError):
        return jsonify({"error": "top_n must be an integer and threshold must be a number."}), 400
    if top_n < 1:
        return jsonify({"error": "top_n must be a positive integer."}), 400

    include_description = data.get('include_description', True)

    recommendations = recommender.recommend_books(query, top_n=top_n, include_description=include_description)
    high_score = [rec for rec in recommendations if rec['relevance_score'] >= threshold]
    low_score = [rec for rec in recommendations if rec['relevance_score'] < threshold]
    return jsonify({
        "query": query,
        "threshold": threshold,
        "high_recommendations": high_score,
        "low_recommendations": low_score,
        "total_count": len(recommendations),
        "high_count": len(high_score),
        "low_count": len(low_score)
    })
183
+
184
+
185
def initialize_models():
    """Load the intent resources and the book recommender into module globals."""
    global recommender, recommender_model_loaded
    load_intent_resources()
    recommender = BookRecommender()
    recommender_model_loaded = recommender.load_model()


# Load at import time. A WSGI server that imports this module (for example
# `gunicorn app:app`) never runs the __main__ block below, so loading only
# there would leave every endpoint without its models.
initialize_models()

if __name__ == '__main__':
    port = int(os.environ.get('PORT', 5000))
    app.run(host='0.0.0.0', port=port, debug=False, use_reloader=False)
app2.py ADDED
@@ -0,0 +1,460 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
3
+ from sentence_transformers import SentenceTransformer
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ import pickle
6
+ import torch
7
+ import numpy as np
8
+ import pandas as pd
9
+ import os
10
+ import json
11
+ import re
12
+ import nltk
13
+ from nltk.corpus import stopwords
14
+ from nltk.stem import WordNetLemmatizer
15
+ import logging
16
+ from flask_cors import CORS
17
+
18
# Configure logging: INFO level, written to both api.log and the console.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    handlers=[logging.FileHandler("api.log"),
                              logging.StreamHandler()])
logger = logging.getLogger(__name__)

# Download required NLTK resources only when they are not already available.
# Each resource is checked on its own so one missing package does not trigger
# a re-download of the others. punkt is a tokenizer model and is looked up
# under 'tokenizers/', not 'corpora/'; the wrong path made the lookup fail on
# every start and forced all three downloads each time.
for _resource_path, _package in (('corpora/stopwords', 'stopwords'),
                                 ('corpora/wordnet', 'wordnet'),
                                 ('tokenizers/punkt', 'punkt')):
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package)

app = Flask(__name__)
CORS(app)  # Enable Cross-Origin Resource Sharing

# Global variables for intent classification
# The model directory is resolved relative to this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_SAVE_PATH = os.path.join(BASE_DIR, "model")
# Populated by load_intent_resources(); None until then.
intent_model = None
intent_tokenizer = None
intent_classes = None
intent_thresholds = None
45
+
46
class BookRecommender:
    """Recommend books by cosine similarity between a query and stored book embeddings.

    A new instance holds no model; call load_model() to populate ``model``,
    ``book_embeddings`` and ``df`` before calling recommend_books().
    """

    def __init__(self, model_name='all-minilm-l6-v2'):
        """Initialize the book recommender with specified model."""
        self.model_name = model_name
        self.model = None
        self.book_embeddings = None
        self.df = None
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()
        logger.info(f"BookRecommender initialized with model: {model_name}")

    def preprocess_text(self, text):
        """Lowercase, strip punctuation, remove stopwords and lemmatize.

        Returns an empty string when ``text`` is not a ``str``.
        """
        if not isinstance(text, str):
            return ""

        # Convert to lowercase and remove special characters
        text = text.lower()
        text = re.sub(r'[^\w\s]', ' ', text)

        # Tokenize, remove stopwords, and lemmatize
        tokens = nltk.word_tokenize(text)
        tokens = [self.lemmatizer.lemmatize(word) for word in tokens if word not in self.stop_words]

        return ' '.join(tokens)

    def load_model(self, folder_path="recommender_model"):
        """Load a previously saved model and embeddings for inference.

        Returns True on success; False when the folder does not exist or any
        artifact fails to load (the error is logged). The default
        ``folder_path`` is relative to the current working directory.
        """
        try:
            # Check if folder exists
            if not os.path.exists(folder_path):
                logger.error(f"Model folder {folder_path} does not exist.")
                return False

            # SECURITY NOTE: pickle.load executes arbitrary code from the file.
            # Only load artifacts produced by a trusted training pipeline.

            # Load configuration
            config_path = os.path.join(folder_path, "config.pkl")
            with open(config_path, 'rb') as f:
                config = pickle.load(f)
            self.model_name = config['model_name']
            logger.info(f"Loaded configuration: model_name={self.model_name}")

            # Load the sentence transformer model
            model_path = os.path.join(folder_path, "sentence_transformer")
            self.model = SentenceTransformer(model_path)
            logger.info(f"Model loaded from {model_path}")

            # Load book embeddings
            embeddings_path = os.path.join(folder_path, "book_embeddings.pkl")
            with open(embeddings_path, 'rb') as f:
                self.book_embeddings = pickle.load(f)
            logger.info(f"Embeddings loaded: {len(self.book_embeddings)} book vectors")

            # Load the DataFrame
            df_path = os.path.join(folder_path, "books_data.pkl")
            with open(df_path, 'rb') as f:
                self.df = pickle.load(f)
            logger.info(f"DataFrame loaded: {len(self.df)} books")

            return True

        except Exception as e:
            logger.error(f"Error loading model: {str(e)}", exc_info=True)
            return False

    def recommend_books(self, user_query, top_n=5, include_description=True):
        """Recommend books based on user query.

        Returns a list of dicts, most similar first. Each dict carries
        ``relevance_score`` and ``rank`` (1-based), plus ``title``, ``author``,
        ``category``, ``year`` and ``description`` when the matching column
        exists in the book table. Returns an empty list when the model is not
        loaded, when ``top_n`` is not a positive integer, or on any error.
        """
        if self.model is None or self.book_embeddings is None or self.df is None:
            logger.error("Model not initialized. Cannot make recommendations.")
            return []

        logger.info(f"Finding books similar to: '{user_query}'")

        try:
            top_n = int(top_n)
            if top_n < 1:
                # A slice of [-0:] would return every book, so stop here.
                logger.warning(f"Non-positive top_n ({top_n}); returning no recommendations")
                return []

            # Preprocess the query the same way as the book text
            processed_query = self.preprocess_text(user_query)

            # Encode user query
            user_embedding = self.model.encode([processed_query])

            # Compute similarity between query and books
            similarities = cosine_similarity(user_embedding, self.book_embeddings)[0]

            # Get top N most similar books (argsort is ascending, so reverse the tail)
            similar_books_idx = np.argsort(similarities)[-top_n:][::-1]

            columns = self.df.columns
            recommendations = []

            for i, idx in enumerate(similar_books_idx):
                row = self.df.iloc[idx]
                book_data = {}

                # Extract book information
                if 'Title' in columns:
                    book_data['title'] = row['Title']

                if 'Authors' in columns:
                    book_data['author'] = row['Authors']

                if 'Category' in columns:
                    book_data['category'] = row['Category']

                if 'Publish Date (Year)' in columns:
                    book_data['year'] = row['Publish Date (Year)']

                if include_description and 'Description' in columns:
                    description = row['Description']
                    if not isinstance(description, str):
                        # A missing/NaN cell has no len(); one such row used to
                        # raise and discard the entire result list.
                        description = ''
                    elif len(description) > 200:
                        # Truncate long descriptions
                        description = description[:197] + "..."
                    book_data['description'] = description

                # Add similarity score
                book_data['relevance_score'] = float(similarities[idx])
                book_data['rank'] = i + 1

                recommendations.append(book_data)

            logger.info(f"Successfully generated {len(recommendations)} recommendations")
            return recommendations

        except Exception as e:
            logger.error(f"Error generating recommendations: {str(e)}", exc_info=True)
            return []
168
+
169
# Initialize the recommender system.
# The instance is created at import time but holds no model yet; the flag is
# set to the result of recommender.load_model() in initialize_models().
recommender = BookRecommender()
recommender_model_loaded = False
172
+
173
def load_ood_thresholds(model_path):
    """Return the OOD thresholds stored as JSON in ``model_path``.

    Reads ``ood_thresholds.json``. When the file is absent, a warning is
    logged and default thresholds (energy 0.0, MSP 0.5) are returned instead.
    """
    threshold_path = os.path.join(model_path, "ood_thresholds.json")

    # Guard clause: fall back to defaults when the file is missing.
    if not os.path.exists(threshold_path):
        logger.warning(f"Threshold file not found at {threshold_path}. Using default values.")
        fallback = {
            "energy_threshold": 0.0,  # Replace with your default value
            "msp_threshold": 0.5      # Replace with your default value
        }
        return fallback

    with open(threshold_path, "r") as handle:
        return json.load(handle)
189
+
190
def load_intent_resources():
    """Load the intent model, tokenizer, class list and OOD thresholds into module globals.

    Everything is read from MODEL_SAVE_PATH.

    Raises:
        FileNotFoundError: if ``intent_classes.pkl`` is missing.
        Any exception raised while loading the model or tokenizer propagates.
    """
    global intent_model, intent_tokenizer, intent_classes, intent_thresholds

    logger.info(f"Loading intent resources from {MODEL_SAVE_PATH}...")

    # Model and tokenizer come from the same local directory.
    intent_model = AutoModelForSequenceClassification.from_pretrained(MODEL_SAVE_PATH)
    intent_tokenizer = AutoTokenizer.from_pretrained(MODEL_SAVE_PATH)

    # The class list is mandatory: fail loudly when it is absent.
    intent_classes_path = os.path.join(MODEL_SAVE_PATH, "intent_classes.pkl")
    if not os.path.exists(intent_classes_path):
        raise FileNotFoundError(f"Intent classes file not found at {intent_classes_path}")
    with open(intent_classes_path, "rb") as handle:
        intent_classes = pickle.load(handle)

    # Thresholds fall back to defaults inside load_ood_thresholds.
    intent_thresholds = load_ood_thresholds(MODEL_SAVE_PATH)

    logger.info("Intent resources loaded successfully")
    logger.info(f"Loaded {len(intent_classes)} intent classes")
    logger.info(f"Thresholds: {intent_thresholds}")
214
+
215
def predict_intent_with_enhanced_ood(text, model, tokenizer, intent_classes,
                                     energy_threshold, msp_threshold, method='combined'):
    """
    Classify ``text`` and decide whether it is out-of-distribution (OOD),
    logging every intermediate value.

    ``method`` selects the OOD rule: 'energy' (energy score above
    ``energy_threshold``), 'msp' (max softmax probability below
    ``msp_threshold``) or 'combined' (both). Any other value never flags OOD.

    Returns a dict with ``intent`` ("unknown" when flagged OOD), ``is_ood``,
    ``confidence`` and ``energy_score``.
    """
    logger.info("\n========== INTENT PREDICTION DEBUG ==========")
    logger.info(f"Input Text: {text}")
    logger.info(f"Detection Method: {method}")

    # Tokenize and run the model without tracking gradients.
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**encoded).logits

    logger.info(f"Logits: {logits.numpy().tolist()}")

    # Class probabilities and the winning class.
    probs = torch.nn.functional.softmax(logits, dim=-1)
    max_prob, pred_idx = torch.max(probs, dim=-1)

    logger.info(f"Softmax Probabilities: {probs.numpy().tolist()}")
    logger.info(f"Max Probability (Confidence): {max_prob.item():.4f}")
    logger.info(f"Predicted Index: {pred_idx.item()}")

    # Energy score: negative log-sum-exp of the logits.
    energy = -torch.logsumexp(logits, dim=-1)
    logger.info(f"Energy Score: {energy.item():.4f}")

    # OOD decision according to the selected rule.
    if method == 'energy':
        is_ood = energy.item() > energy_threshold
    elif method == 'msp':
        is_ood = max_prob.item() < msp_threshold
    elif method == 'combined':
        is_ood = (energy.item() > energy_threshold) and (max_prob.item() < msp_threshold)
    else:
        is_ood = False

    logger.info(f"OOD Detection -> is_ood: {is_ood}")
    if is_ood:
        logger.info("Prediction marked as OUT-OF-DISTRIBUTION.")
        predicted_intent = "unknown"
    else:
        logger.info("Prediction marked as IN-DISTRIBUTION.")
        predicted_intent = intent_classes[pred_idx.item()]

    logger.info(f"Predicted Intent: {predicted_intent}")
    logger.info("=============================================\n")

    outcome = {
        "intent": predicted_intent,
        "is_ood": is_ood,
        "confidence": max_prob.item(),
        "energy_score": energy.item()
    }
    return outcome
272
+
273
def initialize_models():
    """Load the intent classifier and the book recommender at startup.

    A failure to load the intent model is logged and does not stop the
    recommender from being loaded. The recommender result is stored in the
    module-level ``recommender_model_loaded`` flag.
    """
    global recommender_model_loaded

    # Intent classification model
    try:
        load_intent_resources()
    except Exception as e:
        logger.error(f"Failed to load intent model: {str(e)}", exc_info=True)
    else:
        logger.info("Intent classification model loaded successfully!")

    # Book recommender model
    logger.info("Loading recommender model...")
    recommender_model_loaded = recommender.load_model("recommender_model")
    if not recommender_model_loaded:
        logger.error("Failed to load book recommender model.")
    else:
        logger.info("Book recommender model loaded successfully!")
291
+
292
@app.route('/api/health', methods=['GET'])
def health_check():
    """Report whether the API is up and which models are loaded.

    ``status`` is "healthy" only when both the intent model (with its
    tokenizer) and the recommender are loaded; otherwise it is "partial".
    """
    global recommender_model_loaded

    intent_ready = intent_model is not None and intent_tokenizer is not None
    all_ready = intent_ready and recommender_model_loaded

    services = {
        "intent_classification": "loaded" if intent_ready else "not loaded",
        "book_recommendations": "loaded" if recommender_model_loaded else "not loaded"
    }
    payload = {
        "status": "healthy" if all_ready else "partial",
        "services": services
    }
    return jsonify(payload)
306
+
307
@app.route('/api/analyze', methods=['POST'])
def analyze():
    """Endpoint to predict intent from text.

    Request JSON: ``text`` (required, non-empty string) and ``method``
    (optional: 'energy', 'msp' or 'combined'; default 'combined').

    Responses: 200 with the prediction dict, 400 for a malformed request,
    503 when the intent resources have not been loaded.
    """
    # Check if request contains JSON
    if not request.is_json:
        return jsonify({"error": "Request must be JSON"}), 400

    # Get text from request; silent=True yields None on unparsable JSON.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'text' not in data:
        return jsonify({"error": "Missing 'text' field in request"}), 400

    text = data['text']
    if not isinstance(text, str) or not text.strip():
        return jsonify({"error": "'text' must be a non-empty string"}), 400

    # Default to combined method unless specified
    method = data.get('method', 'combined')
    if method not in ['energy', 'msp', 'combined']:
        return jsonify({"error": "Invalid method. Must be 'energy', 'msp', or 'combined'"}), 400

    # initialize_models() only logs a failed intent load, so the globals can
    # still be None here; answer 503 instead of failing with a TypeError.
    if intent_model is None or intent_tokenizer is None \
            or intent_classes is None or intent_thresholds is None:
        return jsonify({"error": "Intent model not loaded"}), 503

    # Make prediction
    result = predict_intent_with_enhanced_ood(
        text,
        intent_model,
        intent_tokenizer,
        intent_classes,
        intent_thresholds["energy_threshold"],
        intent_thresholds["msp_threshold"],
        method=method
    )

    # Return prediction as JSON
    return jsonify(result)
339
+
340
@app.route('/api/recommend', methods=['POST'])
def recommend():
    """Endpoint to get book recommendations based on user query.

    Request JSON: ``query`` (required), ``top_n`` (positive integer,
    default 5), ``include_description`` (boolean, default True).

    Responses: 200 with ``query``, ``recommendations`` and ``count``;
    400 for a malformed request; 503 when the recommender is not loaded;
    500 when recommendation generation fails.
    """
    if not recommender_model_loaded:
        return jsonify({
            "error": "Model not loaded",
            "message": "The recommendation model is not properly loaded."
        }), 503

    # silent=True yields None for a missing/invalid body instead of raising.
    data = request.get_json(silent=True)

    if not data or not isinstance(data, dict):
        return jsonify({
            "error": "Invalid request",
            "message": "No JSON data provided."
        }), 400

    query = data.get('query')
    top_n = data.get('top_n', 5)
    include_description = data.get('include_description', True)

    if not query:
        return jsonify({
            "error": "Missing parameter",
            "message": "Query parameter is required."
        }), 400

    # Validate top_n up front: a bad value is a client error (400), not a 500.
    try:
        top_n = int(top_n)
    except (TypeError, ValueError):
        top_n = 0
    if top_n < 1:
        return jsonify({
            "error": "Invalid parameter",
            "message": "top_n must be a positive integer."
        }), 400

    # bool("false") is True, so interpret string flags explicitly.
    if isinstance(include_description, str):
        include_description = include_description.strip().lower() in ('1', 'true', 'yes')
    else:
        include_description = bool(include_description)

    try:
        # Get recommendations
        recommendations = recommender.recommend_books(
            user_query=query,
            top_n=top_n,
            include_description=include_description
        )

        # CLEAN recommendations to make it JSON serializable
        def clean_np(obj):
            """Recursively convert numpy scalars/arrays to built-in Python types."""
            if isinstance(obj, np.integer):
                return int(obj)
            elif isinstance(obj, np.floating):
                return float(obj)
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            elif isinstance(obj, dict):
                return {k: clean_np(v) for k, v in obj.items()}
            elif isinstance(obj, list):
                return [clean_np(i) for i in obj]
            else:
                return obj

        recommendations_clean = clean_np(recommendations)

        return jsonify({
            "query": query,
            "recommendations": recommendations_clean,
            "count": len(recommendations_clean)
        })

    except Exception as e:
        logger.error(f"Error in recommendation endpoint: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred while processing your request: {str(e)}"
        }), 500
406
+
407
+
408
@app.route('/api/stats', methods=['GET'])
def get_stats():
    """Return summary statistics about the loaded book dataset.

    The response holds the number of books, the recommender's model name and
    up to ten category names taken from the head of ``value_counts()`` on the
    'Category' column. Responds 503 when the recommender is not loaded and
    500 when computing the statistics fails.
    """
    global recommender, recommender_model_loaded

    if not recommender_model_loaded:
        return jsonify({
            "error": "Model not loaded",
            "message": "The recommendation model is not properly loaded."
        }), 503

    try:
        books = recommender.df
        has_books = books is not None

        total_books = len(books) if has_books else 0
        model_name = recommender.model_name
        if has_books and 'Category' in books.columns:
            top_categories = list(books['Category'].value_counts().head(10).to_dict().keys())
        else:
            top_categories = []

        stats = {
            "total_books": total_books,
            "model_name": model_name,
            "categories": top_categories
        }
        return jsonify(stats)

    except Exception as e:
        logger.error(f"Error in stats endpoint: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred while retrieving stats: {str(e)}"
        }), 500
435
+
436
@app.route('/', methods=['GET'])
def index():
    """Describe the API: running status, available endpoints and version."""
    endpoints = {
        "intent_analysis": "Available at /api/analyze",
        "book_recommendations": "Available at /api/recommend",
        "statistics": "Available at /api/stats",
        "health_check": "Available at /api/health"
    }
    info = {
        "status": "API is running",
        "services": endpoints,
        "version": "1.0.0"
    }
    return jsonify(info)
449
+
450
if __name__ == '__main__':
    # Initialize models when the app starts.
    # NOTE(review): this runs only when the file is executed directly; a WSGI
    # server importing this module would not call it - confirm how app2 is served.
    initialize_models()

    # Set port from environment variable or default to 5000
    port = int(os.environ.get('PORT', 5000))

    # For development use debug=True, for production use debug=False
    app.run(host='0.0.0.0', port=port, debug=False)

# Example request:
# curl -X POST http://localhost:5000/api/recommend -H "Content-Type: application/json" -d '{"query": "programming for beginners", "top_n": 10, "include_description": true}'
app3.py ADDED
@@ -0,0 +1,580 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, render_template
2
+ from flask_cors import CORS
3
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
4
+ from sentence_transformers import SentenceTransformer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ import torch
7
+ import numpy as np
8
+ import pickle
9
+ import os
10
+ import json
11
+ import logging
12
+ import csv
13
+ import re
14
+ import nltk
15
+ from nltk.corpus import stopwords
16
+ from nltk.stem import WordNetLemmatizer
17
+ from datetime import datetime
18
+
19
# Download the NLTK resources used by BookRecommender.preprocess_text.
# 'punkt_tab' is what nltk.word_tokenize looks up on NLTK >= 3.8.2; 'punkt' is
# kept for older releases. Downloads are quiet and skipped when already cached.
for _nltk_resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet'):
    nltk.download(_nltk_resource, quiet=True)

# Configure logging: every record goes to combined_api.log and to the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("combined_api.log"),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)  # Enable Cross-Origin Resource Sharing

# Paths and constants. Model folders are resolved next to this file;
# EVAL_CSV is relative to the process working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INTENT_MODEL_PATH = os.path.join(BASE_DIR, "model")
RECOMMENDER_MODEL_PATH = os.path.join(BASE_DIR, "recommender_model")
EVAL_CSV = "model_evaluation.csv"

# Model state, filled in by initialize_models() / load_intent_resources().
intent_model = None
intent_tokenizer = None
intent_classes = None
intent_thresholds = None
recommender = None
recommender_model_loaded = False
47
+
48
+ #################################################
49
+ # Book Recommender System
50
+ #################################################
51
+
52
class BookRecommender:
    """Embedding-based book recommender.

    Holds a SentenceTransformer model, pre-computed book embeddings and the
    book metadata table; all three are populated by :meth:`load_model`.
    Queries are embedded with the same model and ranked against the stored
    embeddings by cosine similarity.
    """

    def __init__(self, model_name='all-minilm-l6-v2'):
        """Create an empty recommender; call :meth:`load_model` before use.

        Args:
            model_name: Label of the embedding model. It is replaced by the
                value stored in the saved configuration when
                :meth:`load_model` succeeds.
        """
        self.model_name = model_name
        self.model = None
        self.book_embeddings = None
        self.df = None
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()
        logger.info(f"BookRecommender initialized with model: {model_name}")

    def preprocess_text(self, text):
        """Normalize text: lowercase, strip punctuation, drop stopwords, lemmatize.

        Args:
            text: Raw text. Anything that is not a ``str`` yields ``""``.

        Returns:
            The surviving lemmatized tokens joined by single spaces.
        """
        if not isinstance(text, str):
            return ""

        # Lowercase and replace every non-word, non-space character with a space.
        text = text.lower()
        text = re.sub(r'[^\w\s]', ' ', text)

        # Tokenize, remove stopwords, and lemmatize.
        tokens = nltk.word_tokenize(text)
        tokens = [self.lemmatizer.lemmatize(word) for word in tokens if word not in self.stop_words]

        return ' '.join(tokens)

    def load_model(self, folder_path=RECOMMENDER_MODEL_PATH):
        """Load the saved configuration, model, embeddings and book table.

        Args:
            folder_path: Directory containing ``config.pkl``,
                ``sentence_transformer/``, ``book_embeddings.pkl`` and
                ``books_data.pkl``.

        Returns:
            True when everything loaded; False (after logging the error) when
            the folder is missing or any step raises.
        """
        try:
            if not os.path.exists(folder_path):
                logger.error(f"Model folder {folder_path} does not exist.")
                return False

            # SECURITY NOTE: pickle.load executes arbitrary code embedded in
            # the file. Only point folder_path at artifacts you produced and
            # trust.
            config_path = os.path.join(folder_path, "config.pkl")
            with open(config_path, 'rb') as f:
                config = pickle.load(f)
            self.model_name = config['model_name']
            logger.info(f"Loaded configuration: model_name={self.model_name}")

            # Load the sentence transformer model
            model_path = os.path.join(folder_path, "sentence_transformer")
            self.model = SentenceTransformer(model_path)
            logger.info(f"Model loaded from {model_path}")

            # Load book embeddings
            embeddings_path = os.path.join(folder_path, "book_embeddings.pkl")
            with open(embeddings_path, 'rb') as f:
                self.book_embeddings = pickle.load(f)
            logger.info(f"Embeddings loaded: {len(self.book_embeddings)} book vectors")

            # Load the DataFrame
            df_path = os.path.join(folder_path, "books_data.pkl")
            with open(df_path, 'rb') as f:
                self.df = pickle.load(f)
            logger.info(f"DataFrame loaded: {len(self.df)} books")

            return True

        except Exception as e:
            logger.error(f"Error loading model: {str(e)}", exc_info=True)
            return False

    def recommend_books(self, user_query, top_n=5, include_description=True):
        """Return the ``top_n`` books most similar to ``user_query``.

        Args:
            user_query: Free-text query; it is passed through
                :meth:`preprocess_text` before being embedded.
            top_n: Maximum number of books to return. A value of zero or less
                yields an empty list.
            include_description: When true and a ``Description`` column
                exists, add a description truncated to 200 characters.

        Returns:
            A list of dicts ordered from most to least similar. Each has
            ``relevance_score`` and ``rank`` (1-based) plus whichever of
            ``title``, ``author``, ``category``, ``year``, ``description``
            have a matching column. An empty list is returned when the model
            is not loaded or an error occurs.
        """
        if self.model is None or self.book_embeddings is None or self.df is None:
            logger.error("Model not initialized. Cannot make recommendations.")
            return []

        logger.info(f"Finding books similar to: '{user_query}'")

        try:
            # Without this guard top_n == 0 would reach the [-0:] slice below,
            # which selects every book, and a negative top_n would slice from
            # the wrong end.
            if top_n <= 0:
                logger.warning(f"Non-positive top_n ({top_n}); returning no recommendations")
                return []

            # Preprocess the query the same way as the book text
            processed_query = self.preprocess_text(user_query)
            user_embedding = self.model.encode([processed_query])

            # One similarity value per stored book embedding.
            similarities = cosine_similarity(user_embedding, self.book_embeddings)[0]

            # argsort is ascending: keep the last top_n indices, then reverse
            # so the best match comes first.
            similar_books_idx = np.argsort(similarities)[-top_n:][::-1]

            # (output key, source column) pairs copied verbatim when present.
            field_columns = (
                ('title', 'Title'),
                ('author', 'Authors'),
                ('category', 'Category'),
                ('year', 'Publish Date (Year)'),
            )
            columns = self.df.columns
            recommendations = []

            for rank, idx in enumerate(similar_books_idx, start=1):
                row = self.df.iloc[idx]
                book_data = {}

                for key, column in field_columns:
                    if column in columns:
                        book_data[key] = row[column]

                if include_description and 'Description' in columns:
                    description = row['Description']
                    if not isinstance(description, str):
                        # A missing description (e.g. NaN) used to make len()
                        # raise and discard every recommendation.
                        description = ""
                    elif len(description) > 200:
                        description = description[:197] + "..."
                    book_data['description'] = description

                book_data['relevance_score'] = float(similarities[idx])
                book_data['rank'] = rank

                recommendations.append(book_data)

            logger.info(f"Successfully generated {len(recommendations)} recommendations")
            return recommendations

        except Exception as e:
            logger.error(f"Error generating recommendations: {str(e)}", exc_info=True)
            return []
174
+
175
+ #################################################
176
+ # Intent Classification
177
+ #################################################
178
+
179
def setup_evaluation_csv():
    """Create the evaluation CSV with its header row unless it already exists."""
    if os.path.exists(EVAL_CSV):
        return

    header = [
        'timestamp',
        'input_text',
        'predicted_intent',
        'is_ood',
        'confidence',
        'energy_score',
        'detection_method'
    ]
    with open(EVAL_CSV, 'w', newline='') as csv_file:
        csv.writer(csv_file).writerow(header)

    logger.info(f"Created evaluation CSV file: {EVAL_CSV}")
194
+
195
def save_prediction_to_csv(input_text, result, method):
    """Append one prediction to the evaluation CSV.

    Args:
        input_text: The text that was classified.
        result: Prediction dict; the ``intent``, ``is_ood``, ``confidence``
            and ``energy_score`` entries are written.
        method: Name of the OOD detection method that was used.
    """
    with open(EVAL_CSV, 'a', newline='') as csv_file:
        # Column order mirrors the header written by setup_evaluation_csv.
        record = [
            datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            input_text,
            result['intent'],
            result['is_ood'],
            result['confidence'],
            result['energy_score'],
            method
        ]
        csv.writer(csv_file).writerow(record)
208
+
209
def load_ood_thresholds(model_path):
    """Load the OOD thresholds stored in ``<model_path>/ood_thresholds.json``.

    Args:
        model_path: Directory expected to contain ``ood_thresholds.json``.

    Returns:
        A dict that always contains ``energy_threshold`` and
        ``msp_threshold``. Values from the file win; a key missing from the
        file falls back to its default (0.0 and 0.5). When the file does not
        exist, the defaults are returned and a warning is logged.

    Raises:
        ValueError: If the file does not contain a JSON object.
    """
    defaults = {
        "energy_threshold": 0.0,  # Replace with your default value
        "msp_threshold": 0.5      # Replace with your default value
    }
    threshold_path = os.path.join(model_path, "ood_thresholds.json")

    if not os.path.exists(threshold_path):
        logger.warning(f"Threshold file not found at {threshold_path}. Using default values.")
        return defaults

    with open(threshold_path, "r") as f:
        loaded = json.load(f)

    if not isinstance(loaded, dict):
        raise ValueError(
            f"Expected a JSON object in {threshold_path}, got {type(loaded).__name__}"
        )

    # Fill in any key the file omits so callers can index both thresholds
    # without hitting a KeyError.
    return {**defaults, **loaded}
223
+
224
def load_intent_resources():
    """Load the intent model, tokenizer, class labels and OOD thresholds.

    Everything is loaded into locals first and published to the module-level
    globals only once every piece has loaded. A failure part-way through
    therefore leaves all four globals untouched instead of leaving the model
    set while the classes or thresholds are still ``None``.

    Returns:
        True on success; False (after logging the error) on any failure.
    """
    global intent_model, intent_tokenizer, intent_classes, intent_thresholds

    logger.info(f"Loading intent resources from {INTENT_MODEL_PATH}...")

    try:
        # Load model and tokenizer
        model = AutoModelForSequenceClassification.from_pretrained(INTENT_MODEL_PATH)
        tokenizer = AutoTokenizer.from_pretrained(INTENT_MODEL_PATH)

        # Load intent classes
        intent_classes_path = os.path.join(INTENT_MODEL_PATH, "intent_classes.pkl")
        if not os.path.exists(intent_classes_path):
            raise FileNotFoundError(f"Intent classes file not found at {intent_classes_path}")
        # SECURITY NOTE: pickle.load runs arbitrary code from the file; only
        # load class files you produced and trust.
        with open(intent_classes_path, "rb") as f:
            classes = pickle.load(f)

        # Load OOD thresholds
        thresholds = load_ood_thresholds(INTENT_MODEL_PATH)

        # Evaluated before publishing so an object without a length still
        # counts as a failed load.
        num_classes = len(classes)

        intent_model = model
        intent_tokenizer = tokenizer
        intent_classes = classes
        intent_thresholds = thresholds

        logger.info("Intent resources loaded successfully")
        logger.info(f"Loaded {num_classes} intent classes")
        logger.info(f"Thresholds: {intent_thresholds}")
        return True

    except Exception as e:
        logger.error(f"Failed to load intent resources: {str(e)}", exc_info=True)
        return False
254
+
255
def predict_intent_with_enhanced_ood(text, model, tokenizer, intent_classes,
                                     energy_threshold, msp_threshold, method='combined'):
    """Classify ``text`` and flag it as out-of-distribution (OOD) if applicable.

    Two scores are derived from the logits: the maximum softmax probability
    (reported as confidence) and the energy, computed as ``-logsumexp(logits)``.

    Args:
        text: Input passed to ``tokenizer``.
        model: Called with the tokenizer output; must expose ``.logits``.
        tokenizer: Callable producing the model inputs.
        intent_classes: Sequence mapping a class index to its label.
        energy_threshold: Energy above this value counts as OOD.
        msp_threshold: Confidence below this value counts as OOD.
        method: ``'energy'``, ``'msp'`` or ``'combined'`` (both conditions
            must hold). Any other value leaves ``is_ood`` False.

    Returns:
        Dict with ``intent``, ``is_ood``, ``confidence``, ``energy_score`` and
        ``class_probabilities`` (label -> probability for the first item).
    """
    logger.info("\n========== INTENT PREDICTION DEBUG ==========")
    logger.info(f"Input Text: {text}")
    logger.info(f"Detection Method: {method}")

    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Inference only, so gradients are not tracked.
    with torch.no_grad():
        logits = model(**encoded).logits

    logger.info(f"Logits: {logits.numpy().tolist()}")

    probs = torch.nn.functional.softmax(logits, dim=-1)
    max_prob, pred_idx = torch.max(probs, dim=-1)

    logger.info(f"Softmax Probabilities: {probs.numpy().tolist()}")
    confidence = max_prob.item()
    logger.info(f"Max Probability (Confidence): {confidence:.4f}")
    label_index = pred_idx.item()
    logger.info(f"Predicted Index: {label_index}")

    energy_value = (-torch.logsumexp(logits, dim=-1)).item()
    logger.info(f"Energy Score: {energy_value:.4f}")

    # Only the comparison belonging to the chosen method is evaluated.
    if method == 'energy':
        is_ood = energy_value > energy_threshold
    elif method == 'msp':
        is_ood = confidence < msp_threshold
    elif method == 'combined':
        is_ood = (energy_value > energy_threshold) and (confidence < msp_threshold)
    else:
        is_ood = False

    logger.info(f"OOD Detection -> is_ood: {is_ood}")
    logger.info(
        "Prediction marked as OUT-OF-DISTRIBUTION." if is_ood
        else "Prediction marked as IN-DISTRIBUTION."
    )

    predicted_intent = intent_classes[label_index]
    logger.info(f"Predicted Intent: {predicted_intent}")
    logger.info("=============================================\n")

    # Probability of every class, kept for detailed analysis.
    class_probabilities = {}
    for class_index, probability in enumerate(probs[0].numpy()):
        class_probabilities[intent_classes[class_index]] = float(probability)

    return {
        "intent": predicted_intent,
        "is_ood": is_ood,
        "confidence": confidence,
        "energy_score": energy_value,
        "class_probabilities": class_probabilities
    }
317
+
318
+ #################################################
319
+ # Server Initialization
320
+ #################################################
321
+
322
def initialize_models():
    """Prepare the evaluation CSV and load both models.

    Returns:
        The combined result of the intent load and the recommender load;
        truthy only when both succeeded.
    """
    global recommender, recommender_model_loaded

    # Make sure the evaluation CSV exists before any prediction is logged.
    setup_evaluation_csv()

    # Intent classifier
    intent_model_loaded = load_intent_resources()
    if not intent_model_loaded:
        logger.error("Failed to load intent model.")
    else:
        logger.info("Intent classification model loaded successfully!")

    # Book recommender
    recommender = BookRecommender()
    recommender_model_loaded = recommender.load_model()
    if not recommender_model_loaded:
        logger.error("Failed to load book recommendation model.")
    else:
        logger.info("Book recommendation model loaded successfully!")

    return intent_model_loaded and recommender_model_loaded
345
+
346
+ #################################################
347
+ # API Routes
348
+ #################################################
349
+
350
@app.route('/api/health', methods=['GET'])
def health_check():
    """Report whether the API is up and which models are loaded.

    ``status`` is ``"healthy"`` when both models are loaded,
    ``"partially_healthy"`` when exactly one is, and ``"unhealthy"`` otherwise.
    """
    intent_models_loaded = intent_model is not None and intent_tokenizer is not None

    if intent_models_loaded and recommender_model_loaded:
        status = "healthy"
    elif intent_models_loaded or recommender_model_loaded:
        status = "partially_healthy"
    else:
        status = "unhealthy"

    endpoints = [
        "/api/health",
        "/api/analyze",
        "/api/recommend",
        "/api/stats",
        "/api/download_eval_data"
    ]

    return jsonify({
        "status": status,
        "intent_model_loaded": intent_models_loaded,
        "recommender_model_loaded": recommender_model_loaded,
        "available_endpoints": endpoints
    })
367
+
368
+ #################################################
369
+ # Intent Classification Routes
370
+ #################################################
371
+
372
@app.route('/api/analyze', methods=['POST'])
def analyze():
    """Predict the intent of the ``text`` field of a JSON request.

    Request body: ``{"text": <str>, "method": "energy" | "msp" | "combined"}``
    (``method`` is optional and defaults to ``"combined"``).

    Returns:
        The prediction dict as JSON on success; 400 for a malformed request;
        503 while the intent resources are not loaded; 500 with an error
        payload if the prediction itself fails.
    """
    # Identity checks on purpose: the class list may be an array-like object
    # whose truth value or equality comparison is not a plain bool.
    resources = (intent_model, intent_tokenizer, intent_classes, intent_thresholds)
    if any(resource is None for resource in resources):
        return jsonify({
            "error": "Model not loaded",
            "message": "The intent classification model is not properly loaded."
        }), 503

    # Check if request contains JSON
    if not request.is_json:
        return jsonify({"error": "Request must be JSON"}), 400

    # silent=True turns an unparsable body into None instead of an exception.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'text' not in data:
        return jsonify({"error": "Missing 'text' field in request"}), 400

    text = data['text']
    if not isinstance(text, str):
        return jsonify({"error": "'text' must be a string"}), 400

    # Default to combined method unless specified
    method = data.get('method', 'combined')
    if method not in ['energy', 'msp', 'combined']:
        return jsonify({"error": "Invalid method. Must be 'energy', 'msp', or 'combined'"}), 400

    try:
        result = predict_intent_with_enhanced_ood(
            text,
            intent_model,
            intent_tokenizer,
            intent_classes,
            intent_thresholds["energy_threshold"],
            intent_thresholds["msp_threshold"],
            method=method
        )
    except Exception as e:
        logger.error(f"Error in analyze endpoint: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred while processing your request: {str(e)}"
        }), 500

    # Evaluation logging is best-effort: a failed CSV write must not discard
    # a prediction that was computed successfully.
    try:
        save_prediction_to_csv(text, result, method)
    except Exception as e:
        logger.error(f"Could not save prediction to {EVAL_CSV}: {str(e)}", exc_info=True)

    # Return prediction as JSON
    return jsonify(result)
407
+
408
@app.route('/api/stats', methods=['GET'])
def get_stats():
    """Return model information and usage statistics.

    The response has three sections: ``intent_model_info`` (class count,
    model path, thresholds), ``recommender_model_info`` (model name, book
    count) and ``usage``. ``usage`` is filled from the evaluation CSV when it
    exists (total queries, OOD count, five most frequent predicted intents)
    and is an empty object otherwise. Any failure yields a 500 JSON error.
    """
    try:
        # "is not None" rather than truthiness: a numpy array of class labels
        # raises on bool(), which used to turn this endpoint into a 500.
        num_intent_classes = len(intent_classes) if intent_classes is not None else 0
        has_recommender = recommender is not None

        stats = {
            "intent_model_info": {
                "num_intent_classes": num_intent_classes,
                "model_path": INTENT_MODEL_PATH,
                "thresholds": intent_thresholds
            },
            "recommender_model_info": {
                "model_name": recommender.model_name if has_recommender else None,
                "num_books": len(recommender.df) if has_recommender and recommender.df is not None else 0
            },
            "usage": {}
        }

        # Read CSV to generate statistics if it exists
        if os.path.exists(EVAL_CSV):
            # newline='' lets the csv module handle line endings itself, as
            # its documentation recommends for file objects.
            with open(EVAL_CSV, 'r', newline='') as f:
                rows = list(csv.DictReader(f))

            # Count how often each intent was predicted.
            intent_counts = {}
            for row in rows:
                intent = row["predicted_intent"]
                intent_counts[intent] = intent_counts.get(intent, 0) + 1

            # Five most frequent intents; ties keep first-seen order.
            top_intents = sorted(intent_counts.items(), key=lambda x: x[1], reverse=True)[:5]

            stats["usage"] = {
                "total_queries": len(rows),
                # The CSV stores booleans as text, hence the string comparison.
                "ood_count": sum(1 for row in rows if row["is_ood"] == "True"),
                "top_intents": dict(top_intents)
            }

        return jsonify(stats)

    except Exception as e:
        logger.error(f"Error in stats endpoint: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred while retrieving stats: {str(e)}"
        }), 500
457
+
458
@app.route('/api/download_eval_data', methods=['GET'])
def download_eval_data():
    """Return every row of the evaluation CSV as JSON.

    Returns:
        ``{"count": <rows>, "data": [<row dicts>]}`` on success, a 404 JSON
        error when the CSV does not exist, or a 500 JSON error on failure.
    """
    try:
        if not os.path.exists(EVAL_CSV):
            return jsonify({"error": "No evaluation data available yet"}), 404

        with open(EVAL_CSV, 'r') as csv_file:
            records = [record for record in csv.DictReader(csv_file)]

        payload = {
            "count": len(records),
            "data": records
        }
        return jsonify(payload)

    except Exception as e:
        logger.error(f"Error downloading evaluation data: {str(e)}", exc_info=True)
        failure = {
            "error": "Processing error",
            "message": f"An error occurred: {str(e)}"
        }
        return jsonify(failure), 500
480
+
481
+ #################################################
482
+ # Book Recommender Routes
483
+ #################################################
484
+
485
@app.route('/api/recommend', methods=['POST'])
def recommend():
    """Return book recommendations for a query, split by relevance threshold.

    Request body (JSON object):
        query: Required non-empty string.
        top_n: Optional positive integer, default 5.
        include_description: Optional flag, default true.
        threshold: Optional number, default 0.5; recommendations scoring at
            or above it go to ``high_recommendations``, the rest to
            ``low_recommendations``.

    Returns:
        The recommendations as JSON; 400 for an invalid request; 503 while
        the recommender is not loaded; 500 if processing fails.
    """
    if not recommender_model_loaded:
        return jsonify({
            "error": "Model not loaded",
            "message": "The recommendation model is not properly loaded."
        }), 503

    # silent=True: a missing, non-JSON or malformed body becomes None instead
    # of raising before our own error handling runs.
    data = request.get_json(silent=True)

    if not data or not isinstance(data, dict):
        return jsonify({
            "error": "Invalid request",
            "message": "No JSON data provided."
        }), 400

    query = data.get('query')
    if not isinstance(query, str) or not query.strip():
        return jsonify({
            "error": "Missing parameter",
            "message": "Query parameter is required."
        }), 400

    # Validate numeric parameters up front so a client mistake is reported
    # as 400 rather than surfacing as a 500 from deeper in the call.
    try:
        top_n = int(data.get('top_n', 5))
    except (TypeError, ValueError):
        top_n = 0
    if top_n < 1:
        return jsonify({
            "error": "Invalid parameter",
            "message": "top_n must be a positive integer."
        }), 400

    threshold = data.get('threshold', 0.5)  # default threshold
    try:
        threshold_value = float(threshold)
    except (TypeError, ValueError):
        return jsonify({
            "error": "Invalid parameter",
            "message": "threshold must be a number."
        }), 400

    include_description = bool(data.get('include_description', True))

    try:
        # Get recommendations
        recommendations = recommender.recommend_books(
            user_query=query,
            top_n=top_n,
            include_description=include_description
        )

        def clean_np(obj):
            """Recursively convert numpy scalars/arrays to plain Python values."""
            if isinstance(obj, np.integer):
                return int(obj)
            elif isinstance(obj, np.floating):
                return float(obj)
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            elif isinstance(obj, dict):
                return {k: clean_np(v) for k, v in obj.items()}
            elif isinstance(obj, list):
                return [clean_np(i) for i in obj]
            else:
                return obj

        recommendations_clean = clean_np(recommendations)

        # Split based on threshold
        high_score = [rec for rec in recommendations_clean if rec['relevance_score'] >= threshold_value]
        low_score = [rec for rec in recommendations_clean if rec['relevance_score'] < threshold_value]

        return jsonify({
            "query": query,
            "threshold": threshold,
            "high_recommendations": high_score,
            "low_recommendations": low_score,
            "total_count": len(recommendations_clean),
            "high_count": len(high_score),
            "low_count": len(low_score)
        })

    except Exception as e:
        logger.error(f"Error in recommendation endpoint: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred while processing your request: {str(e)}"
        }), 500
560
+
561
+ #################################################
562
+ # Main
563
+ #################################################
564
+
565
if __name__ == '__main__':
    # Load every model up front; the result records whether both loaded.
    models_loaded = initialize_models()

    # The listening port comes from the PORT environment variable (default 5000).
    port = int(os.getenv('PORT', 5000))

    # debug and the auto-reloader are both disabled here.
    app.run(host='0.0.0.0', port=port, debug=False, use_reloader=False)
574
+
575
+
576
+
577
+ # Example: curl -X POST http://localhost:5000/api/analyze -H "Content-Type: application/json" -d '{"text": "cariin buku", "method": "combined"}'
578
+
579
+ # Example: curl -X POST http://localhost:5000/api/recommend -H "Content-Type: application/json" -d '{"query": "programming for beginners", "top_n": 10, "include_description": true}'
580
+
app4.py ADDED
@@ -0,0 +1,580 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, render_template
2
+ from flask_cors import CORS
3
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
4
+ from sentence_transformers import SentenceTransformer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ import torch
7
+ import numpy as np
8
+ import pickle
9
+ import os
10
+ import json
11
+ import logging
12
+ import csv
13
+ import re
14
+ import nltk
15
+ from nltk.corpus import stopwords
16
+ from nltk.stem import WordNetLemmatizer
17
+ from datetime import datetime
18
+
19
# Download the NLTK resources used by BookRecommender.preprocess_text.
# 'punkt_tab' is what nltk.word_tokenize looks up on NLTK >= 3.8.2; 'punkt' is
# kept for older releases. Downloads are quiet and skipped when already cached.
for _nltk_resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet'):
    nltk.download(_nltk_resource, quiet=True)

# Configure logging: every record goes to combined_api.log and to the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("combined_api.log"),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)  # Enable Cross-Origin Resource Sharing

# Paths and constants. Model folders are resolved next to this file;
# EVAL_CSV is relative to the process working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INTENT_MODEL_PATH = os.path.join(BASE_DIR, "model")
RECOMMENDER_MODEL_PATH = os.path.join(BASE_DIR, "recommender_model")
EVAL_CSV = "model_evaluation.csv"

# Model state, filled in by initialize_models() / load_intent_resources().
intent_model = None
intent_tokenizer = None
intent_classes = None
intent_thresholds = None
recommender = None
recommender_model_loaded = False
47
+
48
+ #################################################
49
+ # Book Recommender System
50
+ #################################################
51
+
52
class BookRecommender:
    """Embedding-based book recommender.

    Holds a SentenceTransformer model, pre-computed book embeddings and the
    book metadata table; all three are populated by :meth:`load_model`.
    Queries are embedded with the same model and ranked against the stored
    embeddings by cosine similarity.
    """

    def __init__(self, model_name='all-minilm-l6-v2'):
        """Create an empty recommender; call :meth:`load_model` before use.

        Args:
            model_name: Label of the embedding model. It is replaced by the
                value stored in the saved configuration when
                :meth:`load_model` succeeds.
        """
        self.model_name = model_name
        self.model = None
        self.book_embeddings = None
        self.df = None
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()
        logger.info(f"BookRecommender initialized with model: {model_name}")

    def preprocess_text(self, text):
        """Normalize text: lowercase, strip punctuation, drop stopwords, lemmatize.

        Args:
            text: Raw text. Anything that is not a ``str`` yields ``""``.

        Returns:
            The surviving lemmatized tokens joined by single spaces.
        """
        if not isinstance(text, str):
            return ""

        # Lowercase and replace every non-word, non-space character with a space.
        text = text.lower()
        text = re.sub(r'[^\w\s]', ' ', text)

        # Tokenize, remove stopwords, and lemmatize.
        tokens = nltk.word_tokenize(text)
        tokens = [self.lemmatizer.lemmatize(word) for word in tokens if word not in self.stop_words]

        return ' '.join(tokens)

    def load_model(self, folder_path=RECOMMENDER_MODEL_PATH):
        """Load the saved configuration, model, embeddings and book table.

        Args:
            folder_path: Directory containing ``config.pkl``,
                ``sentence_transformer/``, ``book_embeddings.pkl`` and
                ``books_data.pkl``.

        Returns:
            True when everything loaded; False (after logging the error) when
            the folder is missing or any step raises.
        """
        try:
            if not os.path.exists(folder_path):
                logger.error(f"Model folder {folder_path} does not exist.")
                return False

            # SECURITY NOTE: pickle.load executes arbitrary code embedded in
            # the file. Only point folder_path at artifacts you produced and
            # trust.
            config_path = os.path.join(folder_path, "config.pkl")
            with open(config_path, 'rb') as f:
                config = pickle.load(f)
            self.model_name = config['model_name']
            logger.info(f"Loaded configuration: model_name={self.model_name}")

            # Load the sentence transformer model
            model_path = os.path.join(folder_path, "sentence_transformer")
            self.model = SentenceTransformer(model_path)
            logger.info(f"Model loaded from {model_path}")

            # Load book embeddings
            embeddings_path = os.path.join(folder_path, "book_embeddings.pkl")
            with open(embeddings_path, 'rb') as f:
                self.book_embeddings = pickle.load(f)
            logger.info(f"Embeddings loaded: {len(self.book_embeddings)} book vectors")

            # Load the DataFrame
            df_path = os.path.join(folder_path, "books_data.pkl")
            with open(df_path, 'rb') as f:
                self.df = pickle.load(f)
            logger.info(f"DataFrame loaded: {len(self.df)} books")

            return True

        except Exception as e:
            logger.error(f"Error loading model: {str(e)}", exc_info=True)
            return False

    def recommend_books(self, user_query, top_n=5, include_description=True):
        """Return the ``top_n`` books most similar to ``user_query``.

        Args:
            user_query: Free-text query; it is passed through
                :meth:`preprocess_text` before being embedded.
            top_n: Maximum number of books to return. A value of zero or less
                yields an empty list.
            include_description: When true and a ``Description`` column
                exists, add a description truncated to 200 characters.

        Returns:
            A list of dicts ordered from most to least similar. Each has
            ``relevance_score`` and ``rank`` (1-based) plus whichever of
            ``title``, ``author``, ``category``, ``year``, ``description``
            have a matching column. An empty list is returned when the model
            is not loaded or an error occurs.
        """
        if self.model is None or self.book_embeddings is None or self.df is None:
            logger.error("Model not initialized. Cannot make recommendations.")
            return []

        logger.info(f"Finding books similar to: '{user_query}'")

        try:
            # Without this guard top_n == 0 would reach the [-0:] slice below,
            # which selects every book, and a negative top_n would slice from
            # the wrong end.
            if top_n <= 0:
                logger.warning(f"Non-positive top_n ({top_n}); returning no recommendations")
                return []

            # Preprocess the query the same way as the book text
            processed_query = self.preprocess_text(user_query)
            user_embedding = self.model.encode([processed_query])

            # One similarity value per stored book embedding.
            similarities = cosine_similarity(user_embedding, self.book_embeddings)[0]

            # argsort is ascending: keep the last top_n indices, then reverse
            # so the best match comes first.
            similar_books_idx = np.argsort(similarities)[-top_n:][::-1]

            # (output key, source column) pairs copied verbatim when present.
            field_columns = (
                ('title', 'Title'),
                ('author', 'Authors'),
                ('category', 'Category'),
                ('year', 'Publish Date (Year)'),
            )
            columns = self.df.columns
            recommendations = []

            for rank, idx in enumerate(similar_books_idx, start=1):
                row = self.df.iloc[idx]
                book_data = {}

                for key, column in field_columns:
                    if column in columns:
                        book_data[key] = row[column]

                if include_description and 'Description' in columns:
                    description = row['Description']
                    if not isinstance(description, str):
                        # A missing description (e.g. NaN) used to make len()
                        # raise and discard every recommendation.
                        description = ""
                    elif len(description) > 200:
                        description = description[:197] + "..."
                    book_data['description'] = description

                book_data['relevance_score'] = float(similarities[idx])
                book_data['rank'] = rank

                recommendations.append(book_data)

            logger.info(f"Successfully generated {len(recommendations)} recommendations")
            return recommendations

        except Exception as e:
            logger.error(f"Error generating recommendations: {str(e)}", exc_info=True)
            return []
174
+
175
+ #################################################
176
+ # Intent Classification
177
+ #################################################
178
+
179
def setup_evaluation_csv():
    """Create the evaluation CSV with its header row unless it already exists."""
    if os.path.exists(EVAL_CSV):
        return

    header = [
        'timestamp',
        'input_text',
        'predicted_intent',
        'is_ood',
        'confidence',
        'energy_score',
        'detection_method'
    ]
    with open(EVAL_CSV, 'w', newline='') as csv_file:
        csv.writer(csv_file).writerow(header)

    logger.info(f"Created evaluation CSV file: {EVAL_CSV}")
194
+
195
def save_prediction_to_csv(input_text, result, method):
    """Append one prediction to the evaluation CSV.

    Args:
        input_text: The text that was classified.
        result: Prediction dict; the ``intent``, ``is_ood``, ``confidence``
            and ``energy_score`` entries are written.
        method: Name of the OOD detection method that was used.
    """
    with open(EVAL_CSV, 'a', newline='') as csv_file:
        # Column order mirrors the header written by setup_evaluation_csv.
        record = [
            datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            input_text,
            result['intent'],
            result['is_ood'],
            result['confidence'],
            result['energy_score'],
            method
        ]
        csv.writer(csv_file).writerow(record)
208
+
209
def load_ood_thresholds(model_path):
    """Load the OOD thresholds stored in ``<model_path>/ood_thresholds.json``.

    Args:
        model_path: Directory expected to contain ``ood_thresholds.json``.

    Returns:
        A dict that always contains ``energy_threshold`` and
        ``msp_threshold``. Values from the file win; a key missing from the
        file falls back to its default (0.0 and 0.5). When the file does not
        exist, the defaults are returned and a warning is logged.

    Raises:
        ValueError: If the file does not contain a JSON object.
    """
    defaults = {
        "energy_threshold": 0.0,  # Replace with your default value
        "msp_threshold": 0.5      # Replace with your default value
    }
    threshold_path = os.path.join(model_path, "ood_thresholds.json")

    if not os.path.exists(threshold_path):
        logger.warning(f"Threshold file not found at {threshold_path}. Using default values.")
        return defaults

    with open(threshold_path, "r") as f:
        loaded = json.load(f)

    if not isinstance(loaded, dict):
        raise ValueError(
            f"Expected a JSON object in {threshold_path}, got {type(loaded).__name__}"
        )

    # Fill in any key the file omits so callers can index both thresholds
    # without hitting a KeyError.
    return {**defaults, **loaded}
223
+
224
def load_intent_resources():
    """Load the intent model, tokenizer, class labels and OOD thresholds.

    The model and tokenizer are fetched by the identifier
    ``ZEROTSUDIOS/Bipa-Classification``; the class labels and thresholds are
    read from ``INTENT_MODEL_PATH``. Everything is loaded into locals first
    and published to the module-level globals only once every piece has
    loaded. A failure part-way through therefore leaves all four globals
    untouched instead of leaving the model set while the classes or
    thresholds are still ``None``.

    Returns:
        True on success; False (after logging the error) on any failure.
    """
    global intent_model, intent_tokenizer, intent_classes, intent_thresholds

    logger.info(f"Loading intent resources from {INTENT_MODEL_PATH}...")

    try:
        # Load model and tokenizer
        model = AutoModelForSequenceClassification.from_pretrained("ZEROTSUDIOS/Bipa-Classification")
        tokenizer = AutoTokenizer.from_pretrained("ZEROTSUDIOS/Bipa-Classification")

        # Load intent classes
        intent_classes_path = os.path.join(INTENT_MODEL_PATH, "intent_classes.pkl")
        if not os.path.exists(intent_classes_path):
            raise FileNotFoundError(f"Intent classes file not found at {intent_classes_path}")
        # SECURITY NOTE: pickle.load runs arbitrary code from the file; only
        # load class files you produced and trust.
        with open(intent_classes_path, "rb") as f:
            classes = pickle.load(f)

        # Load OOD thresholds
        thresholds = load_ood_thresholds(INTENT_MODEL_PATH)

        # Evaluated before publishing so an object without a length still
        # counts as a failed load.
        num_classes = len(classes)

        intent_model = model
        intent_tokenizer = tokenizer
        intent_classes = classes
        intent_thresholds = thresholds

        logger.info("Intent resources loaded successfully")
        logger.info(f"Loaded {num_classes} intent classes")
        logger.info(f"Thresholds: {intent_thresholds}")
        return True

    except Exception as e:
        logger.error(f"Failed to load intent resources: {str(e)}", exc_info=True)
        return False
254
+
255
def predict_intent_with_enhanced_ood(text, model, tokenizer, intent_classes,
                                     energy_threshold, msp_threshold, method='combined'):
    """
    Predict intent with enhanced out-of-distribution detection and detailed logging.

    The text is tokenized, passed through the model, and the logits are
    turned into softmax probabilities and an energy score
    (``-logsumexp(logits)``). The OOD flag is derived according to ``method``:

    * ``'energy'``   - OOD when the energy score is above ``energy_threshold``
    * ``'msp'``      - OOD when the max softmax probability is below ``msp_threshold``
    * ``'combined'`` - OOD only when both conditions above hold

    Any other ``method`` value leaves the prediction marked as
    in-distribution; a warning is logged in that case.

    Args:
        text: Input passed to ``tokenizer``.
        model: Callable returning an object with a ``logits`` tensor.
        tokenizer: Callable producing the keyword inputs for ``model``.
        intent_classes: Indexable collection mapping class index to label.
        energy_threshold: Energy score above which the input counts as OOD.
        msp_threshold: Confidence below which the input counts as OOD.
        method: One of ``'energy'``, ``'msp'`` or ``'combined'``.

    Returns:
        dict: ``intent``, ``is_ood``, ``confidence``, ``energy_score`` and
        ``class_probabilities`` (label -> probability).
    """
    logger.info("\n========== INTENT PREDICTION DEBUG ==========")
    logger.info(f"Input Text: {text}")
    logger.info(f"Detection Method: {method}")

    # Tokenize input
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Get model outputs
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits

    # .tolist() is used instead of .numpy() because .numpy() raises for
    # tensors that do not live on the CPU.
    logger.info(f"Logits: {logits.tolist()}")

    # Get probabilities
    probs = torch.nn.functional.softmax(logits, dim=-1)
    max_prob, pred_idx = torch.max(probs, dim=-1)
    confidence = max_prob.item()
    predicted_index = pred_idx.item()

    logger.info(f"Softmax Probabilities: {probs.tolist()}")
    logger.info(f"Max Probability (Confidence): {confidence:.4f}")
    logger.info(f"Predicted Index: {predicted_index}")

    # Calculate energy score
    energy = -torch.logsumexp(logits, dim=-1)
    energy_value = energy.item()
    logger.info(f"Energy Score: {energy_value:.4f}")

    # OOD detection
    is_ood = False
    if method == 'energy':
        is_ood = energy_value > energy_threshold
    elif method == 'msp':
        is_ood = confidence < msp_threshold
    elif method == 'combined':
        is_ood = (energy_value > energy_threshold) and (confidence < msp_threshold)
    else:
        logger.warning(f"Unrecognized detection method '{method}'; OOD detection skipped.")

    logger.info(f"OOD Detection -> is_ood: {is_ood}")
    if is_ood:
        logger.info("Prediction marked as OUT-OF-DISTRIBUTION.")
    else:
        logger.info("Prediction marked as IN-DISTRIBUTION.")

    # Get intent label
    predicted_intent = intent_classes[predicted_index]
    logger.info(f"Predicted Intent: {predicted_intent}")
    logger.info("=============================================\n")

    return {
        "intent": predicted_intent,
        "is_ood": is_ood,
        "confidence": confidence,
        "energy_score": energy_value,
        # Add all class probabilities for detailed analysis
        "class_probabilities": {
            intent_classes[i]: float(prob)
            for i, prob in enumerate(probs[0].tolist())
        }
    }
317
+
318
+ #################################################
319
+ # Server Initialization
320
+ #################################################
321
+
322
def initialize_models():
    """Prepare the evaluation CSV and load both models at startup.

    Returns the result of ``intent_model_loaded and recommender_model_loaded``,
    i.e. a truthy value only when both loaders reported success.
    """
    global recommender, recommender_model_loaded

    # Make sure the evaluation CSV is in place before any request arrives
    setup_evaluation_csv()

    # --- Intent classifier ---
    intent_model_loaded = load_intent_resources()
    if not intent_model_loaded:
        logger.error("Failed to load intent model.")
    else:
        logger.info("Intent classification model loaded successfully!")

    # --- Book recommender ---
    recommender = BookRecommender()
    recommender_model_loaded = recommender.load_model()
    if not recommender_model_loaded:
        logger.error("Failed to load book recommendation model.")
    else:
        logger.info("Book recommendation model loaded successfully!")

    return intent_model_loaded and recommender_model_loaded
345
+
346
+ #################################################
347
+ # API Routes
348
+ #################################################
349
+
350
@app.route('/api/health', methods=['GET'])
def health_check():
    """Report whether the API is up and which models are loaded.

    ``status`` is "healthy" when both the intent model (model + tokenizer)
    and the recommender are loaded, "partially_healthy" when only one of
    them is, and "unhealthy" when neither is.
    """
    intent_models_loaded = intent_model is not None and intent_tokenizer is not None

    if intent_models_loaded and recommender_model_loaded:
        overall_status = "healthy"
    elif intent_models_loaded or recommender_model_loaded:
        overall_status = "partially_healthy"
    else:
        overall_status = "unhealthy"

    endpoints = [
        "/api/health",
        "/api/analyze",
        "/api/recommend",
        "/api/stats",
        "/api/download_eval_data"
    ]

    return jsonify({
        "status": overall_status,
        "intent_model_loaded": intent_models_loaded,
        "recommender_model_loaded": recommender_model_loaded,
        "available_endpoints": endpoints
    })
367
+
368
+ #################################################
369
+ # Intent Classification Routes
370
+ #################################################
371
+
372
@app.route('/api/analyze', methods=['POST'])
def analyze():
    """Endpoint to predict intent from text.

    Expects a JSON object with a string ``text`` field and an optional
    ``method`` field (``'energy'``, ``'msp'`` or ``'combined'``; defaults to
    ``'combined'``).

    Responses:
        200 - prediction dictionary from ``predict_intent_with_enhanced_ood``
        400 - request is not JSON, ``text`` is missing / not a string, or
              ``method`` is invalid
        503 - the intent model resources are not loaded
    """
    # Check if request contains JSON
    if not request.is_json:
        return jsonify({"error": "Request must be JSON"}), 400

    # Get text from request; silent=True turns a malformed body into None
    # so it is reported through the same JSON error shape as below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'text' not in data:
        return jsonify({"error": "Missing 'text' field in request"}), 400

    text = data['text']
    if not isinstance(text, str):
        return jsonify({"error": "'text' must be a string"}), 400

    # Default to combined method unless specified
    method = data.get('method', 'combined')
    if method not in ['energy', 'msp', 'combined']:
        return jsonify({"error": "Invalid method. Must be 'energy', 'msp', or 'combined'"}), 400

    # Refuse to predict when startup loading failed, instead of crashing
    # on a None model further down.
    if intent_model is None or intent_tokenizer is None or not intent_thresholds:
        return jsonify({
            "error": "Model not loaded",
            "message": "The intent classification model is not properly loaded."
        }), 503

    # Make prediction
    result = predict_intent_with_enhanced_ood(
        text,
        intent_model,
        intent_tokenizer,
        intent_classes,
        intent_thresholds["energy_threshold"],
        intent_thresholds["msp_threshold"],
        method=method
    )

    # Save result to CSV for evaluation. This is best-effort bookkeeping:
    # a locked or read-only CSV (e.g. PermissionError while the file is open
    # in another program) must not turn a successful prediction into a 500.
    try:
        save_prediction_to_csv(text, result, method)
    except OSError as e:
        logger.warning(f"Could not save prediction to evaluation CSV: {str(e)}")

    # Return prediction as JSON
    return jsonify(result)
407
+
408
@app.route('/api/stats', methods=['GET'])
def get_stats():
    """Get statistics about model usage and predictions.

    Returns static information about the intent model and the recommender,
    plus usage figures derived from the evaluation CSV (``EVAL_CSV``) when
    that file exists: total number of logged queries, how many were flagged
    as OOD, and the five most frequently predicted intents.

    Any exception is logged and reported as a 500 JSON error.
    """
    try:
        stats = {
            "intent_model_info": {
                "num_intent_classes": len(intent_classes) if intent_classes else 0,
                "model_path": INTENT_MODEL_PATH,
                "thresholds": intent_thresholds
            },
            "recommender_model_info": {
                "model_name": recommender.model_name if recommender else None,
                "num_books": len(recommender.df) if recommender and recommender.df is not None else 0
            },
            "usage": {}
        }

        # Read CSV to generate statistics if it exists
        if os.path.exists(EVAL_CSV):
            # newline='' lets the csv module handle line endings itself, so
            # quoted fields containing line breaks (user text) parse correctly.
            with open(EVAL_CSV, 'r', newline='') as f:
                rows = list(csv.DictReader(f))

            # Count intents for statistical analysis
            intent_counts = {}
            for row in rows:
                intent = row["predicted_intent"]
                intent_counts[intent] = intent_counts.get(intent, 0) + 1

            # Get top 5 intents
            top_intents = sorted(intent_counts.items(), key=lambda x: x[1], reverse=True)[:5]

            stats["usage"] = {
                "total_queries": len(rows),
                # is_ood is stored as the text "True"/"False" in the CSV
                "ood_count": sum(1 for row in rows if row["is_ood"] == "True"),
                "top_intents": dict(top_intents)
            }

        return jsonify(stats)

    except Exception as e:
        logger.error(f"Error in stats endpoint: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred while retrieving stats: {str(e)}"
        }), 500
457
+
458
@app.route('/api/download_eval_data', methods=['GET'])
def download_eval_data():
    """Return the evaluation data as JSON for analysis.

    Responds with ``{"count": <rows>, "data": [<row dicts>]}`` built from
    ``EVAL_CSV``, with 404 when the file does not exist yet, and with a 500
    JSON error when reading fails.
    """
    try:
        if not os.path.exists(EVAL_CSV):
            return jsonify({"error": "No evaluation data available yet"}), 404

        # newline='' lets the csv module handle line endings itself, so
        # quoted fields containing line breaks (user text) parse correctly.
        with open(EVAL_CSV, 'r', newline='') as f:
            rows = list(csv.DictReader(f))

        return jsonify({
            "count": len(rows),
            "data": rows
        })

    except Exception as e:
        logger.error(f"Error downloading evaluation data: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred: {str(e)}"
        }), 500
480
+
481
+ #################################################
482
+ # Book Recommender Routes
483
+ #################################################
484
+
485
@app.route('/api/recommend', methods=['POST'])
def recommend():
    """Endpoint to get book recommendations based on user query.

    Expected JSON body:
        query (required)               - the user's search text
        top_n (optional, default 5)    - number of recommendations, integer >= 1
        include_description (optional, default True)
        threshold (optional, default 0.5) - relevance score separating
            "high" from "low" recommendations

    Responses:
        200 - recommendations split into high/low lists with counts
        400 - missing/invalid JSON or parameters
        500 - error raised while computing recommendations
        503 - recommender model not loaded
    """
    if not recommender_model_loaded:
        return jsonify({
            "error": "Model not loaded",
            "message": "The recommendation model is not properly loaded."
        }), 503

    # silent=True: a missing or malformed JSON body becomes None and is
    # reported with the JSON error below.
    data = request.get_json(silent=True)

    if not data or not isinstance(data, dict):
        return jsonify({
            "error": "Invalid request",
            "message": "No JSON data provided."
        }), 400

    query = data.get('query')
    include_description = data.get('include_description', True)

    if not query:
        return jsonify({
            "error": "Missing parameter",
            "message": "Query parameter is required."
        }), 400

    # Validate numeric parameters up front so bad client input yields a 400
    # instead of surfacing as a 500 from int() or from the score comparison.
    try:
        top_n = int(data.get('top_n', 5))
        threshold = float(data.get('threshold', 0.5))  # default threshold
    except (TypeError, ValueError):
        return jsonify({
            "error": "Invalid parameter",
            "message": "'top_n' must be an integer and 'threshold' must be a number."
        }), 400

    if top_n < 1:
        return jsonify({
            "error": "Invalid parameter",
            "message": "'top_n' must be at least 1."
        }), 400

    try:
        # Get recommendations
        recommendations = recommender.recommend_books(
            user_query=query,
            top_n=top_n,
            include_description=bool(include_description)
        )

        # Clean recommendations to make it JSON serializable
        def clean_np(obj):
            if isinstance(obj, np.integer):
                return int(obj)
            elif isinstance(obj, np.floating):
                return float(obj)
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            elif isinstance(obj, dict):
                return {k: clean_np(v) for k, v in obj.items()}
            elif isinstance(obj, list):
                return [clean_np(i) for i in obj]
            else:
                return obj

        recommendations_clean = clean_np(recommendations)

        # Split based on threshold
        high_score = [rec for rec in recommendations_clean if rec['relevance_score'] >= threshold]
        low_score = [rec for rec in recommendations_clean if rec['relevance_score'] < threshold]

        return jsonify({
            "query": query,
            "threshold": threshold,
            "high_recommendations": high_score,
            "low_recommendations": low_score,
            "total_count": len(recommendations_clean),
            "high_count": len(high_score),
            "low_count": len(low_score)
        })

    except Exception as e:
        logger.error(f"Error in recommendation endpoint: {str(e)}", exc_info=True)
        return jsonify({
            "error": "Processing error",
            "message": f"An error occurred while processing your request: {str(e)}"
        }), 500
560
+
561
+ #################################################
562
+ # Main
563
+ #################################################
564
+
565
if __name__ == '__main__':
    # Load every model once, before the server starts accepting requests
    models_loaded = initialize_models()

    # PORT environment variable wins; otherwise listen on 5000
    port = int(os.getenv('PORT', 5000))

    # Debug mode and the auto-reloader are both switched off here
    app.run(
        host='0.0.0.0',
        port=port,
        debug=False,
        use_reloader=False,
    )
574
+
575
+
576
+
577
+ #curl -X POST http://localhost:5000/api/analyze \-H "Content-Type: application/json" \-d '{"text": "cariin buku", "method": "combined"}'
578
+
579
+ #curl -X POST http://localhost:5000/api/recommend \-H "Content-Type: application/json" \-d '{"query": "programming for begginers","top_n": 10,"include_description": true}'
580
+
combined_api.log ADDED
The diff for this file is too large to render. See raw diff
 
evaluate_model.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """
3
+ Intent Classification Model Evaluator
4
+
5
+ This script analyzes the CSV data produced by the Flask app to generate
6
+ evaluation metrics and visualizations.
7
+ """
8
+
9
+ import pandas as pd
10
+ import matplotlib.pyplot as plt
11
+ import numpy as np
12
+ import os
13
+ import argparse
14
+ from datetime import datetime
15
+
16
+
17
def load_evaluation_data(csv_path="model_evaluation.csv"):
    """Read the evaluation CSV and normalise its column types.

    The ``is_ood`` column is converted to real booleans (any value whose
    string form, lower-cased, equals ``'true'``) and ``timestamp`` is parsed
    into datetimes. Returns ``None`` (after printing an error) when the file
    does not exist.
    """
    if os.path.exists(csv_path):
        frame = pd.read_csv(csv_path)

        def _as_bool(value):
            # Works for both "True"/"False" text and already-parsed booleans
            return str(value).lower() == 'true'

        frame['is_ood'] = frame['is_ood'].apply(_as_bool)
        frame['timestamp'] = pd.to_datetime(frame['timestamp'])

        print(f"Loaded {len(frame)} evaluation records")
        return frame

    print(f"Error: File {csv_path} not found")
    return None
35
+
36
+
37
def generate_basic_stats(df):
    """Summarise the evaluation data.

    Returns a ``(stats, method_stats)`` tuple: ``stats`` is a dictionary of
    overall figures and ``method_stats`` is a DataFrame aggregated per
    ``detection_method``. When ``df`` is ``None`` or has no rows, the string
    "No data available for analysis" is returned instead.
    """
    if df is None or not len(df):
        return "No data available for analysis"

    ood_flags = df['is_ood']
    intent_frequencies = df['predicted_intent'].value_counts()

    stats = {}
    stats["total_queries"] = len(df)
    stats["unique_queries"] = df['input_text'].nunique()
    stats["in_distribution_count"] = (~ood_flags).sum()
    stats["out_of_distribution_count"] = ood_flags.sum()
    stats["ood_percentage"] = ood_flags.mean() * 100
    stats["avg_confidence"] = df['confidence'].mean()
    stats["avg_energy_score"] = df['energy_score'].mean()
    stats["top_intents"] = intent_frequencies.head(10).to_dict()

    # Per-detection-method breakdown
    aggregations = {
        'is_ood': ['mean', 'count'],
        'confidence': ['mean', 'std'],
        'energy_score': ['mean', 'std']
    }
    method_stats = df.groupby('detection_method').agg(aggregations)

    return stats, method_stats
61
+
62
+
63
def plot_distributions(df, output_dir="evaluation_plots"):
    """Create plots for analyzing the model performance.

    Writes four PNG files into ``output_dir`` (created if necessary), each
    prefixed with the current timestamp:

    1. confidence histogram, in-distribution vs out-of-distribution
    2. energy score histogram, in-distribution vs out-of-distribution
    3. bar chart of the ten most frequent predicted intents
    4. OOD percentage per detection method

    Every figure is closed after it has been saved so repeated calls do not
    accumulate open matplotlib figures.

    Args:
        df: Evaluation DataFrame with ``is_ood``, ``confidence``,
            ``energy_score``, ``predicted_intent`` and ``detection_method``
            columns.
        output_dir: Directory the PNG files are written to.
    """
    if df is None or len(df) == 0:
        print("No data available for plotting")
        return

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Plot 1: Confidence Distribution
    fig = plt.figure(figsize=(10, 6))
    plt.hist(df[~df['is_ood']]['confidence'], bins=20, alpha=0.7, label='In-Distribution')
    plt.hist(df[df['is_ood']]['confidence'], bins=20, alpha=0.7, label='Out-of-Distribution')
    plt.xlabel('Confidence Score')
    plt.ylabel('Count')
    plt.title('Confidence Score Distribution')
    plt.legend()
    plt.savefig(os.path.join(output_dir, f"{timestamp}_confidence_distribution.png"))
    plt.close(fig)

    # Plot 2: Energy Score Distribution
    fig = plt.figure(figsize=(10, 6))
    plt.hist(df[~df['is_ood']]['energy_score'], bins=20, alpha=0.7, label='In-Distribution')
    plt.hist(df[df['is_ood']]['energy_score'], bins=20, alpha=0.7, label='Out-of-Distribution')
    plt.xlabel('Energy Score')
    plt.ylabel('Count')
    plt.title('Energy Score Distribution')
    plt.legend()
    plt.savefig(os.path.join(output_dir, f"{timestamp}_energy_distribution.png"))
    plt.close(fig)

    # Plot 3: Intent Distribution (Top 10)
    intent_counts = df['predicted_intent'].value_counts().head(10)
    fig = plt.figure(figsize=(12, 6))
    intent_counts.plot(kind='bar')
    plt.xlabel('Intent')
    plt.ylabel('Count')
    plt.title('Top 10 Predicted Intents')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f"{timestamp}_intent_distribution.png"))
    plt.close(fig)

    # Plot 4: OOD Detection Method Comparison
    fig = plt.figure(figsize=(10, 6))
    method_ood = df.groupby('detection_method')['is_ood'].mean() * 100
    method_ood.plot(kind='bar')
    plt.xlabel('Detection Method')
    plt.ylabel('OOD Percentage')
    plt.title('OOD Detection Rate by Method')
    plt.savefig(os.path.join(output_dir, f"{timestamp}_ood_by_method.png"))
    plt.close(fig)

    print(f"Plots saved to {output_dir} directory")
114
+
115
+
116
def analyze_inputs(df):
    """Analyze input texts for patterns.

    Adds two columns to ``df`` in place - ``text_length`` (characters) and
    ``word_count`` (whitespace-separated tokens) - prints the averages, and
    relates text length (cut into 10 equal-width bins) to the OOD rate and
    to the mean confidence.

    Missing ``input_text`` values (e.g. empty CSV cells, which pandas reads
    as NaN) are treated as empty strings instead of raising.

    Args:
        df: Evaluation DataFrame with ``input_text``, ``is_ood`` and
            ``confidence`` columns.

    Returns:
        tuple: ``(text_stats, length_vs_ood, length_vs_confidence)``, or the
        string "No data available for analysis" when ``df`` is ``None`` or
        has no rows.
    """
    if df is None or len(df) == 0:
        return "No data available for analysis"

    # Normalise to strings first: len()/split() would fail on NaN/None cells.
    texts = df['input_text'].fillna('').astype(str)

    # Basic text statistics
    df['text_length'] = texts.str.len()
    df['word_count'] = texts.str.split().str.len()

    text_stats = {
        "avg_text_length": df['text_length'].mean(),
        "avg_word_count": df['word_count'].mean(),
        "max_text_length": df['text_length'].max(),
        "min_text_length": df['text_length'].min()
    }

    # Analyze correlation between text length and predictions.
    # observed=False keeps empty length bins in the result (the historical
    # default) and avoids the pandas warning about that default changing.
    length_bins = pd.cut(df['text_length'], 10)
    length_vs_ood = df.groupby(length_bins, observed=False)['is_ood'].mean()
    length_vs_confidence = df.groupby(length_bins, observed=False)['confidence'].mean()

    print("\nInput Text Analysis:")
    print(f"Average text length: {text_stats['avg_text_length']:.1f} characters")
    print(f"Average word count: {text_stats['avg_word_count']:.1f} words")

    return text_stats, length_vs_ood, length_vs_confidence
141
+
142
+
143
def suggest_thresholds(df):
    """Propose rough OOD thresholds from the logged predictions.

    For both the energy score and the confidence, the suggestion is the
    midpoint between the mean over OOD rows and the mean over non-OOD rows.
    Returns ``(suggested_energy, suggested_msp)``, or an explanatory string
    when the data does not contain both OOD and non-OOD rows.
    """
    if df is None or len(df) == 0:
        return "Insufficient data for threshold analysis - need both OOD and non-OOD examples"
    if len(df['is_ood'].unique()) < 2:
        return "Insufficient data for threshold analysis - need both OOD and non-OOD examples"

    ood_rows = df[df['is_ood']]
    in_dist_rows = df[~df['is_ood']]

    # Midpoint between the two group means for each score
    energy_means = [ood_rows['energy_score'].mean(), in_dist_rows['energy_score'].mean()]
    suggested_energy = np.mean(energy_means)

    confidence_means = [ood_rows['confidence'].mean(), in_dist_rows['confidence'].mean()]
    suggested_msp = np.mean(confidence_means)

    for line in (
        "\nThreshold Suggestions:",
        f"Current data suggests an energy threshold around: {suggested_energy:.4f}",
        f"Current data suggests an MSP threshold around: {suggested_msp:.4f}",
        "Note: These are rough estimates. For proper threshold tuning,",
        "you should use a dedicated validation set and ROC curve analysis.",
    ):
        print(line)

    return suggested_energy, suggested_msp
166
+
167
+
168
def main():
    """Command-line entry point: load the CSV, print the analysis, save plots."""
    parser = argparse.ArgumentParser(description="Analyze intent classification evaluation data")
    parser.add_argument('--csv', default='model_evaluation.csv', help='Path to the evaluation CSV file')
    parser.add_argument('--plots', default='evaluation_plots', help='Directory to save plots')
    parser.add_argument('--no-plots', action='store_true', help='Skip generating plots')
    args = parser.parse_args()

    print(f"Loading data from {args.csv}...")
    df = load_evaluation_data(args.csv)

    # Nothing to report when the file is missing or has no rows
    if df is None or len(df) == 0:
        return

    print("\n===== BASIC STATISTICS =====")
    stats, method_stats = generate_basic_stats(df)
    ood_pct = stats['ood_percentage']
    print(f"Total queries: {stats['total_queries']}")
    print(f"In-distribution queries: {stats['in_distribution_count']} ({100-ood_pct:.1f}%)")
    print(f"Out-of-distribution queries: {stats['out_of_distribution_count']} ({ood_pct:.1f}%)")
    print(f"Average confidence score: {stats['avg_confidence']:.4f}")
    print(f"Average energy score: {stats['avg_energy_score']:.4f}")

    print("\nTop predicted intents:")
    top_five = list(stats['top_intents'].items())[:5]
    for intent, count in top_five:
        print(f"  - {intent}: {count}")

    print("\n===== DETECTION METHOD COMPARISON =====")
    print(method_stats)

    # Text-level analysis, then threshold suggestions
    analyze_inputs(df)
    suggest_thresholds(df)

    # Plots are produced unless --no-plots was given
    if not args.no_plots:
        plot_distributions(df, args.plots)

    print("\nAnalysis complete!")


if __name__ == "__main__":
    main()
evaluation_plots/20250515_142829_confidence_distribution.png ADDED
evaluation_plots/20250515_142829_energy_distribution.png ADDED
evaluation_plots/20250515_142829_intent_distribution.png ADDED
evaluation_plots/20250515_142829_ood_by_method.png ADDED
hf.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from huggingface_hub import login, create_repo, upload_folder

# 1. Log in with a Hugging Face access token (create one at
#    https://huggingface.co/settings/tokens).
#    SECURITY: the token must never be hard-coded in source control. It is
#    read from the HF_TOKEN environment variable instead; when the variable
#    is not set, login() falls back to its own prompt. Any token that was
#    previously committed in this file should be revoked.
login(token=os.environ.get("HF_TOKEN"))

# 2. Create the repo on Hugging Face if it has not been created yet.
#    Change the repo name as desired; it only has to be unique within your account.
repo_name = "Bipa-Classification"
# exist_ok=True makes the script re-runnable once the repo already exists.
create_repo(repo_name, private=False, exist_ok=True)

# 3. Upload the model folder
upload_folder(
    folder_path="./model",  # this path is correct from your working dir
    path_in_repo="",  # upload everything into root of repo
    repo_id="ZEROTSUDIOS/" + repo_name,  # <--- replace with your username
    repo_type="model"
)

print("✅ Upload completed!")
intent_api.log ADDED
@@ -0,0 +1,504 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-05-10 20:51:10,272 - __main__ - INFO - Created evaluation CSV file: model_evaluation.csv
2
+ 2025-05-10 20:51:10,275 - __main__ - INFO - Loading intent resources from C:\xampp\htdocs\ChatbotPerpusBipa\py\model...
3
+ 2025-05-10 20:52:12,106 - __main__ - INFO - Intent resources loaded successfully
4
+ 2025-05-10 20:52:12,108 - __main__ - INFO - Loaded 9 intent classes
5
+ 2025-05-10 20:52:12,113 - __main__ - INFO - Thresholds: {'energy_threshold': -5.720269680023193, 'msp_threshold': 0.8734092712402344}
6
+ 2025-05-10 20:52:12,114 - __main__ - INFO - Intent classification model loaded successfully!
7
+ 2025-05-10 20:52:13,087 - werkzeug - INFO - WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
8
+ * Running on all addresses (0.0.0.0)
9
+ * Running on http://127.0.0.1:5000
10
+ * Running on http://192.168.1.9:5000
11
+ 2025-05-10 20:52:13,088 - werkzeug - INFO - Press CTRL+C to quit
12
+ 2025-05-10 20:52:13,148 - werkzeug - INFO - * Restarting with stat
13
+ 2025-05-10 20:52:43,569 - __main__ - INFO - Loading intent resources from C:\xampp\htdocs\ChatbotPerpusBipa\py\model...
14
+ 2025-05-10 20:53:35,798 - __main__ - INFO - Loading intent resources from C:\xampp\htdocs\ChatbotPerpusBipa\py\model...
15
+ 2025-05-10 20:53:38,986 - __main__ - INFO - Intent resources loaded successfully
16
+ 2025-05-10 20:53:38,986 - __main__ - INFO - Loaded 9 intent classes
17
+ 2025-05-10 20:53:38,986 - __main__ - INFO - Thresholds: {'energy_threshold': -5.720269680023193, 'msp_threshold': 0.8734092712402344}
18
+ 2025-05-10 20:53:38,986 - __main__ - INFO - Intent classification model loaded successfully!
19
+ 2025-05-10 20:53:39,220 - werkzeug - INFO - WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
20
+ * Running on all addresses (0.0.0.0)
21
+ * Running on http://127.0.0.1:5000
22
+ * Running on http://192.168.1.9:5000
23
+ 2025-05-10 20:53:39,220 - werkzeug - INFO - Press CTRL+C to quit
24
+ 2025-05-10 20:53:39,236 - werkzeug - INFO - * Restarting with stat
25
+ 2025-05-10 20:53:49,002 - __main__ - INFO - Loading intent resources from C:\xampp\htdocs\ChatbotPerpusBipa\py\model...
26
+ 2025-05-10 21:03:09,865 - __main__ - INFO - Loading intent resources from C:\xampp\htdocs\ChatbotPerpusBipa\py\model...
27
+ 2025-05-10 21:03:13,006 - __main__ - INFO - Intent resources loaded successfully
28
+ 2025-05-10 21:03:13,006 - __main__ - INFO - Loaded 9 intent classes
29
+ 2025-05-10 21:03:13,006 - __main__ - INFO - Thresholds: {'energy_threshold': -5.720269680023193, 'msp_threshold': 0.8734092712402344}
30
+ 2025-05-10 21:03:13,006 - __main__ - INFO - Intent classification model loaded successfully!
31
+ 2025-05-10 21:03:13,037 - werkzeug - INFO - WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
32
+ * Running on all addresses (0.0.0.0)
33
+ * Running on http://127.0.0.1:5000
34
+ * Running on http://192.168.1.9:5000
35
+ 2025-05-10 21:03:13,037 - werkzeug - INFO - Press CTRL+C to quit
36
+ 2025-05-10 21:04:34,421 - __main__ - INFO -
37
+ ========== INTENT PREDICTION DEBUG ==========
38
+ 2025-05-10 21:04:34,421 - __main__ - INFO - Input Text: hi
39
+ 2025-05-10 21:04:34,421 - __main__ - INFO - Detection Method: combined
40
+ 2025-05-10 21:04:43,102 - __main__ - INFO - Logits: [[-1.3176556825637817, -1.1946855783462524, -0.858184278011322, -1.1091588735580444, -1.0160118341445923, 1.3968093395233154, 5.099667072296143, -1.0186958312988281, -0.7325793504714966]]
41
+ 2025-05-10 21:04:43,618 - __main__ - INFO - Softmax Probabilities: [[0.0015701063675805926, 0.0017755558947101235, 0.0024858498945832253, 0.0019340959843248129, 0.0021229088306427, 0.023703157901763916, 0.9614725708961487, 0.0021172184497117996, 0.0028185418341308832]]
42
+ 2025-05-10 21:04:43,633 - __main__ - INFO - Max Probability (Confidence): 0.9615
43
+ 2025-05-10 21:04:43,633 - __main__ - INFO - Predicted Index: 6
44
+ 2025-05-10 21:04:44,086 - __main__ - INFO - Energy Score: -5.1390
45
+ 2025-05-10 21:04:44,086 - __main__ - INFO - OOD Detection -> is_ood: False
46
+ 2025-05-10 21:04:44,086 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
47
+ 2025-05-10 21:04:44,086 - __main__ - INFO - Predicted Intent: greeting
48
+ 2025-05-10 21:04:44,086 - __main__ - INFO - =============================================
49
+
50
+ 2025-05-10 21:04:44,086 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:04:44] "POST /api/analyze HTTP/1.1" 200 -
51
+ 2025-05-10 21:10:56,497 - __main__ - INFO -
52
+ ========== INTENT PREDICTION DEBUG ==========
53
+ 2025-05-10 21:10:56,498 - __main__ - INFO - Input Text: selamat pagi, saya inginn tau cara meminjam buku boleh? makasih
54
+ 2025-05-10 21:10:56,499 - __main__ - INFO - Detection Method: combined
55
+ 2025-05-10 21:10:58,421 - __main__ - INFO - Logits: [[5.399814605712891, 0.087623231112957, -0.8307769298553467, -0.6626364588737488, -0.7578122615814209, 0.10144484788179398, -0.7953941226005554, -0.3702244460582733, -1.1888351440429688]]
56
+ 2025-05-10 21:10:58,423 - __main__ - INFO - Softmax Probabilities: [[0.9776320457458496, 0.0048208096995949745, 0.001924260170198977, 0.002276598708704114, 0.0020699123851954937, 0.0048879035748541355, 0.0019935655873268843, 0.0030498558189719915, 0.0013451204868033528]]
57
+ 2025-05-10 21:10:58,424 - __main__ - INFO - Max Probability (Confidence): 0.9776
58
+ 2025-05-10 21:10:58,427 - __main__ - INFO - Predicted Index: 0
59
+ 2025-05-10 21:10:58,430 - __main__ - INFO - Energy Score: -5.4224
60
+ 2025-05-10 21:10:58,432 - __main__ - INFO - OOD Detection -> is_ood: False
61
+ 2025-05-10 21:10:58,437 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
62
+ 2025-05-10 21:10:58,440 - __main__ - INFO - Predicted Intent: cara_pinjam
63
+ 2025-05-10 21:10:58,442 - __main__ - INFO - =============================================
64
+
65
+ 2025-05-10 21:10:58,449 - test - ERROR - Exception on /api/analyze [POST]
66
+ Traceback (most recent call last):
67
+ File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\flask\app.py", line 1473, in wsgi_app
68
+ response = self.full_dispatch_request()
69
+ File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\flask\app.py", line 882, in full_dispatch_request
70
+ rv = self.handle_user_exception(e)
71
+ File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\flask_cors\extension.py", line 176, in wrapped_function
72
+ return cors_after_request(app.make_response(f(*args, **kwargs)))
73
+ File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\flask\app.py", line 880, in full_dispatch_request
74
+ rv = self.dispatch_request()
75
+ File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\flask\app.py", line 865, in dispatch_request
76
+ return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return]
77
+ File "C:\xampp\htdocs\ChatbotPerpusBipa\py\test.py", line 234, in analyze
78
+ save_prediction_to_csv(text, result, method)
79
+ File "C:\xampp\htdocs\ChatbotPerpusBipa\py\test.py", line 52, in save_prediction_to_csv
80
+ with open(EVAL_CSV, 'a', newline='') as f:
81
+ PermissionError: [Errno 13] Permission denied: 'model_evaluation.csv'
82
+ 2025-05-10 21:11:03,159 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:11:03] "POST /api/analyze HTTP/1.1" 500 -
83
+ 2025-05-10 21:11:30,805 - __main__ - INFO -
84
+ ========== INTENT PREDICTION DEBUG ==========
85
+ 2025-05-10 21:11:30,806 - __main__ - INFO - Input Text: selamat pagi, saya inginn tau cara meminjam buku boleh? makasih
86
+ 2025-05-10 21:11:30,807 - __main__ - INFO - Detection Method: combined
87
+ 2025-05-10 21:11:32,361 - __main__ - INFO - Logits: [[5.399814605712891, 0.087623231112957, -0.8307769298553467, -0.6626364588737488, -0.7578122615814209, 0.10144484788179398, -0.7953941226005554, -0.3702244460582733, -1.1888351440429688]]
88
+ 2025-05-10 21:11:32,365 - __main__ - INFO - Softmax Probabilities: [[0.9776320457458496, 0.0048208096995949745, 0.001924260170198977, 0.002276598708704114, 0.0020699123851954937, 0.0048879035748541355, 0.0019935655873268843, 0.0030498558189719915, 0.0013451204868033528]]
89
+ 2025-05-10 21:11:32,370 - __main__ - INFO - Max Probability (Confidence): 0.9776
90
+ 2025-05-10 21:11:32,373 - __main__ - INFO - Predicted Index: 0
91
+ 2025-05-10 21:11:32,379 - __main__ - INFO - Energy Score: -5.4224
92
+ 2025-05-10 21:11:32,387 - __main__ - INFO - OOD Detection -> is_ood: False
93
+ 2025-05-10 21:11:32,390 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
94
+ 2025-05-10 21:11:32,393 - __main__ - INFO - Predicted Intent: cara_pinjam
95
+ 2025-05-10 21:11:32,397 - __main__ - INFO - =============================================
96
+
97
+ 2025-05-10 21:11:32,412 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:11:32] "POST /api/analyze HTTP/1.1" 200 -
98
+ 2025-05-10 21:20:06,316 - __main__ - INFO -
99
+ ========== INTENT PREDICTION DEBUG ==========
100
+ 2025-05-10 21:20:06,316 - __main__ - INFO - Input Text: hello nigga
101
+ 2025-05-10 21:20:06,316 - __main__ - INFO - Detection Method: combined
102
+ 2025-05-10 21:20:07,046 - __main__ - INFO - Logits: [[-1.059308648109436, -1.1259849071502686, -0.7732775807380676, -1.1819493770599365, -1.1710785627365112, 1.4521418809890747, 5.11907958984375, -1.0589956045150757, -1.0377000570297241]]
103
+ 2025-05-10 21:20:07,046 - __main__ - INFO - Softmax Probabilities: [[0.0019936026073992252, 0.0018650107085704803, 0.0026537510566413403, 0.0017635031836107373, 0.001782778650522232, 0.02456674538552761, 0.9613432288169861, 0.001994226360693574, 0.002037149854004383]]
104
+ 2025-05-10 21:20:07,046 - __main__ - INFO - Max Probability (Confidence): 0.9613
105
+ 2025-05-10 21:20:07,046 - __main__ - INFO - Predicted Index: 6
106
+ 2025-05-10 21:20:07,046 - __main__ - INFO - Energy Score: -5.1585
107
+ 2025-05-10 21:20:07,046 - __main__ - INFO - OOD Detection -> is_ood: False
108
+ 2025-05-10 21:20:07,046 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
109
+ 2025-05-10 21:20:07,062 - __main__ - INFO - Predicted Intent: greeting
110
+ 2025-05-10 21:20:07,062 - __main__ - INFO - =============================================
111
+
112
+ 2025-05-10 21:20:07,062 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:20:07] "POST /api/analyze HTTP/1.1" 200 -
113
+ 2025-05-10 21:20:12,732 - __main__ - INFO -
114
+ ========== INTENT PREDICTION DEBUG ==========
115
+ 2025-05-10 21:20:12,732 - __main__ - INFO - Input Text: jawir
116
+ 2025-05-10 21:20:12,732 - __main__ - INFO - Detection Method: combined
117
+ 2025-05-10 21:20:13,156 - __main__ - INFO - Logits: [[-1.6685775518417358, -1.273998737335205, 2.2453248500823975, 3.137083053588867, -0.7543082237243652, -1.4736096858978271, 2.1617510318756104, -1.6372283697128296, -1.1733094453811646]]
118
+ 2025-05-10 21:20:13,156 - __main__ - INFO - Softmax Probabilities: [[0.004400658421218395, 0.006529518403112888, 0.2204468846321106, 0.537761390209198, 0.010979431681334972, 0.0053479960188269615, 0.20277215540409088, 0.004540801048278809, 0.007221210282295942]]
119
+ 2025-05-10 21:20:13,156 - __main__ - INFO - Max Probability (Confidence): 0.5378
120
+ 2025-05-10 21:20:13,156 - __main__ - INFO - Predicted Index: 3
121
+ 2025-05-10 21:20:13,156 - __main__ - INFO - Energy Score: -3.7574
122
+ 2025-05-10 21:20:13,156 - __main__ - INFO - OOD Detection -> is_ood: True
123
+ 2025-05-10 21:20:13,156 - __main__ - INFO - Prediction marked as OUT-OF-DISTRIBUTION.
124
+ 2025-05-10 21:20:13,156 - __main__ - INFO - Predicted Intent: unknown
125
+ 2025-05-10 21:20:13,171 - __main__ - INFO - =============================================
126
+
127
+ 2025-05-10 21:20:13,249 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:20:13] "POST /api/analyze HTTP/1.1" 200 -
128
+ 2025-05-10 21:23:46,590 - __main__ - INFO -
129
+ ========== INTENT PREDICTION DEBUG ==========
130
+ 2025-05-10 21:23:46,592 - __main__ - INFO - Input Text: aku pergi
131
+ 2025-05-10 21:23:46,593 - __main__ - INFO - Detection Method: combined
132
+ 2025-05-10 21:23:49,019 - __main__ - INFO - Logits: [[-0.9422286748886108, -0.8617181777954102, -1.3514978885650635, 0.15106375515460968, -1.3091967105865479, 4.960031509399414, 0.3698965013027191, -0.15400801599025726, -0.9507330656051636]]
133
+ 2025-05-10 21:23:49,113 - __main__ - INFO - Softmax Probabilities: [[0.0026371763087809086, 0.002858277875930071, 0.0017514426726847887, 0.007869554683566093, 0.0018271200824528933, 0.9648464918136597, 0.009794626384973526, 0.005800415761768818, 0.0026148436591029167]]
134
+ 2025-05-10 21:23:49,128 - __main__ - INFO - Max Probability (Confidence): 0.9648
135
+ 2025-05-10 21:23:49,128 - __main__ - INFO - Predicted Index: 5
136
+ 2025-05-10 21:23:49,128 - __main__ - INFO - Energy Score: -4.9958
137
+ 2025-05-10 21:23:49,144 - __main__ - INFO - OOD Detection -> is_ood: False
138
+ 2025-05-10 21:23:49,144 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
139
+ 2025-05-10 21:23:49,144 - __main__ - INFO - Predicted Intent: goodbye
140
+ 2025-05-10 21:23:49,159 - __main__ - INFO - =============================================
141
+
142
+ 2025-05-10 21:23:49,175 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:23:49] "POST /api/analyze HTTP/1.1" 200 -
143
+ 2025-05-10 21:23:55,322 - __main__ - INFO -
144
+ ========== INTENT PREDICTION DEBUG ==========
145
+ 2025-05-10 21:23:55,322 - __main__ - INFO - Input Text: ga mau
146
+ 2025-05-10 21:23:55,322 - __main__ - INFO - Detection Method: combined
147
+ 2025-05-10 21:23:55,676 - __main__ - INFO - Logits: [[-0.9056248068809509, -0.013884905725717545, 0.027644459158182144, 5.6327104568481445, -0.8719817996025085, -0.5354712009429932, -0.7891335487365723, -0.8269177079200745, -0.5340971946716309]]
148
+ 2025-05-10 21:23:55,676 - __main__ - INFO - Softmax Probabilities: [[0.0014219597214832902, 0.003468685783445835, 0.0036157723516225815, 0.9827662706375122, 0.0014706128276884556, 0.0020589372143149376, 0.0015976395225152373, 0.0015384004218503833, 0.002061767503619194]]
149
+ 2025-05-10 21:23:55,676 - __main__ - INFO - Max Probability (Confidence): 0.9828
150
+ 2025-05-10 21:23:55,676 - __main__ - INFO - Predicted Index: 3
151
+ 2025-05-10 21:23:55,676 - __main__ - INFO - Energy Score: -5.6501
152
+ 2025-05-10 21:23:55,676 - __main__ - INFO - OOD Detection -> is_ood: False
153
+ 2025-05-10 21:23:55,676 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
154
+ 2025-05-10 21:23:55,676 - __main__ - INFO - Predicted Intent: denied
155
+ 2025-05-10 21:23:55,676 - __main__ - INFO - =============================================
156
+
157
+ 2025-05-10 21:23:55,676 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:23:55] "POST /api/analyze HTTP/1.1" 200 -
158
+ 2025-05-10 21:24:02,159 - __main__ - INFO -
159
+ ========== INTENT PREDICTION DEBUG ==========
160
+ 2025-05-10 21:24:02,159 - __main__ - INFO - Input Text: cariin buku :)
161
+ 2025-05-10 21:24:02,159 - __main__ - INFO - Detection Method: combined
162
+ 2025-05-10 21:24:02,801 - __main__ - INFO - Logits: [[-0.4540617763996124, 5.4417572021484375, -0.9955655336380005, -0.5665326714515686, 0.22001151740550995, -0.4966876804828644, -0.975710391998291, -0.5621989965438843, -1.544423222541809]]
163
+ 2025-05-10 21:24:02,817 - __main__ - INFO - Softmax Probabilities: [[0.002697325311601162, 0.9805168509483337, 0.0015695001929998398, 0.002410393673926592, 0.005292730871587992, 0.002584765199571848, 0.001600974122993648, 0.002420861506834626, 0.0009065577760338783]]
164
+ 2025-05-10 21:24:02,817 - __main__ - INFO - Max Probability (Confidence): 0.9805
165
+ 2025-05-10 21:24:02,817 - __main__ - INFO - Predicted Index: 1
166
+ 2025-05-10 21:24:02,817 - __main__ - INFO - Energy Score: -5.4614
167
+ 2025-05-10 21:24:02,817 - __main__ - INFO - OOD Detection -> is_ood: False
168
+ 2025-05-10 21:24:02,817 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
169
+ 2025-05-10 21:24:02,817 - __main__ - INFO - Predicted Intent: cari_buku
170
+ 2025-05-10 21:24:02,817 - __main__ - INFO - =============================================
171
+
172
+ 2025-05-10 21:24:02,832 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:24:02] "POST /api/analyze HTTP/1.1" 200 -
173
+ 2025-05-10 21:24:08,582 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:24:08] "POST /api/recommend HTTP/1.1" 404 -
174
+ 2025-05-10 21:24:24,640 - __main__ - INFO -
175
+ ========== INTENT PREDICTION DEBUG ==========
176
+ 2025-05-10 21:24:24,655 - __main__ - INFO - Input Text: engga
177
+ 2025-05-10 21:24:24,655 - __main__ - INFO - Detection Method: combined
178
+ 2025-05-10 21:24:24,971 - __main__ - INFO - Logits: [[-1.0960450172424316, -0.19882100820541382, 1.0277540683746338, 5.673742771148682, -0.9991940855979919, -0.5354593992233276, -1.1831791400909424, -0.9452515244483948, -0.6324564814567566]]
179
+ 2025-05-10 21:24:24,971 - __main__ - INFO - Softmax Probabilities: [[0.0011242710752412677, 0.002757594920694828, 0.009402111172676086, 0.9793829321861267, 0.0012386050075292587, 0.001969383331015706, 0.0010304549941793084, 0.0013072536094114184, 0.0017873314209282398]]
180
+ 2025-05-10 21:24:24,971 - __main__ - INFO - Max Probability (Confidence): 0.9794
181
+ 2025-05-10 21:24:24,971 - __main__ - INFO - Predicted Index: 3
182
+ 2025-05-10 21:24:24,986 - __main__ - INFO - Energy Score: -5.6946
183
+ 2025-05-10 21:24:24,986 - __main__ - INFO - OOD Detection -> is_ood: False
184
+ 2025-05-10 21:24:24,986 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
185
+ 2025-05-10 21:24:24,986 - __main__ - INFO - Predicted Intent: denied
186
+ 2025-05-10 21:24:24,986 - __main__ - INFO - =============================================
187
+
188
+ 2025-05-10 21:24:25,002 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:24:25] "POST /api/analyze HTTP/1.1" 200 -
189
+ 2025-05-10 21:24:35,923 - __main__ - INFO -
190
+ ========== INTENT PREDICTION DEBUG ==========
191
+ 2025-05-10 21:24:35,923 - __main__ - INFO - Input Text: kali ini gimana cara jadi anggota
192
+ 2025-05-10 21:24:35,939 - __main__ - INFO - Detection Method: combined
193
+ 2025-05-10 21:24:36,611 - __main__ - INFO - Logits: [[0.7106897234916687, -1.2784496545791626, -1.4926022291183472, -0.25034889578819275, -1.2656588554382324, -0.8868540525436401, 0.061691418290138245, -1.14139723777771, 5.139155864715576]]
194
+ 2025-05-10 21:24:36,627 - __main__ - INFO - Softmax Probabilities: [[0.011566980741918087, 0.0015825150767341256, 0.0012774458155035973, 0.0044243172742426395, 0.0016028864774852991, 0.0023410762660205364, 0.0060445452108979225, 0.0018149681854993105, 0.969345211982727]]
195
+ 2025-05-10 21:24:36,627 - __main__ - INFO - Max Probability (Confidence): 0.9693
196
+ 2025-05-10 21:24:36,627 - __main__ - INFO - Predicted Index: 8
197
+ 2025-05-10 21:24:36,627 - __main__ - INFO - Energy Score: -5.1703
198
+ 2025-05-10 21:24:36,627 - __main__ - INFO - OOD Detection -> is_ood: False
199
+ 2025-05-10 21:24:36,627 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
200
+ 2025-05-10 21:24:36,627 - __main__ - INFO - Predicted Intent: keanggotaan
201
+ 2025-05-10 21:24:36,643 - __main__ - INFO - =============================================
202
+
203
+ 2025-05-10 21:24:36,643 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:24:36] "POST /api/analyze HTTP/1.1" 200 -
204
+ 2025-05-10 21:24:40,108 - __main__ - INFO -
205
+ ========== INTENT PREDICTION DEBUG ==========
206
+ 2025-05-10 21:24:40,108 - __main__ - INFO - Input Text: apakah gw anggota
207
+ 2025-05-10 21:24:40,108 - __main__ - INFO - Detection Method: combined
208
+ 2025-05-10 21:24:40,612 - __main__ - INFO - Logits: [[-1.2273778915405273, -1.939422369003296, -1.204856276512146, -0.7936017513275146, -0.8146175742149353, -0.17088937759399414, 1.614134669303894, -1.2518943548202515, 4.8576788902282715]]
209
+ 2025-05-10 21:24:40,613 - __main__ - INFO - Softmax Probabilities: [[0.002146817045286298, 0.0010533147724345326, 0.002195715205743909, 0.0033126971684396267, 0.003243803745135665, 0.006174789741635323, 0.036800041794776917, 0.002094824332743883, 0.9429781436920166]]
210
+ 2025-05-10 21:24:40,672 - __main__ - INFO - Max Probability (Confidence): 0.9430
211
+ 2025-05-10 21:24:40,672 - __main__ - INFO - Predicted Index: 8
212
+ 2025-05-10 21:24:40,672 - __main__ - INFO - Energy Score: -4.9164
213
+ 2025-05-10 21:24:40,672 - __main__ - INFO - OOD Detection -> is_ood: False
214
+ 2025-05-10 21:24:40,672 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
215
+ 2025-05-10 21:24:40,672 - __main__ - INFO - Predicted Intent: keanggotaan
216
+ 2025-05-10 21:24:40,672 - __main__ - INFO - =============================================
217
+
218
+ 2025-05-10 21:24:40,687 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:24:40] "POST /api/analyze HTTP/1.1" 200 -
219
+ 2025-05-10 21:28:41,246 - __main__ - INFO -
220
+ ========== INTENT PREDICTION DEBUG ==========
221
+ 2025-05-10 21:28:41,246 - __main__ - INFO - Input Text: apa iyah?
222
+ 2025-05-10 21:28:41,262 - __main__ - INFO - Detection Method: combined
223
+ 2025-05-10 21:28:41,778 - __main__ - INFO - Logits: [[-0.7390039563179016, -2.113006353378296, 3.0566437244415283, 3.1234726905822754, -1.570651650428772, -1.8655962944030762, 1.214770793914795, -1.274828314781189, -0.047212935984134674]]
224
+ 2025-05-10 21:28:41,778 - __main__ - INFO - Softmax Probabilities: [[0.009639445692300797, 0.0024396663065999746, 0.42902329564094543, 0.45867419242858887, 0.004196353256702423, 0.0031244901474565268, 0.06800887733697891, 0.005640873685479164, 0.01925276406109333]]
225
+ 2025-05-10 21:28:41,778 - __main__ - INFO - Max Probability (Confidence): 0.4587
226
+ 2025-05-10 21:28:41,794 - __main__ - INFO - Predicted Index: 3
227
+ 2025-05-10 21:28:41,794 - __main__ - INFO - Energy Score: -3.9029
228
+ 2025-05-10 21:28:41,794 - __main__ - INFO - OOD Detection -> is_ood: True
229
+ 2025-05-10 21:28:41,794 - __main__ - INFO - Prediction marked as OUT-OF-DISTRIBUTION.
230
+ 2025-05-10 21:28:41,794 - __main__ - INFO - Predicted Intent: unknown
231
+ 2025-05-10 21:28:41,794 - __main__ - INFO - =============================================
232
+
233
+ 2025-05-10 21:28:41,794 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:28:41] "POST /api/analyze HTTP/1.1" 200 -
234
+ 2025-05-10 21:28:50,002 - __main__ - INFO -
235
+ ========== INTENT PREDICTION DEBUG ==========
236
+ 2025-05-10 21:28:50,006 - __main__ - INFO - Input Text: yaudah deh iya
237
+ 2025-05-10 21:28:50,010 - __main__ - INFO - Detection Method: combined
238
+ 2025-05-10 21:28:50,508 - __main__ - INFO - Logits: [[-0.8829267621040344, -1.275931477546692, 5.552294731140137, 0.9029282331466675, -0.8707864880561829, -0.06326564401388168, -1.2235711812973022, -1.3446561098098755, -1.2907097339630127]]
239
+ 2025-05-10 21:28:50,509 - __main__ - INFO - Softmax Probabilities: [[0.0015714645851403475, 0.001060779090039432, 0.9796836376190186, 0.009373282082378864, 0.0015906589105725288, 0.003566801082342863, 0.0011178012937307358, 0.0009903260506689548, 0.0010452179703861475]]
240
+ 2025-05-10 21:28:50,514 - __main__ - INFO - Max Probability (Confidence): 0.9797
241
+ 2025-05-10 21:28:50,514 - __main__ - INFO - Predicted Index: 2
242
+ 2025-05-10 21:28:50,514 - __main__ - INFO - Energy Score: -5.5728
243
+ 2025-05-10 21:28:50,514 - __main__ - INFO - OOD Detection -> is_ood: False
244
+ 2025-05-10 21:28:50,514 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
245
+ 2025-05-10 21:28:50,514 - __main__ - INFO - Predicted Intent: confirm
246
+ 2025-05-10 21:28:50,514 - __main__ - INFO - =============================================
247
+
248
+ 2025-05-10 21:28:50,534 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:28:50] "POST /api/analyze HTTP/1.1" 200 -
249
+ 2025-05-10 21:28:58,131 - __main__ - INFO -
250
+ ========== INTENT PREDICTION DEBUG ==========
251
+ 2025-05-10 21:28:58,131 - __main__ - INFO - Input Text: apa bedanya cari buku sama pinjam buku
252
+ 2025-05-10 21:28:58,131 - __main__ - INFO - Detection Method: combined
253
+ 2025-05-10 21:28:58,711 - __main__ - INFO - Logits: [[5.502951622009277, 1.039902925491333, -1.1021047830581665, -0.5494447946548462, -0.45617687702178955, -0.8856024742126465, -1.2792505025863647, -0.7736234068870544, -0.5327207446098328]]
254
+ 2025-05-10 21:28:58,711 - __main__ - INFO - Softmax Probabilities: [[0.9757035970687866, 0.01124709565192461, 0.0013206215808168054, 0.002295068232342601, 0.0025194245390594006, 0.0016398499719798565, 0.0011062286794185638, 0.0018341547111049294, 0.00233377399854362]]
255
+ 2025-05-10 21:28:58,711 - __main__ - INFO - Max Probability (Confidence): 0.9757
256
+ 2025-05-10 21:28:58,711 - __main__ - INFO - Predicted Index: 0
257
+ 2025-05-10 21:28:58,711 - __main__ - INFO - Energy Score: -5.5275
258
+ 2025-05-10 21:28:58,711 - __main__ - INFO - OOD Detection -> is_ood: False
259
+ 2025-05-10 21:28:58,727 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
260
+ 2025-05-10 21:28:58,727 - __main__ - INFO - Predicted Intent: cara_pinjam
261
+ 2025-05-10 21:28:58,727 - __main__ - INFO - =============================================
262
+
263
+ 2025-05-10 21:28:58,885 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:28:58] "POST /api/analyze HTTP/1.1" 200 -
264
+ 2025-05-10 21:37:55,693 - __main__ - INFO -
265
+ ========== INTENT PREDICTION DEBUG ==========
266
+ 2025-05-10 21:38:01,389 - __main__ - INFO - Input Text: apa yang anda tahu tentang benda benda di perpustakaan?
267
+ 2025-05-10 21:38:01,391 - __main__ - INFO - Detection Method: combined
268
+ 2025-05-10 21:38:04,009 - __main__ - INFO - Logits: [[-0.7697362899780273, 1.3467382192611694, -1.27949059009552, -1.299091100692749, 5.202969551086426, -1.3397959470748901, -0.9824981689453125, -0.4959643483161926, -0.7740484476089478]]
269
+ 2025-05-10 21:38:04,025 - __main__ - INFO - Softmax Probabilities: [[0.002458558650687337, 0.020410509780049324, 0.0014767165994271636, 0.0014480534009635448, 0.9651476740837097, 0.0013902944047003984, 0.0019873722922056913, 0.003232794813811779, 0.0024479799903929234]]
270
+ 2025-05-10 21:38:04,041 - __main__ - INFO - Max Probability (Confidence): 0.9651
271
+ 2025-05-10 21:38:04,041 - __main__ - INFO - Predicted Index: 4
272
+ 2025-05-10 21:38:04,056 - __main__ - INFO - Energy Score: -5.2384
273
+ 2025-05-10 21:38:04,056 - __main__ - INFO - OOD Detection -> is_ood: False
274
+ 2025-05-10 21:38:04,056 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
275
+ 2025-05-10 21:38:04,056 - __main__ - INFO - Predicted Intent: fasilitas
276
+ 2025-05-10 21:38:04,056 - __main__ - INFO - =============================================
277
+
278
+ 2025-05-10 21:38:04,119 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:38:04] "POST /api/analyze HTTP/1.1" 200 -
279
+ 2025-05-10 21:38:14,947 - __main__ - INFO -
280
+ ========== INTENT PREDICTION DEBUG ==========
281
+ 2025-05-10 21:38:14,947 - __main__ - INFO - Input Text: apa yang anda tahu tentang benda benda yang ada di perpustakaan?
282
+ 2025-05-10 21:38:14,947 - __main__ - INFO - Detection Method: combined
283
+ 2025-05-10 21:38:15,790 - __main__ - INFO - Logits: [[-0.7680988907814026, 1.2449392080307007, -1.196992039680481, -1.266596794128418, 5.14818000793457, -1.350130319595337, -1.160045862197876, -0.40871661901474, -0.8047224283218384]]
284
+ 2025-05-10 21:38:15,790 - __main__ - INFO - Softmax Probabilities: [[0.002601428423076868, 0.019474362954497337, 0.0016941269859671593, 0.0015802178531885147, 0.9652040600776672, 0.001453579985536635, 0.0017578894039615989, 0.003726400900632143, 0.002507878467440605]]
285
+ 2025-05-10 21:38:15,790 - __main__ - INFO - Max Probability (Confidence): 0.9652
286
+ 2025-05-10 21:38:15,806 - __main__ - INFO - Predicted Index: 4
287
+ 2025-05-10 21:38:15,806 - __main__ - INFO - Energy Score: -5.1836
288
+ 2025-05-10 21:38:15,806 - __main__ - INFO - OOD Detection -> is_ood: False
289
+ 2025-05-10 21:38:15,806 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
290
+ 2025-05-10 21:38:15,806 - __main__ - INFO - Predicted Intent: fasilitas
291
+ 2025-05-10 21:38:15,806 - __main__ - INFO - =============================================
292
+
293
+ 2025-05-10 21:38:15,821 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:38:15] "POST /api/analyze HTTP/1.1" 200 -
294
+ 2025-05-10 21:38:26,471 - __main__ - INFO -
295
+ ========== INTENT PREDICTION DEBUG ==========
296
+ 2025-05-10 21:38:26,475 - __main__ - INFO - Input Text: apa yang anda tahu tentang fasility yang ada di perpustakaan?
297
+ 2025-05-10 21:38:26,477 - __main__ - INFO - Detection Method: combined
298
+ 2025-05-10 21:38:27,385 - __main__ - INFO - Logits: [[-1.6402337551116943, 0.28838950395584106, -1.0334101915359497, -1.377264380455017, 5.436436653137207, -1.3821498155593872, -1.1186164617538452, -0.2397802472114563, -0.14053963124752045]]
299
+ 2025-05-10 21:38:27,399 - __main__ - INFO - Softmax Probabilities: [[0.0008288080571219325, 0.005702228285372257, 0.0015205274103209376, 0.0010781027376651764, 0.9813252687454224, 0.0010728489141911268, 0.0013963347300887108, 0.00336250732652843, 0.003713324898853898]]
300
+ 2025-05-10 21:38:27,414 - __main__ - INFO - Max Probability (Confidence): 0.9813
301
+ 2025-05-10 21:38:27,414 - __main__ - INFO - Predicted Index: 4
302
+ 2025-05-10 21:38:27,414 - __main__ - INFO - Energy Score: -5.4553
303
+ 2025-05-10 21:38:27,414 - __main__ - INFO - OOD Detection -> is_ood: False
304
+ 2025-05-10 21:38:27,414 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
305
+ 2025-05-10 21:38:27,414 - __main__ - INFO - Predicted Intent: fasilitas
306
+ 2025-05-10 21:38:27,414 - __main__ - INFO - =============================================
307
+
308
+ 2025-05-10 21:38:27,414 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:38:27] "POST /api/analyze HTTP/1.1" 200 -
309
+ 2025-05-10 21:38:40,887 - __main__ - INFO -
310
+ ========== INTENT PREDICTION DEBUG ==========
311
+ 2025-05-10 21:38:40,888 - __main__ - INFO - Input Text: apa yang anda tahu tentang fasilitas yang ada di perpustakaan?
312
+ 2025-05-10 21:38:40,891 - __main__ - INFO - Detection Method: combined
313
+ 2025-05-10 21:38:41,756 - __main__ - INFO - Logits: [[-1.6494208574295044, 0.12875649333000183, -0.980239748954773, -1.2830331325531006, 5.473063945770264, -1.3103773593902588, -0.9920451045036316, -0.4207743704319, -0.007997849956154823]]
314
+ 2025-05-10 21:38:41,756 - __main__ - INFO - Softmax Probabilities: [[0.0007925480604171753, 0.004691137932240963, 0.00154755893163383, 0.0011432621395215392, 0.9823843240737915, 0.0011124236043542624, 0.0015293973265215755, 0.002707820851355791, 0.0040915366262197495]]
315
+ 2025-05-10 21:38:41,756 - __main__ - INFO - Max Probability (Confidence): 0.9824
316
+ 2025-05-10 21:38:41,756 - __main__ - INFO - Predicted Index: 4
317
+ 2025-05-10 21:38:41,756 - __main__ - INFO - Energy Score: -5.4908
318
+ 2025-05-10 21:38:41,756 - __main__ - INFO - OOD Detection -> is_ood: False
319
+ 2025-05-10 21:38:41,772 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
320
+ 2025-05-10 21:38:41,855 - __main__ - INFO - Predicted Intent: fasilitas
321
+ 2025-05-10 21:38:41,855 - __main__ - INFO - =============================================
322
+
323
+ 2025-05-10 21:38:41,868 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:38:41] "POST /api/analyze HTTP/1.1" 200 -
324
+ 2025-05-10 21:38:51,135 - __main__ - INFO -
325
+ ========== INTENT PREDICTION DEBUG ==========
326
+ 2025-05-10 21:38:51,135 - __main__ - INFO - Input Text: fasilitas yang ada di perpustakaan?
327
+ 2025-05-10 21:38:51,135 - __main__ - INFO - Detection Method: combined
328
+ 2025-05-10 21:38:51,684 - __main__ - INFO - Logits: [[-1.697698712348938, -0.4591014087200165, -0.9102374315261841, -1.1577955484390259, 5.437514305114746, -1.2155272960662842, -0.505920946598053, -0.38150474429130554, -0.2966429889202118]]
329
+ 2025-05-10 21:38:51,684 - __main__ - INFO - Softmax Probabilities: [[0.0007834106218069792, 0.00270336982794106, 0.0017217874992638826, 0.0013442077906802297, 0.9834970235824585, 0.00126880151219666, 0.0025797162670642138, 0.0029214955866336823, 0.0031802428420633078]]
330
+ 2025-05-10 21:38:51,699 - __main__ - INFO - Max Probability (Confidence): 0.9835
331
+ 2025-05-10 21:38:51,699 - __main__ - INFO - Predicted Index: 4
332
+ 2025-05-10 21:38:51,699 - __main__ - INFO - Energy Score: -5.4542
333
+ 2025-05-10 21:38:51,699 - __main__ - INFO - OOD Detection -> is_ood: False
334
+ 2025-05-10 21:38:51,699 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
335
+ 2025-05-10 21:38:51,699 - __main__ - INFO - Predicted Intent: fasilitas
336
+ 2025-05-10 21:38:51,699 - __main__ - INFO - =============================================
337
+
338
+ 2025-05-10 21:38:51,715 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:38:51] "POST /api/analyze HTTP/1.1" 200 -
339
+ 2025-05-10 21:38:57,269 - __main__ - INFO -
340
+ ========== INTENT PREDICTION DEBUG ==========
341
+ 2025-05-10 21:38:57,273 - __main__ - INFO - Input Text: fasilitas?
342
+ 2025-05-10 21:38:57,277 - __main__ - INFO - Detection Method: combined
343
+ 2025-05-10 21:38:57,554 - __main__ - INFO - Logits: [[-2.5640785694122314, -1.0151931047439575, -0.9696947932243347, -1.25716233253479, 3.5866546630859375, -0.20148932933807373, 2.6498327255249023, -0.7469539642333984, -0.534983217716217]]
344
+ 2025-05-10 21:38:57,554 - __main__ - INFO - Softmax Probabilities: [[0.0014459670055657625, 0.0068050408735871315, 0.007121811155229807, 0.005342504940927029, 0.6782468557357788, 0.01535386499017477, 0.26578542590141296, 0.008898678235709667, 0.010999760590493679]]
345
+ 2025-05-10 21:38:57,554 - __main__ - INFO - Max Probability (Confidence): 0.6782
346
+ 2025-05-10 21:38:57,554 - __main__ - INFO - Predicted Index: 4
347
+ 2025-05-10 21:38:57,570 - __main__ - INFO - Energy Score: -3.9749
348
+ 2025-05-10 21:38:57,570 - __main__ - INFO - OOD Detection -> is_ood: True
349
+ 2025-05-10 21:38:57,570 - __main__ - INFO - Prediction marked as OUT-OF-DISTRIBUTION.
350
+ 2025-05-10 21:38:57,570 - __main__ - INFO - Predicted Intent: unknown
351
+ 2025-05-10 21:38:57,586 - __main__ - INFO - =============================================
352
+
353
+ 2025-05-10 21:38:57,586 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:38:57] "POST /api/analyze HTTP/1.1" 200 -
354
+ 2025-05-10 21:57:53,426 - __main__ - INFO -
355
+ ========== INTENT PREDICTION DEBUG ==========
356
+ 2025-05-10 21:57:53,426 - __main__ - INFO - Input Text: apa yang anda tahu tentang benda benda yang ada di perpustakaan?
357
+ 2025-05-10 21:57:53,426 - __main__ - INFO - Detection Method: combined
358
+ 2025-05-10 21:57:54,536 - __main__ - INFO - Logits: [[-0.7680988907814026, 1.2449392080307007, -1.196992039680481, -1.266596794128418, 5.14818000793457, -1.350130319595337, -1.160045862197876, -0.40871661901474, -0.8047224283218384]]
359
+ 2025-05-10 21:57:54,536 - __main__ - INFO - Softmax Probabilities: [[0.002601428423076868, 0.019474362954497337, 0.0016941269859671593, 0.0015802178531885147, 0.9652040600776672, 0.001453579985536635, 0.0017578894039615989, 0.003726400900632143, 0.002507878467440605]]
360
+ 2025-05-10 21:57:54,536 - __main__ - INFO - Max Probability (Confidence): 0.9652
361
+ 2025-05-10 21:57:54,536 - __main__ - INFO - Predicted Index: 4
362
+ 2025-05-10 21:57:54,536 - __main__ - INFO - Energy Score: -5.1836
363
+ 2025-05-10 21:57:54,551 - __main__ - INFO - OOD Detection -> is_ood: False
364
+ 2025-05-10 21:57:54,551 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
365
+ 2025-05-10 21:57:54,567 - __main__ - INFO - Predicted Intent: fasilitas
366
+ 2025-05-10 21:57:54,598 - __main__ - INFO - =============================================
367
+
368
+ 2025-05-10 21:57:54,676 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:57:54] "POST /api/analyze HTTP/1.1" 200 -
369
+ 2025-05-10 21:58:09,650 - __main__ - INFO -
370
+ ========== INTENT PREDICTION DEBUG ==========
371
+ 2025-05-10 21:58:09,650 - __main__ - INFO - Input Text: selamat pagi dan selamat malam dan selamat siang
372
+ 2025-05-10 21:58:09,650 - __main__ - INFO - Detection Method: combined
373
+ 2025-05-10 21:58:10,353 - __main__ - INFO - Logits: [[-1.015725016593933, -1.7672958374023438, -0.09375888109207153, -0.000339341553626582, -0.7917280793190002, 0.4557315409183502, 3.838515281677246, -0.13580496609210968, -1.4231810569763184]]
374
+ 2025-05-10 21:58:10,353 - __main__ - INFO - Softmax Probabilities: [[0.006958352401852608, 0.003281734185293317, 0.017494892701506615, 0.01920803263783455, 0.00870536733418703, 0.030307628214359283, 0.8926397562026978, 0.016774550080299377, 0.0046296752989292145]]
375
+ 2025-05-10 21:58:10,353 - __main__ - INFO - Max Probability (Confidence): 0.8926
376
+ 2025-05-10 21:58:10,353 - __main__ - INFO - Predicted Index: 6
377
+ 2025-05-10 21:58:10,368 - __main__ - INFO - Energy Score: -3.9521
378
+ 2025-05-10 21:58:10,368 - __main__ - INFO - OOD Detection -> is_ood: False
379
+ 2025-05-10 21:58:10,368 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
380
+ 2025-05-10 21:58:10,368 - __main__ - INFO - Predicted Intent: greeting
381
+ 2025-05-10 21:58:10,368 - __main__ - INFO - =============================================
382
+
383
+ 2025-05-10 21:58:10,368 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:58:10] "POST /api/analyze HTTP/1.1" 200 -
384
+ 2025-05-10 21:58:16,363 - __main__ - INFO -
385
+ ========== INTENT PREDICTION DEBUG ==========
386
+ 2025-05-10 21:58:16,363 - __main__ - INFO - Input Text: lo jawa
387
+ 2025-05-10 21:58:16,363 - __main__ - INFO - Detection Method: combined
388
+ 2025-05-10 21:58:16,774 - __main__ - INFO - Logits: [[-1.2190250158309937, -1.6057887077331543, 2.1177775859832764, 3.414398670196533, -1.2115315198898315, -1.0860720872879028, 1.689135193824768, -2.435594320297241, -0.16704648733139038]]
389
+ 2025-05-10 21:58:16,775 - __main__ - INFO - Softmax Probabilities: [[0.006397695280611515, 0.004345645196735859, 0.17996107041835785, 0.6581031084060669, 0.0064458162523806095, 0.007307425606995821, 0.11722534894943237, 0.001895284280180931, 0.01831859163939953]]
390
+ 2025-05-10 21:58:16,802 - __main__ - INFO - Max Probability (Confidence): 0.6581
391
+ 2025-05-10 21:58:16,802 - __main__ - INFO - Predicted Index: 3
392
+ 2025-05-10 21:58:16,818 - __main__ - INFO - Energy Score: -3.8328
393
+ 2025-05-10 21:58:16,818 - __main__ - INFO - OOD Detection -> is_ood: True
394
+ 2025-05-10 21:58:16,818 - __main__ - INFO - Prediction marked as OUT-OF-DISTRIBUTION.
395
+ 2025-05-10 21:58:16,818 - __main__ - INFO - Predicted Intent: unknown
396
+ 2025-05-10 21:58:16,834 - __main__ - INFO - =============================================
397
+
398
+ 2025-05-10 21:58:16,849 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:58:16] "POST /api/analyze HTTP/1.1" 200 -
399
+ 2025-05-10 21:58:29,883 - __main__ - INFO -
400
+ ========== INTENT PREDICTION DEBUG ==========
401
+ 2025-05-10 21:58:29,883 - __main__ - INFO - Input Text: Assalamualaikum wr wb
402
+ 2025-05-10 21:58:29,883 - __main__ - INFO - Detection Method: combined
403
+ 2025-05-10 21:58:32,791 - __main__ - INFO - Logits: [[-1.131690263748169, -1.5006943941116333, -0.6151072978973389, -1.1938962936401367, -1.1617226600646973, 1.5072420835494995, 5.034933567047119, -1.0742135047912598, -0.8630368709564209]]
404
+ 2025-05-10 21:58:33,003 - __main__ - INFO - Softmax Probabilities: [[0.0020071538165211678, 0.0013877918245270848, 0.0033645734656602144, 0.0018861013231799006, 0.0019477707101032138, 0.028096651658415794, 0.956558346748352, 0.002125898841768503, 0.0026257631834596395]]
405
+ 2025-05-10 21:58:33,034 - __main__ - INFO - Max Probability (Confidence): 0.9566
406
+ 2025-05-10 21:58:33,034 - __main__ - INFO - Predicted Index: 6
407
+ 2025-05-10 21:58:33,034 - __main__ - INFO - Energy Score: -5.0793
408
+ 2025-05-10 21:58:33,034 - __main__ - INFO - OOD Detection -> is_ood: False
409
+ 2025-05-10 21:58:33,034 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
410
+ 2025-05-10 21:58:33,034 - __main__ - INFO - Predicted Intent: greeting
411
+ 2025-05-10 21:58:33,049 - __main__ - INFO - =============================================
412
+
413
+ 2025-05-10 21:58:33,113 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:58:33] "POST /api/analyze HTTP/1.1" 200 -
414
+ 2025-05-10 21:58:40,238 - __main__ - INFO -
415
+ ========== INTENT PREDICTION DEBUG ==========
416
+ 2025-05-10 21:58:40,238 - __main__ - INFO - Input Text: ass
417
+ 2025-05-10 21:58:40,238 - __main__ - INFO - Detection Method: combined
418
+ 2025-05-10 21:58:40,584 - __main__ - INFO - Logits: [[-1.3392796516418457, -1.524475336074829, 0.16145886480808258, -0.7817472815513611, -1.299397349357605, 1.4473503828048706, 5.0309271812438965, -1.4974311590194702, -0.9901137948036194]]
419
+ 2025-05-10 21:58:40,584 - __main__ - INFO - Softmax Probabilities: [[0.0016345757758244872, 0.0013582368846982718, 0.007331073749810457, 0.002854553982615471, 0.0017010837327688932, 0.026523033156991005, 0.9548842906951904, 0.0013954705791547894, 0.0023176397662609816]]
420
+ 2025-05-10 21:58:40,584 - __main__ - INFO - Max Probability (Confidence): 0.9549
421
+ 2025-05-10 21:58:40,584 - __main__ - INFO - Predicted Index: 6
422
+ 2025-05-10 21:58:40,584 - __main__ - INFO - Energy Score: -5.0771
423
+ 2025-05-10 21:58:40,599 - __main__ - INFO - OOD Detection -> is_ood: False
424
+ 2025-05-10 21:58:40,631 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
425
+ 2025-05-10 21:58:40,631 - __main__ - INFO - Predicted Intent: greeting
426
+ 2025-05-10 21:58:40,631 - __main__ - INFO - =============================================
427
+
428
+ 2025-05-10 21:58:40,646 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:58:40] "POST /api/analyze HTTP/1.1" 200 -
429
+ 2025-05-10 21:59:07,034 - __main__ - INFO -
430
+ ========== INTENT PREDICTION DEBUG ==========
431
+ 2025-05-10 21:59:07,034 - __main__ - INFO - Input Text: gausah kocak besok aja bye
432
+ 2025-05-10 21:59:07,034 - __main__ - INFO - Detection Method: combined
433
+ 2025-05-10 21:59:07,812 - __main__ - INFO - Logits: [[-0.6060627102851868, -0.5750446915626526, -1.0750855207443237, 0.9511569738388062, -1.388703465461731, 4.919146537780762, -0.24468335509300232, -0.4451393187046051, -0.9716767072677612]]
434
+ 2025-05-10 21:59:07,812 - __main__ - INFO - Softmax Probabilities: [[0.003815301228314638, 0.003935500048100948, 0.002386903390288353, 0.01810593344271183, 0.001744345179758966, 0.9574074745178223, 0.005476133432239294, 0.004481433890759945, 0.0026469440199434757]]
435
+ 2025-05-10 21:59:07,828 - __main__ - INFO - Max Probability (Confidence): 0.9574
436
+ 2025-05-10 21:59:07,828 - __main__ - INFO - Predicted Index: 5
437
+ 2025-05-10 21:59:07,828 - __main__ - INFO - Energy Score: -4.9627
438
+ 2025-05-10 21:59:07,828 - __main__ - INFO - OOD Detection -> is_ood: False
439
+ 2025-05-10 21:59:07,828 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
440
+ 2025-05-10 21:59:07,828 - __main__ - INFO - Predicted Intent: goodbye
441
+ 2025-05-10 21:59:07,828 - __main__ - INFO - =============================================
442
+
443
+ 2025-05-10 21:59:07,843 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:59:07] "POST /api/analyze HTTP/1.1" 200 -
444
+ 2025-05-10 21:59:11,113 - __main__ - INFO -
445
+ ========== INTENT PREDICTION DEBUG ==========
446
+ 2025-05-10 21:59:11,113 - __main__ - INFO - Input Text: gausah
447
+ 2025-05-10 21:59:11,113 - __main__ - INFO - Detection Method: combined
448
+ 2025-05-10 21:59:11,479 - __main__ - INFO - Logits: [[-1.1790672540664673, -0.19112896919250488, 0.6465687155723572, 5.715721130371094, -0.954384982585907, -0.07604362815618515, -1.0976141691207886, -1.0053229331970215, -0.6094058752059937]]
449
+ 2025-05-10 21:59:11,480 - __main__ - INFO - Softmax Probabilities: [[0.0009947115322574973, 0.0026714885607361794, 0.006173915695399046, 0.9818962812423706, 0.0012453041272237897, 0.0029973271302878857, 0.0010791246313601732, 0.0011834590695798397, 0.0017583195585757494]]
450
+ 2025-05-10 21:59:11,484 - __main__ - INFO - Max Probability (Confidence): 0.9819
451
+ 2025-05-10 21:59:11,484 - __main__ - INFO - Predicted Index: 3
452
+ 2025-05-10 21:59:11,484 - __main__ - INFO - Energy Score: -5.7340
453
+ 2025-05-10 21:59:11,484 - __main__ - INFO - OOD Detection -> is_ood: False
454
+ 2025-05-10 21:59:11,484 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
455
+ 2025-05-10 21:59:11,484 - __main__ - INFO - Predicted Intent: denied
456
+ 2025-05-10 21:59:11,484 - __main__ - INFO - =============================================
457
+
458
+ 2025-05-10 21:59:11,533 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:59:11] "POST /api/analyze HTTP/1.1" 200 -
459
+ 2025-05-10 21:59:20,905 - __main__ - INFO -
460
+ ========== INTENT PREDICTION DEBUG ==========
461
+ 2025-05-10 21:59:20,905 - __main__ - INFO - Input Text: aku datang besok
462
+ 2025-05-10 21:59:20,905 - __main__ - INFO - Detection Method: combined
463
+ 2025-05-10 21:59:21,327 - __main__ - INFO - Logits: [[-0.8137130737304688, -0.8092074394226074, -1.8744394779205322, 0.2625807821750641, -1.8174302577972412, 4.685948371887207, 0.7935030460357666, 0.8845048546791077, -1.1589834690093994]]
464
+ 2025-05-10 21:59:21,327 - __main__ - INFO - Softmax Probabilities: [[0.0038252437952905893, 0.00384251750074327, 0.0013243157882243395, 0.011222448199987411, 0.0014020069502294064, 0.935689389705658, 0.019083769991993904, 0.02090189978480339, 0.002708383370190859]]
465
+ 2025-05-10 21:59:21,327 - __main__ - INFO - Max Probability (Confidence): 0.9357
466
+ 2025-05-10 21:59:21,327 - __main__ - INFO - Predicted Index: 5
467
+ 2025-05-10 21:59:21,327 - __main__ - INFO - Energy Score: -4.7524
468
+ 2025-05-10 21:59:21,327 - __main__ - INFO - OOD Detection -> is_ood: False
469
+ 2025-05-10 21:59:21,327 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
470
+ 2025-05-10 21:59:21,327 - __main__ - INFO - Predicted Intent: goodbye
471
+ 2025-05-10 21:59:21,343 - __main__ - INFO - =============================================
472
+
473
+ 2025-05-10 21:59:21,343 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:59:21] "POST /api/analyze HTTP/1.1" 200 -
474
+ 2025-05-10 21:59:28,988 - __main__ - INFO -
475
+ ========== INTENT PREDICTION DEBUG ==========
476
+ 2025-05-10 21:59:28,988 - __main__ - INFO - Input Text: aku akan tiba besok pagi
477
+ 2025-05-10 21:59:28,993 - __main__ - INFO - Detection Method: combined
478
+ 2025-05-10 21:59:29,531 - __main__ - INFO - Logits: [[-1.0576566457748413, -0.4939669370651245, -1.9942526817321777, 0.07859884947538376, -1.6343517303466797, 3.7366526126861572, 0.27028337121009827, 2.3993427753448486, -1.3766415119171143]]
479
+ 2025-05-10 21:59:29,531 - __main__ - INFO - Softmax Probabilities: [[0.00610245019197464, 0.01072288304567337, 0.0023919143714010715, 0.019009629264473915, 0.0034280631225556135, 0.7373034358024597, 0.02302614599466324, 0.193579763174057, 0.004435788374394178]]
480
+ 2025-05-10 21:59:29,531 - __main__ - INFO - Max Probability (Confidence): 0.7373
481
+ 2025-05-10 21:59:29,531 - __main__ - INFO - Predicted Index: 5
482
+ 2025-05-10 21:59:29,531 - __main__ - INFO - Energy Score: -4.0414
483
+ 2025-05-10 21:59:29,547 - __main__ - INFO - OOD Detection -> is_ood: True
484
+ 2025-05-10 21:59:29,547 - __main__ - INFO - Prediction marked as OUT-OF-DISTRIBUTION.
485
+ 2025-05-10 21:59:29,547 - __main__ - INFO - Predicted Intent: unknown
486
+ 2025-05-10 21:59:29,547 - __main__ - INFO - =============================================
487
+
488
+ 2025-05-10 21:59:29,563 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 21:59:29] "POST /api/analyze HTTP/1.1" 200 -
489
+ 2025-05-10 22:02:08,601 - __main__ - INFO -
490
+ ========== INTENT PREDICTION DEBUG ==========
491
+ 2025-05-10 22:02:08,601 - __main__ - INFO - Input Text: cariin buku bang
492
+ 2025-05-10 22:02:08,601 - __main__ - INFO - Detection Method: combined
493
+ 2025-05-10 22:02:09,038 - __main__ - INFO - Logits: [[-0.49238744378089905, 5.4453630447387695, -1.1035208702087402, -0.4974009394645691, 0.0320424884557724, -0.42038029432296753, -0.6259087324142456, -0.6300820112228394, -1.6206642389297485]]
494
+ 2025-05-10 22:02:09,038 - __main__ - INFO - Softmax Probabilities: [[0.0025876371655613184, 0.9809244871139526, 0.0014044019626453519, 0.0025746964383870363, 0.00437180045992136, 0.0027808379381895065, 0.002264204667881131, 0.002254775259643793, 0.00083733448991552]]
495
+ 2025-05-10 22:02:09,038 - __main__ - INFO - Max Probability (Confidence): 0.9809
496
+ 2025-05-10 22:02:09,038 - __main__ - INFO - Predicted Index: 1
497
+ 2025-05-10 22:02:09,053 - __main__ - INFO - Energy Score: -5.4646
498
+ 2025-05-10 22:02:09,053 - __main__ - INFO - OOD Detection -> is_ood: False
499
+ 2025-05-10 22:02:09,053 - __main__ - INFO - Prediction marked as IN-DISTRIBUTION.
500
+ 2025-05-10 22:02:09,053 - __main__ - INFO - Predicted Intent: cari_buku
501
+ 2025-05-10 22:02:09,053 - __main__ - INFO - =============================================
502
+
503
+ 2025-05-10 22:02:09,069 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 22:02:09] "POST /api/analyze HTTP/1.1" 200 -
504
+ 2025-05-10 22:02:12,836 - werkzeug - INFO - 127.0.0.1 - - [10/May/2025 22:02:12] "POST /api/recommend HTTP/1.1" 404 -
model/class_metrics/f1_per_class.html ADDED
The diff for this file is too large to render. See raw diff
 
model/class_metrics/f1_per_class.png ADDED
model/class_performance_metrics.png ADDED
model/classification_report.csv ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,precision,recall,f1-score,support
2
+ cara_pinjam,0.984,1.0,0.992,179.0
3
+ cari_buku,0.989,0.983,0.986,180.0
4
+ confirm,0.975,0.975,0.975,120.0
5
+ denied,0.986,0.98,0.983,147.0
6
+ fasilitas,0.989,0.983,0.986,178.0
7
+ goodbye,0.993,0.978,0.985,136.0
8
+ greeting,0.944,0.992,0.967,118.0
9
+ jam_layanan,0.994,1.0,0.997,178.0
10
+ keanggotaan,0.994,0.994,0.994,179.0
11
+ peraturan,1.0,0.971,0.985,174.0
12
+ accuracy,0.986,0.986,0.986,0.986
13
+ macro avg,0.985,0.986,0.985,1589.0
14
+ weighted avg,0.986,0.986,0.986,1589.0
model/classification_report.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "0": {
3
+ "precision": 0.9944444444444445,
4
+ "recall": 1.0,
5
+ "f1-score": 0.9972144846796658,
6
+ "support": 179.0
7
+ },
8
+ "1": {
9
+ "precision": 0.9888888888888889,
10
+ "recall": 0.9888888888888889,
11
+ "f1-score": 0.9888888888888889,
12
+ "support": 180.0
13
+ },
14
+ "2": {
15
+ "precision": 0.975,
16
+ "recall": 0.975,
17
+ "f1-score": 0.975,
18
+ "support": 120.0
19
+ },
20
+ "3": {
21
+ "precision": 0.9863013698630136,
22
+ "recall": 0.9795918367346939,
23
+ "f1-score": 0.9829351535836177,
24
+ "support": 147.0
25
+ },
26
+ "4": {
27
+ "precision": 0.9943181818181818,
28
+ "recall": 0.9831460674157303,
29
+ "f1-score": 0.9887005649717514,
30
+ "support": 178.0
31
+ },
32
+ "5": {
33
+ "precision": 0.9925373134328358,
34
+ "recall": 0.9779411764705882,
35
+ "f1-score": 0.9851851851851852,
36
+ "support": 136.0
37
+ },
38
+ "6": {
39
+ "precision": 0.9435483870967742,
40
+ "recall": 0.9915254237288136,
41
+ "f1-score": 0.9669421487603306,
42
+ "support": 118.0
43
+ },
44
+ "7": {
45
+ "precision": 0.994413407821229,
46
+ "recall": 1.0,
47
+ "f1-score": 0.9971988795518207,
48
+ "support": 178.0
49
+ },
50
+ "8": {
51
+ "precision": 0.9944444444444445,
52
+ "recall": 1.0,
53
+ "f1-score": 0.9972144846796658,
54
+ "support": 179.0
55
+ },
56
+ "9": {
57
+ "precision": 1.0,
58
+ "recall": 0.9770114942528736,
59
+ "f1-score": 0.9883720930232558,
60
+ "support": 174.0
61
+ },
62
+ "accuracy": 0.9880427942101951,
63
+ "macro avg": {
64
+ "precision": 0.9863896437809814,
65
+ "recall": 0.9873104887491589,
66
+ "f1-score": 0.9867651883324182,
67
+ "support": 1589.0
68
+ },
69
+ "weighted avg": {
70
+ "precision": 0.9882412962498892,
71
+ "recall": 0.9880427942101951,
72
+ "f1-score": 0.988071401599911,
73
+ "support": 1589.0
74
+ }
75
+ }
model/combined_metrics.html ADDED
The diff for this file is too large to render. See raw diff
 
model/config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9"
23
+ },
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 3072,
26
+ "label2id": {
27
+ "LABEL_0": 0,
28
+ "LABEL_1": 1,
29
+ "LABEL_2": 2,
30
+ "LABEL_3": 3,
31
+ "LABEL_4": 4,
32
+ "LABEL_5": 5,
33
+ "LABEL_6": 6,
34
+ "LABEL_7": 7,
35
+ "LABEL_8": 8,
36
+ "LABEL_9": 9
37
+ },
38
+ "layer_norm_eps": 1e-05,
39
+ "max_position_embeddings": 514,
40
+ "model_type": "xlm-roberta",
41
+ "num_attention_heads": 12,
42
+ "num_hidden_layers": 12,
43
+ "output_past": true,
44
+ "pad_token_id": 1,
45
+ "position_embedding_type": "absolute",
46
+ "torch_dtype": "float32",
47
+ "transformers_version": "4.52.4",
48
+ "type_vocab_size": 1,
49
+ "use_cache": true,
50
+ "vocab_size": 250002
51
+ }
model/confusion_matrices/cm_epoch_1.png ADDED
model/confusion_matrices/cm_epoch_10.png ADDED
model/confusion_matrices/cm_epoch_11.png ADDED
model/confusion_matrices/cm_epoch_12.png ADDED
model/confusion_matrices/cm_epoch_2.png ADDED
model/confusion_matrices/cm_epoch_3.png ADDED
model/confusion_matrices/cm_epoch_4.png ADDED
model/confusion_matrices/cm_epoch_5.png ADDED
model/confusion_matrices/cm_epoch_6.png ADDED
model/confusion_matrices/cm_epoch_7.png ADDED
model/confusion_matrices/cm_epoch_8.png ADDED
model/confusion_matrices/cm_epoch_9.png ADDED
model/enhanced_training_history.json ADDED
The diff for this file is too large to render. See raw diff
 
model/final_confusion_matrix.png ADDED
model/intent_classes.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7f6662dc15d44fb1eb878094ca3e51afbd73b93d0a13c0dc16ae5196667fedf
3
+ size 267
model/interactive_class_performance.html ADDED
The diff for this file is too large to render. See raw diff
 
model/interactive_confusion_matrix.html ADDED
The diff for this file is too large to render. See raw diff
 
model/interactive_training_metrics.html ADDED
The diff for this file is too large to render. See raw diff
 
model/label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10f038f671b716874251fcc9d9076b5096dbfff121e75cfde29bc91769479756
3
+ size 361
model/learning_rate_schedule.html ADDED
The diff for this file is too large to render. See raw diff
 
model/learning_rate_schedule.png ADDED
model/ood_thresholds.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "energy_threshold": -6.981417655944824,
3
+ "msp_threshold": 0.8888697624206543
4
+ }
model/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
model/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
model/test_results/test_run_20250702_143737/classification_report.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cara_pinjam": {
3
+ "precision": 0.9534883720930233,
4
+ "recall": 1.0,
5
+ "f1-score": 0.9761904761904762,
6
+ "support": 41.0
7
+ },
8
+ "cari_buku": {
9
+ "precision": 1.0,
10
+ "recall": 1.0,
11
+ "f1-score": 1.0,
12
+ "support": 41.0
13
+ },
14
+ "confirm": {
15
+ "precision": 0.9714285714285714,
16
+ "recall": 0.8292682926829268,
17
+ "f1-score": 0.8947368421052632,
18
+ "support": 41.0
19
+ },
20
+ "denied": {
21
+ "precision": 0.9069767441860465,
22
+ "recall": 0.9512195121951219,
23
+ "f1-score": 0.9285714285714286,
24
+ "support": 41.0
25
+ },
26
+ "fasilitas": {
27
+ "precision": 1.0,
28
+ "recall": 1.0,
29
+ "f1-score": 1.0,
30
+ "support": 41.0
31
+ },
32
+ "goodbye": {
33
+ "precision": 0.9473684210526315,
34
+ "recall": 0.8780487804878049,
35
+ "f1-score": 0.9113924050632911,
36
+ "support": 41.0
37
+ },
38
+ "greeting": {
39
+ "precision": 0.8604651162790697,
40
+ "recall": 0.9024390243902439,
41
+ "f1-score": 0.8809523809523809,
42
+ "support": 41.0
43
+ },
44
+ "jam_layanan": {
45
+ "precision": 0.9318181818181818,
46
+ "recall": 1.0,
47
+ "f1-score": 0.9647058823529412,
48
+ "support": 41.0
49
+ },
50
+ "keanggotaan": {
51
+ "precision": 1.0,
52
+ "recall": 1.0,
53
+ "f1-score": 1.0,
54
+ "support": 41.0
55
+ },
56
+ "peraturan": {
57
+ "precision": 1.0,
58
+ "recall": 1.0,
59
+ "f1-score": 1.0,
60
+ "support": 41.0
61
+ },
62
+ "accuracy": 0.9560975609756097,
63
+ "macro avg": {
64
+ "precision": 0.9571545406857525,
65
+ "recall": 0.9560975609756097,
66
+ "f1-score": 0.9556549415235782,
67
+ "support": 410.0
68
+ },
69
+ "weighted avg": {
70
+ "precision": 0.9571545406857526,
71
+ "recall": 0.9560975609756097,
72
+ "f1-score": 0.955654941523578,
73
+ "support": 410.0
74
+ }
75
+ }
model/test_results/test_run_20250702_143737/confidence_analysis.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mean_confidence": 0.9509811997413635,
3
+ "std_confidence": 0.06421167403459549,
4
+ "min_confidence": 0.48898056149482727,
5
+ "max_confidence": 0.9849911332130432,
6
+ "median_confidence": 0.9713054299354553,
7
+ "q25_confidence": 0.9562950134277344,
8
+ "q75_confidence": 0.979069173336029,
9
+ "mean_confidence_correct": 0.9571561217308044,
10
+ "mean_confidence_incorrect": 0.816504180431366,
11
+ "std_confidence_correct": 0.048875272274017334,
12
+ "std_confidence_incorrect": 0.15158796310424805
13
+ }
model/test_results/test_run_20250702_143737/confidence_analysis.png ADDED

Git LFS Details

  • SHA256: c17a3315cf83d3fdb7ac3e7494798f4042b8c951af62a0aaebc2de44042f6e08
  • Pointer size: 131 Bytes
  • Size of remote file: 354 kB