rifal2024 commited on
Commit
c6c2928
Β·
verified Β·
1 Parent(s): fd19fc2

Upload sentiment_api.py

Browse files
Files changed (1) hide show
  1. sentiment_api.py +279 -0
sentiment_api.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # filepath: [sentiment_api.py](http://_vscodecontentref_/0)
2
+ from fastapi import FastAPI, Request
3
+ from fastapi.middleware.cors import CORSMiddleware # ← TAMBAH INI
4
+ from pydantic import BaseModel
5
+ from fastapi.responses import JSONResponse
6
+ import re
7
+ import os
8
+
9
# FastAPI application instance; interactive docs served at /docs.
app = FastAPI(title="Indonesian Sentiment Analysis API",
              description="API untuk analisis sentimen bahasa Indonesia dengan dukungan bahasa gaul",
              version="1.0.0")

# CORS middleware so browser front-ends hosted on other origins can call the API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is very
# permissive -- the CORS spec does not allow credentials with a wildcard origin,
# so Starlette echoes the request origin instead. Restrict origins for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # allow all origins
    allow_credentials=True,
    allow_methods=["*"],  # allow all HTTP methods
    allow_headers=["*"],  # allow all request headers
)
21
+
22
# Global model state, populated by load_model() at startup when a checkpoint
# can be downloaded; otherwise the API runs on keyword analysis only.
model = None          # transformers sequence-classification model, or None
tokenizer = None      # tokenizer matching `model`, or None
model_loaded = False  # True only after both model and tokenizer loaded
26
+
27
def load_model():
    """Try to load an Indonesian BERT sentiment model.

    Each candidate checkpoint is attempted in order; the first one that
    loads wins. If every candidate fails, ``model_loaded`` stays False and
    the API falls back to enhanced keyword-based analysis.

    Side effects: sets the module globals ``model``, ``tokenizer`` and
    ``model_loaded``.
    """
    global model, tokenizer, model_loaded

    # Candidate checkpoints, tried in order.
    # NOTE(review): the first two are base (non-finetuned) checkpoints, so
    # AutoModelForSequenceClassification attaches a randomly initialised
    # classification head -- their "sentiment" predictions are not meaningful.
    # Consider keeping only a checkpoint fine-tuned for sentiment.
    model_options = [
        "indolem/indobert-base-uncased",                     # general Indonesian BERT
        "cahya/bert-base-indonesian-1.5G",                   # alternative Indonesian BERT
        "mdhugol/indonesia-bert-sentiment-classification"    # sentiment-specific model
    ]

    for model_name in model_options:
        try:
            print(f"🔄 Trying to load model: {model_name}")
            # Imported lazily so the API still starts when transformers is absent.
            from transformers import AutoTokenizer, AutoModelForSequenceClassification

            print(f"📥 Downloading tokenizer for {model_name}...")
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            print("✅ Tokenizer loaded successfully!")

            print(f"📥 Downloading model {model_name} (this may take a while)...")
            model = AutoModelForSequenceClassification.from_pretrained(model_name)
            print("✅ Model loaded successfully!")

            model_loaded = True
            print(f"🎉 {model_name} ready for sentiment analysis!")
            return  # success -- stop trying further checkpoints

        except Exception as e:
            # Fix: reset partial state so a tokenizer left over from a failed
            # attempt is never paired with a model from another checkpoint
            # (or left dangling when every checkpoint fails).
            tokenizer = None
            model = None
            print(f"❌ Failed to load {model_name}: {e}")
            continue  # try the next checkpoint

    # Every candidate failed -- run in keyword-analysis mode.
    print("❌ All models failed to load")
    print("🔄 Using enhanced keyword-based analysis instead")
    model_loaded = False
65
+
66
# Try to load the model eagerly at import time.
# NOTE(review): this blocks module import while checkpoints download; a FastAPI
# startup event (or lifespan handler) would keep import side-effect free.
load_model()
68
+
69
@app.get("/")
async def root():
    """Report API metadata and which analysis backend is currently active."""
    if model_loaded and model is not None:
        # Prefer the checkpoint name recorded on the model config when present.
        if hasattr(model.config, 'name_or_path'):
            model_name = model.config.name_or_path
        else:
            model_name = "Indonesian BERT Model"
    else:
        model_name = "Unknown"

    info = {
        "message": "Indonesian Sentiment Analysis API",
        "version": "1.0.0",
        "docs": "/docs",
        "model_loaded": model_loaded,
    }
    if model_loaded:
        info["model_name"] = model_name
        info["model_type"] = "🤖 AI Model"
        info["status"] = "🎉 Ready!"
    else:
        info["model_name"] = "Enhanced Keyword Analysis"
        info["model_type"] = "📝 Keyword Analysis"
        info["status"] = "📝 Keyword Ready!"
    return info
85
+
86
@app.get("/health")
async def health_check():
    """Liveness probe: reports service health and the active backend type."""
    backend = "IndoBERTweet" if model_loaded else "Enhanced Keyword Analysis"
    return {
        "status": "healthy",
        "model_loaded": model_loaded,
        "model_type": backend,
        "ready": True,
    }
95
+
96
class TextRequest(BaseModel):
    """Request body for the /predict endpoint."""
    # Raw (possibly slang-heavy) Indonesian text to analyse.
    text: str
98
+
99
def normalize_slang(text):
    """Normalize Indonesian slang words to their formal equivalents.

    Lowercases *text*, then replaces every whole-word slang occurrence in a
    single regex pass. The single pass fixes the original sequential loop's
    chained substitutions (e.g. 'tp' -> 'tapi' -> 'tetapi'): a replacement's
    output is never itself re-substituted, so 'tp' now maps to 'tapi'.

    Args:
        text: Raw input text, possibly containing slang.
    Returns:
        Lowercased text with slang replaced by formal words.
    """
    slang_dict = {
        'gw': 'saya', 'gue': 'saya', 'w': 'saya',
        'lu': 'kamu', 'elu': 'kamu', 'lo': 'kamu',
        'gk': 'tidak', 'ga': 'tidak', 'gak': 'tidak', 'engga': 'tidak',
        # Fix: the original listed 'bgt' twice ('banget' then 'sangat');
        # the first value was silently discarded, so 'sangat' is kept here.
        'bgt': 'sangat',
        'btw': 'ngomong ngomong', 'fyi': 'informasi',
        'yg': 'yang', 'yng': 'yang',
        'dgn': 'dengan', 'dg': 'dengan',
        'org': 'orang', 'orng': 'orang',
        'udh': 'sudah', 'udah': 'sudah', 'dah': 'sudah',
        'blm': 'belum', 'blom': 'belum',
        'bkn': 'bukan', 'bukan': 'bukan',
        'krn': 'karena', 'krna': 'karena',
        'trs': 'terus', 'trus': 'terus',
        'jg': 'juga', 'jga': 'juga',
        'aja': 'saja', 'ajah': 'saja',
        'emg': 'memang', 'emang': 'memang',
        'tp': 'tapi', 'tapi': 'tetapi',
        'kalo': 'kalau', 'klo': 'kalau',
        'gimana': 'bagaimana', 'gmn': 'bagaimana',
        'knp': 'kenapa', 'knapa': 'kenapa',
        'mantap': 'bagus', 'mantul': 'bagus',
        'anjay': 'wah', 'anjir': 'wah',
        'gabut': 'tidak ada kegiatan',
        'mager': 'malas gerak',
        'baper': 'bawa perasaan',
        'santuy': 'santai',
        'kepo': 'ingin tahu',
        'php': 'pemberi harapan palsu',
        'bucin': 'budak cinta',
        # Common positive words / spelling variants
        'seneng': 'senang', 'senang': 'senang',
        'bahagia': 'bahagia', 'happy': 'senang',
        'kamaren': 'kemarin', 'kemaren': 'kemarin'
    }

    text = text.lower()

    # One combined whole-word alternation; re.escape guards against any
    # future key containing regex metacharacters.
    pattern = re.compile(
        r'\b(?:' + '|'.join(re.escape(k) for k in slang_dict) + r')\b')
    return pattern.sub(lambda m: slang_dict[m.group(0)], text)
145
+
146
def analyze_sentiment(text):
    """Return a 1-5 star sentiment rating for *text*.

    Runs the transformer model when one was loaded at startup; otherwise,
    or on any model error, falls back to enhanced_keyword_analysis().
    """
    global model, tokenizer, model_loaded

    # Normalize slang to formal Indonesian before any scoring.
    normalized_text = normalize_slang(text)

    # Debug info
    print(f"🔍 Analyzing: '{text}'")
    print(f"🔧 Normalized: '{normalized_text}'")
    print(f"🤖 Model loaded: {model_loaded}")

    # Use the transformer model only when every piece is available.
    if model_loaded and model is not None and tokenizer is not None:
        try:
            import torch
            print("🎯 Using IndoBERTweet model...")

            # Tokenize input (truncated/padded to the 512-token limit).
            inputs = tokenizer(normalized_text, return_tensors="pt", truncation=True, padding=True, max_length=512)

            # Run inference without building autograd graphs.
            with torch.no_grad():
                outputs = model(**inputs)
                logits = outputs.logits
                probabilities = torch.softmax(logits, dim=1)
                pred = torch.argmax(logits, dim=1).item()
                confidence = torch.max(probabilities).item()

            print(f"📊 IndoBERTweet prediction: {pred} (confidence: {confidence:.3f})")
            print(f"📊 Probabilities: {probabilities.numpy()}")

            # Map class index to stars assuming 0=negative, 1=neutral, 2=positive.
            # NOTE(review): this label order is hard-coded; checkpoints differ in
            # their id->label mapping (see model.config.id2label) -- confirm it
            # matches whichever checkpoint load_model() actually loaded.
            if pred == 2:  # positive
                # High-confidence positive earns 5 stars, otherwise 4.
                result = 5 if confidence > 0.8 else 4
                print(f"✅ Result: {result} stars (Positive)")
                return result
            elif pred == 1:  # neutral
                result = 3
                print(f"😐 Result: {result} stars (Neutral)")
                return result
            else:  # negative (pred == 0)
                # High-confidence negative earns 1 star, otherwise 2.
                result = 1 if confidence > 0.8 else 2
                print(f"❌ Result: {result} stars (Negative)")
                return result

        except Exception as e:
            # Any model failure degrades gracefully to the keyword path below.
            print(f"⚠️ Error using IndoBERTweet: {e}")
            print("🔄 Falling back to keyword analysis...")

    # Enhanced keyword-based analysis (fallback path).
    print("🔤 Using enhanced keyword analysis...")
    result = enhanced_keyword_analysis(normalized_text, text)
    print(f"📝 Keyword analysis result: {result} stars")
    return result
202
+
203
def enhanced_keyword_analysis(normalized_text, original_text):
    """Rule-based sentiment scoring for Indonesian text (incl. slang).

    Tallies substring hits against positive / strong-positive / negative /
    neutral keyword lists, applies a simple negation heuristic, and maps
    the tallies to a 1-5 star rating. ``original_text`` is accepted for
    interface compatibility but is not used in the scoring.
    """
    haystack = normalized_text.lower()

    positive_words = [
        "senang", "bahagia", "happy", "mantap", "bagus", "keren", "suka", "cinta", "love",
        "amazing", "luar biasa", "hebat", "fantastis", "sempurna", "excellent", "good",
        "positif", "optimis", "gembiraan", "kebahagiaan", "sukses", "berhasil", "menang",
        "excited", "antusias", "semangat", "motivasi", "inspirasi", "grateful", "bersyukur",
        "mantul", "jos", "top", "juara", "recommended", "worth it", "puas", "satisfied",
        "gembira", "asyik", "asik", "cool", "nice", "wonderful", "great", "awesome"
    ]

    # Intensifiers / very positive phrases -- each hit is worth double.
    strong_positive_words = [
        "banget", "sangat", "luar biasa", "fantastis", "sempurna", "amazing", "awesome",
        "gembira", "bahagia banget", "senang banget", "happy banget"
    ]

    negative_words = [
        "marah", "kesal", "benci", "jelek", "buruk", "jahat", "sedih", "kecewa", "galau",
        "frustrated", "angry", "hate", "bad", "terrible", "awful", "horrible", "disgusting",
        "menyebalkan", "annoying", "stress", "depresi", "down", "hopeless", "putus asa",
        "fail", "gagal", "rugi", "loss", "disappointed", "broken heart", "sakit hati",
        "toxic", "drama", "problem", "masalah", "susah", "sulit", "capek", "tired"
    ]

    neutral_words = [
        "biasa", "standard", "normal", "okay", "ok", "fine", "lumayan", "so so",
        "average", "medium", "moderate", "netral", "balanced", "mixed feelings"
    ]

    negation_words = ["tidak", "bukan", "jangan", "gak", "ga", "engga", "no", "nope", "never"]

    # Substring tallies -- note these are substring matches, not word matches.
    pos_hits = sum(w in haystack for w in positive_words)
    strong_hits = sum(w in haystack for w in strong_positive_words)
    neg_hits = sum(w in haystack for w in negative_words)
    neutral_hits = sum(w in haystack for w in neutral_words)

    # A single flat bonus when any intensified happy phrase appears.
    if any(p in haystack for p in ("senang banget", "bahagia banget", "happy banget")):
        strong_hits += 2

    # Negation heuristic: a negated sentiment is pulled toward the middle.
    if any(neg in haystack for neg in negation_words):
        if pos_hits > neg_hits:
            return 3  # negated positive reads as neutral
        if neg_hits > pos_hits:
            return 4  # negated negative reads as mildly positive

    # Strong-positive hits count double in the final comparison.
    weighted_positive = pos_hits + strong_hits * 2

    if weighted_positive > neg_hits + neutral_hits:
        return 5  # strongly positive
    if weighted_positive > neg_hits:
        return 4  # mildly positive
    if neg_hits > weighted_positive + neutral_hits:
        return 1  # strongly negative
    if neg_hits > weighted_positive:
        return 2  # mildly negative
    return 3  # neutral / mixed
275
+
276
@app.post("/predict")
async def predict(req: TextRequest):
    """Score the submitted text and return its 1-5 star sentiment rating."""
    rating = analyze_sentiment(req.text)
    return JSONResponse(content={"stars": rating})