Spaces:
Sleeping
Sleeping
| from flask import Flask, render_template, request, jsonify, send_file | |
try:
    from services.aggregator import collect_data
except Exception as load_error:
    # The aggregator is the data source; if it cannot be imported the app
    # still starts, but every query yields a single placeholder row.
    print(f"❌ FATAL: aggregator gagal load: {load_error}")

    def collect_data(kw, src="all"):
        """Fallback collector: ignore the query and return one placeholder (source, text) pair."""
        return [("unknown", "aggregator error")]
try:
    from services.sentiment import predict_with_score
except Exception as load_error:
    print(f"⚠️ sentiment gagal load: {load_error} — rule-based fallback")

    def predict_with_score(texts):
        """Rule-based stand-in for the sentiment model.

        Counts how many positive and negative cue words occur (as substrings,
        case-insensitively) in each text and labels it by the larger count.
        Every prediction carries a fixed score of 0.5.

        Returns a list of ``{'label': ..., 'score': 0.5}`` dicts, one per text.
        """
        positive_cues = ['bagus', 'baik', 'senang', 'suka', 'mantap', 'oke', 'good', 'great']
        negative_cues = ['buruk', 'jelek', 'benci', 'kecewa', 'gagal', 'bad', 'worst']

        def _rb(t):
            lowered = t.lower()
            pos = sum(cue in lowered for cue in positive_cues)
            neg = sum(cue in lowered for cue in negative_cues)
            if pos > neg:
                label = 'Positive'
            elif neg > pos:
                label = 'Negative'
            else:
                label = 'Neutral'
            return {'label': label, 'score': 0.5}

        return list(map(_rb, texts))
| from collections import Counter | |
| import pandas as pd | |
| import os, re | |
| import numpy as np | |
| from datetime import datetime | |
| import matplotlib | |
| matplotlib.use('Agg') | |
| import matplotlib.pyplot as plt | |
| from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer | |
| from sklearn.decomposition import LatentDirichletAllocation | |
| from sklearn.cluster import KMeans | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.pipeline import Pipeline | |
| import networkx as nx | |
| from itertools import combinations | |
| from wordcloud import WordCloud | |
| # Deep preprocessing | |
try:
    from services.preprocessing_id import clean_text_deep, batch_clean, STOPWORDS
    DEEP_PREP = True
    print("✅ Deep preprocessing loaded")
except Exception as e:
    # Catch any load failure (not only ImportError) so this optional service
    # degrades the same way as the other optional services in this file
    # instead of aborting application start-up.
    print(f"⚠️ deep preprocessing not available: {e} — basic fallback")
    DEEP_PREP = False
    # Minimal Indonesian/English stopword list used by the fallback path.
    STOPWORDS = {'yang','dan','di','ke','dari','ini','itu','dengan','untuk','adalah','ada','pada','juga','tidak','bisa','sudah','the','is','in','of','a','an','and','it'}

    def clean_text_deep(t):
        """Basic cleaner: lowercase, drop URLs, replace non-alphanumerics with spaces, collapse whitespace."""
        t = t.lower()
        t = re.sub(r'http\S+', '', t)
        t = re.sub(r'[^a-zA-Z0-9\s]', ' ', t)
        return re.sub(r'\s+', ' ', t).strip()

    def batch_clean(texts):
        """Apply ``clean_text_deep`` to every text and return the results as a list."""
        return [clean_text_deep(t) for t in texts]
try:
    from services.bot_bert import detect_bot_bert
except Exception:
    def detect_bot_bert(x):
        """Fallback when the BERT bot detector cannot be loaded: report nothing."""
        return []

try:
    from services.fake_news import detect_fake_news
except Exception:
    def detect_fake_news(x):
        """Fallback when the fake-news detector cannot be loaded: report nothing."""
        return []
| # ── New NLP Services ── | |
try:
    from services.absa import analyze_absa
except Exception as absa_error:
    print(f"⚠️ ABSA not available: {absa_error}")

    def analyze_absa(x):
        """Fallback ABSA result: empty aspects, aggregate and sentiment map."""
        return {'top_aspects': [], 'aggregate': {}, 'aspect_sentiment_map': {}}

try:
    from services.ner import analyze_ner
except Exception as ner_error:
    print(f"⚠️ NER not available: {ner_error}")

    def analyze_ner(x):
        """Fallback NER result: no entities."""
        return {'top_entities': [], 'entities_by_type': {}}

try:
    from services.advanced_nlp import (
        analyze_stance,
        analyze_emotions,
        extract_keywords,
        summarize_by_platform,
    )
except Exception as nlp_error:
    print(f"⚠️ Advanced NLP not available: {nlp_error}")

    def analyze_stance(x, t=None):
        """Fallback stance result: no counts, neutral dominant stance, all percentages zero."""
        return {
            'counts': {},
            'dominant': 'Neutral',
            'favor_pct': 0,
            'against_pct': 0,
            'neutral_pct': 0,
        }

    def analyze_emotions(x):
        """Fallback emotion result: empty distribution, neutral dominant emotion."""
        return {'distribution': {}, 'dominant': 'neutral', 'emotional_pct': 0}

    def extract_keywords(x, n=20):
        """Fallback keyword extraction: no keywords."""
        return []

    def summarize_by_platform(x):
        """Fallback per-platform summary: empty mapping."""
        return {}
# Flask application instance (started by app.run in the __main__ guard).
app = Flask(__name__)

# Default minimum model score for a sentiment label to be kept as "certain".
CONF_THRESHOLD = 0.60

# ── HOAX CLASSIFIER ──
# Small hand-written training set: the first 15 sentences are labelled 1,
# the last 15 are labelled 0 (see _HY below).
_HX = [
    "berita ini bohong dan tidak benar",
    "ini propaganda yang menyesatkan",
    "jangan percaya hoax yang beredar",
    "informasi palsu disebarkan untuk memfitnah",
    "disinformasi sengaja dibuat untuk menipu",
    "berita palsu sangat meresahkan warga",
    "menyebarkan kebohongan dan fitnah",
    "manipulasi politik yang berbahaya",
    "provokasi untuk memecah belah bangsa",
    "ujaran kebencian dan fitnah",
    "waspada berita bohong sengaja disebarkan",
    "hoaks sudah dibantah pihak berwenang",
    "informasi menyesatkan tidak ada bukti",
    "narasi sesat untuk mengadu domba",
    "berita manipulatif perlu diklarifikasi",
    "produk ini sangat bagus dan berkualitas",
    "saya sangat senang dengan pelayanannya",
    "hasil kerja tim ini luar biasa",
    "kebijakan ini berdampak positif masyarakat",
    "acara kemarin berjalan lancar dan meriah",
    "terima kasih atas bantuan yang diberikan",
    "pemerintah berupaya meningkatkan kesejahteraan",
    "inovasi terbaru sangat membantu kehidupan",
    "prestasi luar biasa yang membanggakan",
    "kondisi ekonomi mulai membaik dari data",
    "program ini memberikan manfaat nyata",
    "kolaborasi baik menghasilkan output optimal",
    "penelitian ini memberikan temuan menarik",
    "masyarakat antusias menyambut kebijakan baru",
    "kualitas pendidikan terus meningkat",
]
_HY = [1] * 15 + [0] * 15

# TF-IDF (uni- and bigrams) feeding a class-balanced logistic regression.
_hoax_clf = Pipeline([
    ('tfidf', TfidfVectorizer(ngram_range=(1, 2), max_features=500, sublinear_tf=True)),
    ('clf', LogisticRegression(C=1.0, max_iter=200, random_state=42, class_weight='balanced')),
])

try:
    _hoax_clf.fit(_HX, _HY)
    print("✅ Hoax classifier ready")
except Exception as fit_error:
    # detect_hoax() checks for None and falls back to keyword matching.
    print(f"⚠️ Hoax error: {fit_error}")
    _hoax_clf = None
| # ── CONFIDENCE FILTER (Priority 2) ── | |
def apply_confidence_filter(scored, threshold=CONF_THRESHOLD):
    """Attach a thresholded sentiment to each scored prediction.

    Args:
        scored: iterable of dicts; ``'score'`` (default 0) and ``'label'``
            (default ``'Neutral'``) are read from each.
        threshold: minimum score for the label to be kept.

    Returns:
        A new list of dicts: each input dict plus ``'sentiment'`` (the label,
        or ``'Uncertain'`` below the threshold), ``'confidence'`` (score
        rounded to 4 places) and ``'is_certain'``.
    """
    def annotate(entry):
        confidence = entry.get('score', 0)
        certain = confidence >= threshold
        sentiment = entry.get('label', 'Neutral') if certain else 'Uncertain'
        return {
            **entry,
            'sentiment': sentiment,
            'confidence': round(confidence, 4),
            'is_certain': certain,
        }

    return [annotate(entry) for entry in scored]
def confidence_stats(result_data):
    """Summarise confidence values per sentiment class and overall.

    Rows whose ``'sentiment'`` is not one of the four known classes are
    counted under ``'Uncertain'``.

    Returns:
        dict with ``'by_class'`` (count/mean/std/min/max per class, zeros for
        empty classes), ``'buckets'`` (high/med/low counts),
        ``'uncertain_count'`` and ``'avg_confidence'``.
    """
    grouped = {name: [] for name in ('Positive', 'Negative', 'Neutral', 'Uncertain')}
    for row in result_data:
        sentiment = row.get('sentiment', 'Neutral')
        confidence = row.get('confidence', 0)
        target = sentiment if sentiment in grouped else 'Uncertain'
        grouped[target].append(confidence)

    def describe(values):
        # Empty classes get an all-zero summary rather than NaN statistics.
        if not values:
            return {'count': 0, 'mean': 0, 'std': 0, 'min': 0, 'max': 0}
        return {
            'count': len(values),
            'mean': round(float(np.mean(values)), 3),
            'std': round(float(np.std(values)), 3),
            'min': round(float(np.min(values)), 3),
            'max': round(float(np.max(values)), 3),
        }

    per_class = {name: describe(values) for name, values in grouped.items()}

    confidences = [row.get('confidence', 0) for row in result_data]
    buckets = {
        'high (≥0.8)': sum(1 for c in confidences if c >= 0.8),
        'med (0.6-0.8)': sum(1 for c in confidences if 0.6 <= c < 0.8),
        'low (<0.6)': sum(1 for c in confidences if c < 0.6),
    }
    uncertain_total = sum(1 for row in result_data if not row.get('is_certain', True))
    average = round(float(np.mean(confidences)), 3) if confidences else 0

    return {
        'by_class': per_class,
        'buckets': buckets,
        'uncertain_count': uncertain_total,
        'avg_confidence': average,
    }
| # ── CROSS-PLATFORM ANALYSIS (Priority 3) ── | |
def cross_platform_analysis(result_data):
    """Compare sentiment distributions between sources.

    Args:
        result_data: iterable of dicts; ``'source'`` (default ``'unknown'``),
            ``'sentiment'`` (default ``'Neutral'``) and ``'confidence'``
            (default 0) are read from each.

    Returns:
        dict with per-source statistics under ``'platforms'``, pairwise
        positive-share differences, human-readable insights (only when more
        than one source is present) and the most positive / negative /
        polarized source names (``None`` when there is no data).
    """
    labels = ('Positive', 'Negative', 'Neutral', 'Uncertain')

    # Pass 1: raw tallies per source.
    tallies = {}
    for row in result_data:
        name = row.get('source', 'unknown')
        sentiment = row.get('sentiment', 'Neutral')
        confidence = row.get('confidence', 0)
        bucket = tallies.setdefault(
            name,
            {'Positive': 0, 'Negative': 0, 'Neutral': 0, 'Uncertain': 0, 'total': 0, 'conf_sum': 0},
        )
        # Anything that is not an existing key of the tally is booked as Uncertain.
        bucket[sentiment if sentiment in bucket else 'Uncertain'] += 1
        bucket['total'] += 1
        bucket['conf_sum'] += confidence

    # Pass 2: derive percentages and summary figures.
    platform_stats = {}
    for name, counts in tallies.items():
        size = counts['total'] or 1
        pos_share = counts['Positive'] / size
        neg_share = counts['Negative'] / size
        neu_share = counts['Neutral'] / size
        platform_stats[name] = {
            'total': size,
            'pos_count': counts['Positive'],
            'neg_count': counts['Negative'],
            'neu_count': counts['Neutral'],
            'unc_count': counts['Uncertain'],
            'pos_pct': round(pos_share * 100, 1),
            'neg_pct': round(neg_share * 100, 1),
            'neu_pct': round(neu_share * 100, 1),
            'unc_pct': round(counts['Uncertain'] / size * 100, 1),
            'polarity': round(abs(pos_share - neg_share), 3),
            'avg_conf': round(counts['conf_sum'] / size, 3),
            'dominant': max(labels, key=counts.__getitem__),
        }

    if not platform_stats:
        return {'platforms': {}, 'pairwise': [], 'insights': [], 'most_positive': None, 'most_negative': None, 'most_polarized': None}

    names = list(platform_stats)

    def leader(metric):
        return max(names, key=lambda n: platform_stats[n][metric])

    most_positive = leader('pos_pct')
    most_negative = leader('neg_pct')
    most_polarized = leader('polarity')

    pairwise = []
    for a, b in combinations(names, 2):
        diff = round(abs(platform_stats[a]['pos_pct'] - platform_stats[b]['pos_pct']), 1)
        pairwise.append({
            'platform_a': a,
            'platform_b': b,
            'pos_diff': diff,
            'description': f"{a} vs {b}: selisih sentimen positif {diff}%",
        })

    insights = []
    if len(names) > 1:
        insights = [
            f"{most_positive.capitalize()} memiliki sentimen positif tertinggi ({platform_stats[most_positive]['pos_pct']}%).",
            f"{most_negative.capitalize()} memiliki sentimen negatif tertinggi ({platform_stats[most_negative]['neg_pct']}%).",
            f"{most_polarized.capitalize()} paling terpolarisasi (indeks {platform_stats[most_polarized]['polarity']}).",
        ]

    return {
        'platforms': platform_stats,
        'pairwise': pairwise,
        'insights': insights,
        'most_positive': most_positive,
        'most_negative': most_negative,
        'most_polarized': most_polarized,
    }
def generate_comparative_chart(cross_data):
    """Render the per-platform comparison chart to ``static/comparative.png``.

    Draws two bar charts side by side: sentiment percentages per platform,
    and polarity (scaled ×100) next to average confidence (as a percentage).
    Nothing is drawn when fewer than two platforms are present.

    Args:
        cross_data: dict with a ``'platforms'`` mapping whose values provide
            ``pos_pct``, ``neg_pct``, ``neu_pct``, ``polarity`` and ``avg_conf``.

    Best-effort: any error is printed and swallowed. The figure is always
    closed, including on failure, so failed renders do not accumulate open
    figures in a long-running process.
    """
    fig = None
    try:
        platforms = cross_data.get('platforms', {})
        if len(platforms) < 2:
            return
        os.makedirs("static", exist_ok=True)
        srcs = list(platforms.keys())
        pos = [platforms[s]['pos_pct'] for s in srcs]
        neg = [platforms[s]['neg_pct'] for s in srcs]
        neu = [platforms[s]['neu_pct'] for s in srcs]
        pol = [platforms[s]['polarity'] * 100 for s in srcs]
        cnf = [platforms[s]['avg_conf'] * 100 for s in srcs]
        tick_labels = [s.capitalize() for s in srcs]
        x = np.arange(len(srcs))
        w = 0.26

        fig, axes = plt.subplots(1, 2, figsize=(13, 4))
        fig.patch.set_facecolor('#0e1117')
        for ax in axes:
            ax.set_facecolor('#141820')

        # Left panel: grouped sentiment bars.
        axes[0].bar(x - w, pos, w, label='Positif', color='#22c55e', alpha=0.85)
        axes[0].bar(x, neg, w, label='Negatif', color='#ef4444', alpha=0.85)
        axes[0].bar(x + w, neu, w, label='Netral', color='#94a3b8', alpha=0.85)
        axes[0].set_xticks(x)
        axes[0].set_xticklabels(tick_labels, color='#8892a4', fontsize=9)
        axes[0].set_title('Distribusi Sentimen per Platform', color='#e8eaf0', fontsize=10)
        axes[0].legend(fontsize=8, facecolor='#141820', edgecolor='#1a2030', labelcolor='#8892a4')
        axes[0].set_ylim(0, 105)
        axes[0].tick_params(colors='#5a6070')

        # Right panel: polarity vs. average confidence.
        axes[1].bar(x - 0.2, pol, 0.38, label='Polarisasi ×100', color='#f59e0b', alpha=0.8)
        axes[1].bar(x + 0.2, cnf, 0.38, label='Avg Confidence %', color='#4f9cf9', alpha=0.8)
        axes[1].set_xticks(x)
        axes[1].set_xticklabels(tick_labels, color='#8892a4', fontsize=9)
        axes[1].set_title('Polarisasi & Confidence per Platform', color='#e8eaf0', fontsize=10)
        axes[1].legend(fontsize=8, facecolor='#141820', edgecolor='#1a2030', labelcolor='#8892a4')
        axes[1].set_ylim(0, 110)
        axes[1].tick_params(colors='#5a6070')

        for ax in axes:
            for sp in ax.spines.values():
                sp.set_edgecolor('#1a2030')

        # Use the figure's own methods rather than pyplot's global "current
        # figure", so the right figure is laid out and saved.
        fig.tight_layout(pad=1.5)
        fig.savefig("static/comparative.png", dpi=110, facecolor=fig.get_facecolor())
    except Exception as e:
        print(f"comparative chart error: {e}")
    finally:
        if fig is not None:
            plt.close(fig)
| # ── RICH EXPORT (Priority 4) ── | |
def build_export_data(result_data, keyword, source, conf_stats, cross_data, trend):
    """Assemble the rows and summary that are exported as CSV.

    Args:
        result_data: list of per-text result dicts (``text``, ``sentiment``,
            ``confidence``, ``is_certain``, ``source``, ``scraped_at``).
        keyword: search keyword, copied onto every row and into the summary.
        source: requested source, copied into the summary.
        conf_stats: dict; ``'avg_confidence'`` is read.
        cross_data: dict; ``'most_positive'`` is read.
        trend: dict; ``'label'`` and ``'polarity'`` are read.

    Returns:
        ``{'main': [row, ...], 'summary': {...}}``. ``total_samples`` is the
        real number of rows (0 for an empty result); percentages are 0.0
        when there are no rows.
    """
    ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    main_rows = [
        {
            'index': i + 1,
            'text': r.get('text', ''),
            'text_length': len(r.get('text', '').split()),
            'sentiment': r.get('sentiment', ''),
            'confidence': r.get('confidence', 0),
            'is_certain': r.get('is_certain', True),
            'source': r.get('source', ''),
            'scraped_at': r.get('scraped_at', ts),
            'keyword': keyword,
        }
        for i, r in enumerate(result_data)
    ]

    total = len(result_data)
    # Separate divisor: guards against division by zero without inflating
    # the reported sample count for empty results.
    denom = total or 1
    counts = Counter(r.get('sentiment') for r in result_data)
    pos, neg = counts['Positive'], counts['Negative']
    neu, unc = counts['Neutral'], counts['Uncertain']

    summary = {
        'keyword': keyword,
        'source': source,
        'analyzed_at': ts,
        'total_samples': total,
        'positive_count': pos,
        'negative_count': neg,
        'neutral_count': neu,
        'uncertain_count': unc,
        'positive_pct': round(pos / denom * 100, 1),
        'negative_pct': round(neg / denom * 100, 1),
        'neutral_pct': round(neu / denom * 100, 1),
        'uncertain_pct': round(unc / denom * 100, 1),
        'avg_confidence': conf_stats.get('avg_confidence', 0),
        'trend_label': trend.get('label', ''),
        'polarity_index': trend.get('polarity', 0),
        # cross_platform_analysis reports None when there is no data.
        'most_positive_platform': cross_data.get('most_positive') or '',
    }
    return {'main': main_rows, 'summary': summary}
def save_export_csv(export):
    """Write the export to ``static/result.csv`` (rows) and ``static/summary.csv`` (one summary row)."""
    os.makedirs("static", exist_ok=True)

    detail_frame = pd.DataFrame(export['main'])
    detail_frame.to_csv("static/result.csv", index=False)

    summary_frame = pd.DataFrame([export['summary']])
    summary_frame.to_csv("static/summary.csv", index=False)
| # ── CORE FUNCTIONS ── | |
def get_top_words(texts):
    """Return the 15 most frequent words as ``[{'word': ..., 'count': ...}, ...]``.

    Texts are cleaned with ``clean_text_deep``; words of two characters or
    fewer and words in ``STOPWORDS`` are ignored.
    """
    tally = Counter(
        token
        for text in texts
        for token in clean_text_deep(text).split()
        if len(token) > 2 and token not in STOPWORDS
    )
    return [{"word": word, "count": count} for word, count in tally.most_common(15)]
def generate_wordcloud(texts):
    """Render a single-colour word cloud of the cleaned texts to ``static/wordcloud.png``.

    Does nothing when the cleaned corpus is empty. Best-effort: errors are
    printed and swallowed.
    """
    try:
        os.makedirs("static", exist_ok=True)
        corpus = " ".join(batch_clean(texts))
        if not corpus.strip():
            return

        def flat_colour(*args, **kwargs):
            # Every word gets the same colour.
            return '#4f9cf9'

        cloud = WordCloud(
            width=900,
            height=380,
            background_color='#0e1117',
            color_func=flat_colour,
            max_words=80,
            stopwords=STOPWORDS,
        )
        cloud.generate(corpus).to_file("static/wordcloud.png")
    except Exception as e:
        print(f"wordcloud error: {e}")
def generate_heatmap(data):
    """Render a source × sentiment count heatmap to ``static/heatmap.png``.

    Args:
        data: list of dicts with ``'source'`` and ``'sentiment'`` keys.
            Sentiments outside the four known labels are counted in the
            ``Uncertain`` column.

    Does nothing for empty input. Best-effort: errors are printed and
    swallowed, and the figure is always closed so failed renders do not
    leave figures open.
    """
    fig = None
    try:
        if not data:
            return
        labels = ["Positive", "Neutral", "Negative", "Uncertain"]
        sources = sorted(set(d["source"] for d in data))
        # Index maps avoid a linear list search for every row.
        row_of = {src: i for i, src in enumerate(sources)}
        col_of = {lbl: j for j, lbl in enumerate(labels)}
        uncertain_col = col_of["Uncertain"]

        matrix = np.zeros((len(sources), len(labels)))
        for d in data:
            matrix[row_of[d["source"]]][col_of.get(d["sentiment"], uncertain_col)] += 1
        if matrix.sum() == 0:
            return

        fig, ax = plt.subplots(figsize=(7, max(2, len(sources))))
        fig.patch.set_facecolor('#0e1117')
        ax.set_facecolor('#141820')
        im = ax.imshow(matrix, cmap='Blues', aspect='auto')
        ax.set_xticks(range(len(labels)))
        ax.set_xticklabels(labels, color='#8892a4', fontsize=9)
        ax.set_yticks(range(len(sources)))
        ax.set_yticklabels(sources, color='#8892a4', fontsize=9)
        ax.tick_params(colors='#5a6070')
        fig.colorbar(im, ax=ax)
        fig.tight_layout()

        os.makedirs("static", exist_ok=True)
        fig.savefig("static/heatmap.png", dpi=100, facecolor=fig.get_facecolor())
    except Exception as e:
        print(f"heatmap error: {e}")
    finally:
        if fig is not None:
            plt.close(fig)
def generate_timeline(data):
    """Render rolling sentiment proportions over comment order to ``static/timeline.png``.

    Args:
        data: list of dicts with a ``'sentiment'`` key, in display order.

    The rolling window is ``max(5, len(data) // 10)`` items and is shorter at
    the start of the series. Nothing is drawn for fewer than three rows.
    Best-effort: errors are printed and swallowed, and the figure is always
    closed.
    """
    fig = None
    try:
        if not data or len(data) < 3:
            return
        os.makedirs("static", exist_ok=True)
        window = max(5, len(data) // 10)

        def indicator(label):
            # 1 where the row carries this sentiment, else 0.
            return [1 if d["sentiment"] == label else 0 for d in data]

        def roll(arr, w):
            # Trailing mean over at most w items.
            out = []
            for i in range(len(arr)):
                chunk = arr[max(0, i - w + 1):i + 1]
                out.append(sum(chunk) / len(chunk))
            return out

        pos_r = indicator("Positive")
        neg_r = indicator("Negative")
        # Each rolling series is computed once and reused for fill and line.
        pos_roll = roll(pos_r, window)
        neg_roll = roll(neg_r, window)
        neu_roll = roll(indicator("Neutral"), window)
        unc_roll = roll(indicator("Uncertain"), window)
        x = list(range(1, len(data) + 1))

        fig, ax = plt.subplots(figsize=(11, 3.5))
        fig.patch.set_facecolor('#0e1117')
        ax.set_facecolor('#141820')
        ax.fill_between(x, pos_roll, alpha=0.12, color='#22c55e')
        ax.fill_between(x, neg_roll, alpha=0.12, color='#ef4444')
        ax.plot(x, pos_roll, color='#22c55e', lw=1.8, label='Positif')
        ax.plot(x, neg_roll, color='#ef4444', lw=1.8, label='Negatif')
        ax.plot(x, neu_roll, color='#94a3b8', lw=1.2, ls='--', label='Netral')
        ax.plot(x, unc_roll, color='#f59e0b', lw=1.0, ls=':', label='Uncertain')
        # Overall positive / negative share as faint reference lines.
        ax.axhline(np.mean(pos_r), color='#22c55e', lw=0.6, ls=':', alpha=0.5)
        ax.axhline(np.mean(neg_r), color='#ef4444', lw=0.6, ls=':', alpha=0.5)
        ax.set_xlabel(f'Urutan komentar (rolling mean, window={window})', color='#5a6070', fontsize=8)
        ax.set_ylabel('Proporsi', color='#5a6070', fontsize=8)
        ax.tick_params(colors='#5a6070', labelsize=7)
        for sp in ax.spines.values():
            sp.set_edgecolor('#1a2030')
        ax.legend(fontsize=8, facecolor='#141820', edgecolor='#1a2030', labelcolor='#8892a4')
        ax.set_ylim(0, 1.05)
        ax.set_xlim(1, len(data))
        fig.tight_layout(pad=1.0)
        fig.savefig("static/timeline.png", dpi=110, facecolor=fig.get_facecolor())
    except Exception as e:
        print(f"timeline error: {e}")
    finally:
        if fig is not None:
            plt.close(fig)
def predict_trend(data):
    """Summarise the overall sentiment trend of the analysed rows.

    Args:
        data: list of dicts with a ``'sentiment'`` key and an optional
            ``'source'`` key (default ``'unknown'``).

    Returns:
        dict with ``label``, ``dominant``, ``polarity`` (|positive share −
        negative share|), ``confidence``, ``by_source`` (per-source counts
        and percentages), ``pos_pct`` / ``neg_pct`` / ``neu_pct`` and a
        human-readable ``summary``. The same keys are present for empty
        input, with zeroed values.
    """
    if not data:
        # Same key set as the populated result so consumers can read the
        # percentage fields unconditionally.
        return {
            "label": "Kurang Data",
            "dominant": "Neutral",
            "polarity": 0.0,
            "confidence": 0.0,
            "by_source": {},
            "pos_pct": 0.0,
            "neg_pct": 0.0,
            "neu_pct": 0.0,
            "summary": "Tidak ada data.",
        }

    sentiments = [d["sentiment"] for d in data]
    total = len(sentiments)
    pos_r = sentiments.count("Positive") / total
    neg_r = sentiments.count("Negative") / total
    neu_r = sentiments.count("Neutral") / total
    polarity = round(abs(pos_r - neg_r), 3)

    by_source = {}
    for d in data:
        src = d.get("source", "unknown")
        bucket = by_source.setdefault(
            src, {"Positive": 0, "Negative": 0, "Neutral": 0, "Uncertain": 0, "total": 0}
        )
        s = d["sentiment"]
        # Labels that are not an existing key of the tally count as Uncertain.
        bucket[s if s in bucket else "Uncertain"] += 1
        bucket["total"] += 1
    for bucket in by_source.values():
        t = bucket["total"]
        bucket["pos_pct"] = round(bucket["Positive"] / t * 100, 1)
        bucket["neg_pct"] = round(bucket["Negative"] / t * 100, 1)
        bucket["neu_pct"] = round(bucket["Neutral"] / t * 100, 1)

    if pos_r > neg_r and pos_r > neu_r:
        label, dominant, conf = "Dominan Positif", "Positive", round(pos_r, 3)
    elif neg_r > pos_r and neg_r > neu_r:
        label, dominant, conf = "Dominan Negatif", "Negative", round(neg_r, 3)
    elif neu_r >= 0.5:
        label, dominant, conf = "Mayoritas Netral", "Neutral", round(neu_r, 3)
    else:
        label, dominant, conf = "Terpolarisasi", "Mixed", round(polarity, 3)

    dom_src = max(by_source, key=lambda s: by_source[s]["total"]) if by_source else "-"
    pos_pct = round(pos_r * 100, 1)
    neg_pct = round(neg_r * 100, 1)
    neu_pct = round(neu_r * 100, 1)
    return {
        "label": label,
        "dominant": dominant,
        "polarity": polarity,
        "confidence": conf,
        "by_source": by_source,
        "pos_pct": pos_pct,
        "neg_pct": neg_pct,
        "neu_pct": neu_pct,
        "summary": f"{label} ({pos_pct}% positif, {neg_pct}% negatif, {neu_pct}% netral). Indeks polarisasi: {polarity:.2f}. Sumber terbanyak: {dom_src}.",
    }
def detect_hoax(texts):
    """Label the first 20 texts as ``Hoax`` or ``Normal``.

    Uses the module-level ``_hoax_clf`` pipeline when it is available; if it
    is missing or prediction fails, falls back to keyword matching.

    Returns:
        list of ``{'text', 'label', 'confidence', 'method'}`` dicts where
        ``method`` is ``'ml'`` or ``'keyword'``.
    """
    sample = texts[:20]
    if not sample:
        return []

    if _hoax_clf is not None:
        try:
            preds = _hoax_clf.predict(sample)
            probas = _hoax_clf.predict_proba(sample)
            # Built as one list so a failure can never leave partial ML rows
            # mixed into the keyword fallback below.
            return [
                {
                    "text": t,
                    "label": "Hoax" if p == 1 else "Normal",
                    "confidence": round(float(max(pr)), 3),
                    "method": "ml",
                }
                for t, p, pr in zip(sample, preds, probas)
            ]
        except Exception as e:
            # Report the failure instead of hiding it, then fall back.
            print(f"hoax ml error: {e} — keyword fallback")

    KW = ["hoax","bohong","fitnah","propaganda","palsu","disinformasi","menyesatkan","kebohongan","manipulasi","adu domba","provokasi","berita palsu","ujaran kebencian","tidak benar","narasi sesat"]
    results = []
    for t in sample:
        lowered = t.lower()
        sc = sum(1 for k in KW if k in lowered)
        lbl = "Hoax" if sc >= 1 else "Normal"
        # Hoax confidence grows with the number of keyword hits, capped at 0.95.
        confidence = round(min(0.5 + sc * 0.1, 0.95), 3) if lbl == "Hoax" else 0.6
        results.append({"text": t, "label": lbl, "confidence": confidence, "method": "keyword"})
    return results
def get_topics(texts):
    """Extract up to three LDA topics, each as a list of five words.

    Returns ``[["data kurang"]]`` when fewer than five usable documents
    remain after cleaning, ``[["kosong"]]`` when the vocabulary is empty and
    ``[["error"]]`` when anything raises.
    """
    try:
        docs = [doc for doc in batch_clean(texts) if len(doc) > 3]
        if len(docs) < 5:
            return [["data kurang"]]

        vectorizer = CountVectorizer(min_df=2, stop_words=list(STOPWORDS))
        matrix = vectorizer.fit_transform(docs)
        vocab_size = matrix.shape[1]
        if vocab_size == 0:
            return [["kosong"]]

        lda = LatentDirichletAllocation(n_components=min(3, vocab_size), random_state=42)
        lda.fit(matrix)
        vocab = vectorizer.get_feature_names_out()

        topics = []
        for weights in lda.components_:
            # argsort is ascending, so the last five indices are the
            # highest-weighted words (listed lowest to highest).
            strongest = weights.argsort()[-5:]
            topics.append([vocab[i] for i in strongest])
        return topics
    except Exception as e:
        print(f"topic error: {e}")
        return [["error"]]
def generate_insight(data):
    """Return a one-line count of Positive / Negative / Neutral / Uncertain rows."""
    tally = Counter(row["sentiment"] for row in data)
    parts = [f"{name}:{tally[name]}" for name in ("Positive", "Negative", "Neutral", "Uncertain")]
    return " ".join(parts)
def cluster_opinions(texts):
    """Group texts into KMeans clusters over TF-IDF features.

    Returns a list of ``{'cluster': label, 'samples': [...]}`` dicts with up
    to three original texts per cluster, or ``[]`` for fewer than six texts
    or when clustering fails.
    """
    try:
        if len(texts) < 6:
            return []

        vectorizer = TfidfVectorizer(max_features=300, stop_words=list(STOPWORDS))
        features = vectorizer.fit_transform(batch_clean(texts))
        model = KMeans(n_clusters=min(3, len(texts)), n_init=10, random_state=42)
        model.fit(features)

        members = {}
        for text, label in zip(texts, model.labels_):
            key = int(label)
            if key not in members:
                members[key] = []
            members[key].append(text)

        return [{"cluster": label, "samples": grouped[:3]} for label, grouped in members.items()]
    except Exception as e:
        print(f"cluster error: {e}")
        return []
def build_network(texts):
    """Build a word co-occurrence edge list.

    For each text, the first six distinct words (in order of appearance)
    that are longer than three characters and not in ``STOPWORDS`` are
    paired with each other. Only pairs seen in more than one text are
    returned.

    Words are de-duplicated in text order rather than through a ``set``,
    whose iteration order for strings changes between interpreter runs; the
    chosen words, and therefore the edges, are now reproducible.

    Returns:
        list of ``{'source', 'target', 'weight'}`` dicts; ``source`` and
        ``target`` are the two words in sorted order.
    """
    edges = Counter()
    for t in texts:
        distinct = dict.fromkeys(clean_text_deep(t).split())  # ordered de-dup
        words = [w for w in distinct if len(w) > 3 and w not in STOPWORDS][:6]
        for a, b in combinations(words, 2):
            edges[tuple(sorted((a, b)))] += 1
    return [{"source": k[0], "target": k[1], "weight": v} for k, v in edges.items() if v > 1]
def detect_bot_network(texts):
    """Link near-duplicate texts and flag highly connected ones as bot candidates.

    Texts are nodes (identified by their index); two nodes are connected
    when their TF-IDF cosine similarity exceeds 0.75. Nodes with degree
    centrality above 0.3 are reported under ``'bots'`` (at most ten).

    Returns ``{'nodes': [], 'edges': [], 'bots': []}`` for fewer than five
    texts or when anything fails.
    """
    try:
        count = len(texts)
        if count < 5:
            return {"nodes": [], "edges": [], "bots": []}

        tfidf = TfidfVectorizer(max_features=300).fit_transform(texts)
        sim = cosine_similarity(tfidf)

        G = nx.Graph()
        for idx, text in enumerate(texts):
            G.add_node(idx, text=text)
        for i, j in combinations(range(count), 2):
            if sim[i][j] > 0.75:
                G.add_edge(i, j)

        suspects = []
        for node, score in nx.degree_centrality(G).items():
            if score > 0.3:
                suspects.append({"node": node, "score": round(score, 2), "text": texts[node]})

        return {
            "nodes": [{"id": node} for node in G.nodes()],
            "edges": [{"source": u, "target": v} for u, v in G.edges()],
            "bots": suspects[:10],
        }
    except Exception as e:
        print(f"bot error: {e}")
        return {"nodes": [], "edges": [], "bots": []}
def run_gnn_safe(nodes, edges, texts):
    """Score graph nodes with a small, untrained two-layer GCN.

    Args:
        nodes: list of ``{'id': int}`` dicts.
        edges: list of ``{'source': int, 'target': int}`` dicts.
        texts: list of texts; ``texts[node id]`` provides the node features.

    Returns:
        list of ``{'node': id, 'score': float}``. Scores are the min-max
        normalised L2 norms of the GCN outputs. All scores are 0.0 when the
        graph is too small (no nodes, no edges or fewer than three nodes),
        when no usable edge remains, or when torch / torch_geometric are
        unavailable or anything else fails.

    NOTE(review): edge endpoints are used directly as row indices into the
    feature matrix, so this assumes node ids are the positions 0..len(nodes)-1
    — confirm against the producer of ``nodes``.
    """
    def zero_scores():
        return [{"node": n["id"], "score": 0.0} for n in nodes]

    if not nodes or not edges or len(nodes) < 3:
        return zero_scores()
    try:
        import torch
        from torch_geometric.data import Data
        from torch_geometric.nn import GCNConv

        node_texts = [texts[n["id"]] if n["id"] < len(texts) else "" for n in nodes]
        vec = TfidfVectorizer(max_features=32, min_df=1)
        try:
            X = vec.fit_transform(node_texts).toarray()
        except Exception:
            # Vectorising can fail (e.g. no usable vocabulary); fall back to
            # identity-like features. Narrowed from a bare ``except`` so
            # KeyboardInterrupt / SystemExit are not swallowed.
            X = np.eye(len(nodes), 32)
        x = torch.tensor(X, dtype=torch.float)

        edge_list = [
            [e["source"], e["target"]]
            for e in edges
            if e["source"] < len(nodes) and e["target"] < len(nodes)
        ]
        if not edge_list:
            return zero_scores()
        edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()

        class GCN(torch.nn.Module):
            def __init__(self, in_ch):
                super().__init__()
                self.conv1 = GCNConv(in_ch, 16)
                self.conv2 = GCNConv(16, 4)

            def forward(self, x, ei):
                return self.conv2(torch.relu(self.conv1(x, ei)), ei)

        # Fixed seed: the weights are random (never trained) but reproducible.
        torch.manual_seed(42)
        model = GCN(x.shape[1])
        model.eval()
        with torch.no_grad():
            out = model(x, edge_index)

        scores = torch.norm(out, dim=1).numpy()
        if scores.max() > scores.min():
            scores = (scores - scores.min()) / (scores.max() - scores.min())
        else:
            scores = np.zeros(len(scores))
        return [{"node": n["id"], "score": round(float(scores[i]), 3)} for i, n in enumerate(nodes)]
    except Exception as e:
        print(f"GNN error: {e}")
        return zero_scores()
| # ── ROUTES ── | |
# NOTE(review): no route decorator is visible for this view in the reviewed
# source; confirm it is registered with `app`.
def home():
    """Render the landing page template ``index.html``."""
    page = render_template("index.html")
    return page
# NOTE(review): no route decorator is visible for this view in the reviewed
# source; confirm it is registered with `app`.
def result():
    """Render the results page template ``result.html``."""
    page = render_template("result.html")
    return page
def _run_optional(name, func, default, *args):
    """Call an optional NLP service; on failure log it and return ``default``."""
    try:
        return func(*args)
    except Exception as e:
        print(f"⚠️ {name} gagal: {e}")
        return default


# NOTE(review): no route decorator is visible for this view in the reviewed
# source; it must be registered with `app` to be reachable (the error log
# below refers to it as /analyze, and it reads a JSON request body).
def analyze():
    """Run the full analysis pipeline for a keyword and return it as JSON.

    Reads ``keyword`` (required), ``source`` (default ``"all"``) and
    ``conf_threshold`` (default ``CONF_THRESHOLD``) from the JSON body,
    collects at most 100 texts, scores them, regenerates the chart images
    and CSV exports under ``static/``, and returns every analysis result.

    Returns:
        A JSON response; 400 for a missing keyword or an invalid threshold,
        500 with the error text when the pipeline itself fails.
    """
    try:
        # silent=True: a missing or malformed JSON body is treated as empty
        # and reported as a client error below, not as a 500.
        body = request.get_json(silent=True)
        if not isinstance(body, dict):
            body = {}
        keyword = str(body.get("keyword") or "").strip()
        source = body.get("source", "all")
        if not keyword:
            return jsonify({"error": "keyword kosong", "data": []}), 400
        try:
            conf_th = float(body.get("conf_threshold", CONF_THRESHOLD))
        except (TypeError, ValueError):
            return jsonify({"error": "conf_threshold tidak valid", "data": []}), 400

        raw = collect_data(keyword, source)
        texts = [t for _, t in raw][:100]
        sources = [s for s, _ in raw][:100]

        scored = predict_with_score(texts)
        scored_filtered = apply_confidence_filter(scored, threshold=conf_th)
        scraped_at = datetime.now().strftime("%Y-%m-%d %H:%M")
        result_data = [
            {
                "text": t,
                "sentiment": s["sentiment"],
                "confidence": s["confidence"],
                "is_certain": s["is_certain"],
                "source": src,
                "scraped_at": scraped_at,
            }
            for t, s, src in zip(texts, scored_filtered, sources)
        ]

        conf_stats_data = confidence_stats(result_data)
        cross_data = cross_platform_analysis(result_data)
        generate_comparative_chart(cross_data)
        generate_wordcloud(texts)
        generate_heatmap(result_data)
        generate_timeline(result_data)

        top_words = get_top_words(texts)
        topics = get_topics(texts)
        insight = generate_insight(result_data)
        clusters = cluster_opinions(texts)
        trend = predict_trend(result_data)
        hoax = detect_hoax(texts)
        network = build_network(texts)
        bot_network = detect_bot_network(texts)
        gnn = run_gnn_safe(bot_network["nodes"], bot_network["edges"], texts)
        bot_bert = detect_bot_bert(texts)
        fake_news = detect_fake_news(texts)

        # These six results were referenced in the response but never
        # computed, so every request failed with NameError.
        # NOTE(review): the argument choices (texts, keyword as the stance
        # target, result rows for the per-platform summary) are assumptions
        # based on the fallback signatures — confirm against the real
        # services. A failing service degrades to its empty result instead
        # of failing the whole request.
        absa_result = _run_optional(
            "ABSA", analyze_absa,
            {'top_aspects': [], 'aggregate': {}, 'aspect_sentiment_map': {}}, texts)
        ner_result = _run_optional(
            "NER", analyze_ner, {'top_entities': [], 'entities_by_type': {}}, texts)
        stance_result = _run_optional(
            "stance", analyze_stance,
            {'counts': {}, 'dominant': 'Neutral', 'favor_pct': 0, 'against_pct': 0, 'neutral_pct': 0},
            texts, keyword)
        emotion_result = _run_optional(
            "emotions", analyze_emotions,
            {'distribution': {}, 'dominant': 'neutral', 'emotional_pct': 0}, texts)
        keywords_result = _run_optional("keywords", extract_keywords, [], texts)
        summaries = _run_optional("summaries", summarize_by_platform, {}, result_data)

        export = build_export_data(result_data, keyword, source, conf_stats_data, cross_data, trend)
        save_export_csv(export)

        return jsonify({
            "data": result_data,
            "top_words": top_words,
            "topics": topics,
            "insight": insight,
            "clusters": clusters,
            "hoax": hoax,
            "network": network,
            "bot_network": bot_network,
            "trend": trend,
            "bot_bert": bot_bert,
            "fake_news": fake_news,
            "gnn": gnn,
            "conf_stats": conf_stats_data,
            "cross_platform": cross_data,
            "export_summary": export["summary"],
            "absa": absa_result,
            "ner": ner_result,
            "stance": stance_result,
            "emotions": emotion_result,
            "keywords": keywords_result,
            "summaries": summaries,
        })
    except Exception as e:
        print(f"ERROR /analyze: {e}")
        return jsonify({"error": str(e), "data": []}), 500
# NOTE(review): no route decorator is visible for this view in the reviewed
# source; confirm it is registered with `app`.
def download():
    """Send ``static/result.csv`` as an attachment, or a 404 JSON error if it does not exist yet."""
    path = "static/result.csv"
    if os.path.exists(path):
        return send_file(path, as_attachment=True)
    return jsonify({"error": "Belum ada hasil"}), 404
# NOTE(review): no route decorator is visible for this view in the reviewed
# source; confirm it is registered with `app`.
def download_summary():
    """Send ``static/summary.csv`` as an attachment, or a 404 JSON error if it does not exist yet."""
    path = "static/summary.csv"
    if os.path.exists(path):
        return send_file(path, as_attachment=True)
    return jsonify({"error": "Belum ada summary"}), 404
# NOTE(review): no route decorator is visible for this view in the reviewed
# source; confirm it is registered with `app`.
def static_files(filename):
    """Serve a file from the ``static`` directory.

    ``filename`` is caller-supplied, so it is resolved with
    ``send_from_directory``, which refuses paths that escape the directory
    (e.g. ``../app.py``) and answers 404 for missing files. Interpolating
    the name into a path for ``send_file`` would allow path traversal.
    """
    # Local import: the file-level flask import does not include this name.
    from flask import send_from_directory

    return send_from_directory("static", filename)
if __name__ == "__main__":
    # Listen on all interfaces, port 7860, with the debugger off.
    app.run(
        host="0.0.0.0",
        port=7860,
        debug=False,
    )