| from fastapi import FastAPI, HTTPException |
| from fastapi.middleware.cors import CORSMiddleware |
| from pydantic import BaseModel |
| import joblib |
| import os |
| import re |
| import requests |
| from bs4 import BeautifulSoup |
| import json |
|
|
| |
| from keras.models import load_model |
| from keras.utils import pad_sequences |
| |
| from tensorflow.keras.preprocessing.text import tokenizer_from_json |
|
|
app = FastAPI(
    title="API Deteksi Hoax Multi-Model",
    description="API untuk mendeteksi berita hoax menggunakan pilihan model.",
    version="1.0.2"
)

# CORS for the Vercel frontend.
# BUG FIX: Starlette's CORSMiddleware compares the request's Origin header
# literally against allow_origins, and browsers send the origin WITHOUT a
# trailing slash (e.g. "https://host", never "https://host/"). The previous
# value ended in "/" and therefore never matched, blocking every browser call.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://deteksi-berita-hoax-kappa.vercel.app"],  # no trailing slash
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
| |
# Registry of loaded classifiers, keyed by the `model_type` values the API
# accepts; each entry stays None until its artifact loads successfully below.
models = dict.fromkeys(("naive_bayes", "lstm"))

# Keras Tokenizer for the LSTM path; populated at startup if tokenizer.json exists.
tokenizer = None
|
|
| |
# --- Naive Bayes artifact (joblib-pickled sklearn object) ---
PATH_NB = 'model_hoax_complete.pkl'
if os.path.exists(PATH_NB):
    try:
        models["naive_bayes"] = joblib.load(PATH_NB)
    except Exception as err:
        # Best-effort startup: the API still boots; /predict returns 500
        # for this model until the artifact is fixed.
        print(f"Error loading Naive Bayes: {err}")
    else:
        print("Model Naive Bayes berhasil dimuat!")
|
|
| |
# --- LSTM artifact (Keras HDF5 model) ---
PATH_LSTM = 'lstm_fake_news_model.h5'
if os.path.exists(PATH_LSTM):
    try:
        models["lstm"] = load_model(PATH_LSTM)
    except Exception as err:
        # Best-effort startup, mirroring the Naive Bayes loader above.
        print(f"Error loading LSTM: {err}")
    else:
        print("Model LSTM berhasil dimuat!")
|
|
| |
# --- Tokenizer for the LSTM path ---
PATH_TOKENIZER = 'tokenizer.json'
try:
    if os.path.exists(PATH_TOKENIZER):
        with open(PATH_TOKENIZER) as f:
            data = json.load(f)
        # BUG FIX: tokenizer_from_json() expects a JSON *string* (it calls
        # json.loads internally). When tokenizer.json was written with
        # f.write(tokenizer.to_json()), json.load yields a dict and passing
        # it straight through raises TypeError, silently leaving
        # tokenizer = None. Handle both layouts: a double-encoded string
        # (json.dump(tokenizer.to_json(), f)) passes through unchanged, a
        # dict is re-serialized first.
        if isinstance(data, str):
            tokenizer = tokenizer_from_json(data)
        else:
            tokenizer = tokenizer_from_json(json.dumps(data))
        print("Tokenizer LSTM (JSON) berhasil dimuat!")
except Exception as e:
    print(f"Error loading Tokenizer: {e}")
|
|
| |
class PredictRequest(BaseModel):
    """Request body for POST /predict."""
    # Raw news text to classify, or an http(s) URL — URLs are scraped first.
    input_text: str
    # Which loaded model to use: "naive_bayes" (default) or "lstm".
    model_type: str = "naive_bayes"
|
|
def scrape_berita(url):
    """Fetch a news page and return the joined text of all its <p> tags.

    Best-effort: on ANY failure (network error, non-2xx status, parse
    error) it returns a sentinel string prefixed with "GAGAL: " instead of
    raising — callers check for that prefix.
    """
    ua = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    try:
        resp = requests.get(url, headers=ua, timeout=10)
        resp.raise_for_status()
        halaman = BeautifulSoup(resp.content, 'html.parser')
        potongan = (p.get_text() for p in halaman.find_all('p'))
        return " ".join(potongan).strip()
    except Exception as e:
        return f"GAGAL: {e}"
|
|
| @app.post("/predict") |
| def deteksi_hoax_api(request: PredictRequest): |
| |
| jenis_model = request.model_type |
| if jenis_model not in models: |
| raise HTTPException(status_code=400, detail="Pilihan model tidak valid. Gunakan 'naive_bayes' atau 'lstm'.") |
| |
| aktif_model = models[jenis_model] |
| if aktif_model is None: |
| raise HTTPException(status_code=500, detail=f"Model {jenis_model} tidak ditemukan di server.") |
|
|
| teks_mentah = request.input_text.strip() |
| if not teks_mentah: |
| raise HTTPException(status_code=400, detail="Input tidak boleh kosong.") |
|
|
| if teks_mentah.startswith("http://") or teks_mentah.startswith("https://"): |
| teks_untuk_dianalisis = scrape_berita(teks_mentah) |
| if teks_untuk_dianalisis.startswith("GAGAL:"): |
| raise HTTPException(status_code=400, detail=f"Gagal memproses URL: {teks_untuk_dianalisis}") |
| else: |
| teks_untuk_dianalisis = teks_mentah |
|
|
| |
| kamus_bobot = {} |
| prob_fakta = 0.0 |
| prob_hoax = 0.0 |
| |
| if jenis_model == "naive_bayes": |
| proba = aktif_model.predict_proba([teks_untuk_dianalisis])[0] |
| prob_fakta = float(proba[0]) |
| prob_hoax = float(proba[1]) |
| |
| |
| try: |
| vec = aktif_model[0] |
| clf = aktif_model[1] |
| feature_names = vec.get_feature_names_out() |
| log_odds = clf.feature_log_prob_[1] - clf.feature_log_prob_[0] |
| kamus_bobot = dict(zip(feature_names, log_odds)) |
| except Exception: |
| pass |
|
|
| elif jenis_model == "lstm": |
| if tokenizer is None: |
| raise HTTPException(status_code=500, detail="Tokenizer model LSTM tidak ditemukan di server.") |
|
|
| |
| sequence = tokenizer.texts_to_sequences([teks_untuk_dianalisis]) |
| |
| |
| MAX_LEN = 150 |
| padded_sequence = pad_sequences(sequence, maxlen=MAX_LEN, padding='post', truncating='post') |
| |
| |
| prediksi_mentah = aktif_model.predict(padded_sequence, verbose=0)[0] |
| |
| |
| if len(prediksi_mentah) >= 2: |
| prob_fakta = float(prediksi_mentah[0]) |
| prob_hoax = float(prediksi_mentah[1]) |
| else: |
| nilai = float(prediksi_mentah[0]) |
| prob_hoax = nilai |
| prob_fakta = 1.0 - nilai |
|
|
| |
| kata_kata = teks_untuk_dianalisis.split() |
| teks_highlight = [] |
| |
| for kata in kata_kata: |
| kata_bersih = re.sub(r'[^a-z]', '', kata.lower()) |
| bobot = float(kamus_bobot.get(kata_bersih, 0)) |
| |
| if bobot > 0.3: |
| label_kata = "Hoax" |
| elif bobot < -0.3: |
| label_kata = "Fakta" |
| else: |
| label_kata = "Netral" |
| |
| teks_highlight.append({ |
| "kata": kata, |
| "label": label_kata, |
| "bobot": round(bobot, 4) |
| }) |
|
|
| return { |
| "status": "success", |
| "hasil_analisis": { |
| "model_digunakan": jenis_model, |
| "teks_dianalisis": teks_untuk_dianalisis, |
| "prediksi_utama": "HOAX" if prob_hoax > prob_fakta else "FAKTA", |
| "probabilitas": { |
| "fakta": round(prob_fakta * 100, 2), |
| "hoax": round(prob_hoax * 100, 2) |
| } |
| }, |
| "bedah_kata": teks_highlight |
| } |