Spaces:
Running
Running
auth: use pass_hash + prenorm + lazy LoRA load + translate/history
Browse files- app.py +239 -371
- frontend/index.html +6 -25
app.py
CHANGED
|
@@ -1,421 +1,289 @@
|
|
| 1 |
-
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
|
|
|
| 8 |
)
|
| 9 |
-
|
|
|
|
|
|
|
| 10 |
|
|
|
|
| 11 |
from sqlalchemy import (
|
| 12 |
-
create_engine, Column, Integer,
|
| 13 |
)
|
| 14 |
from sqlalchemy.orm import declarative_base, sessionmaker, scoped_session, relationship
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
#
|
| 19 |
-
|
| 20 |
-
log
|
| 21 |
-
|
| 22 |
-
#
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
app.config["SECRET_KEY"] = os.getenv("SECRET_KEY", "dev-secret")
|
| 27 |
-
|
| 28 |
-
# =========================
|
| 29 |
-
# Database (Supabase or fallback SQLite)
|
| 30 |
-
# =========================
|
| 31 |
-
def normalize_database_url(url: Optional[str]) -> str:
    """Return a SQLAlchemy-ready database URL.

    Args:
        url: Raw connection string (typically the ``DATABASE_URL`` env var).
            ``None``, empty and whitespace-only values select the local
            SQLite fallback.

    Returns:
        The normalized URL. PostgreSQL URLs are rewritten to use the
        ``postgresql+psycopg2`` driver and get ``sslmode=require`` appended
        unless an ``sslmode`` is already present. Other URLs are returned
        stripped but otherwise unchanged.
    """
    fallback = "sqlite:////tmp/app.db"
    url = (url or "").strip()
    if not url:
        # Covers None, "" and whitespace-only values alike.
        return fallback

    # Accept both the legacy "postgres://" alias and the driver-less
    # "postgresql://" form; pin both to the psycopg2 driver.
    for prefix in ("postgres://", "postgresql://"):
        if url.startswith(prefix):
            url = "postgresql+psycopg2://" + url[len(prefix):]
            break

    if url.startswith("postgresql+psycopg2://") and "sslmode=" not in url:
        sep = "&" if "?" in url else "?"
        url = f"{url}{sep}sslmode=require"
    return url
|
| 41 |
-
|
| 42 |
-
# Build the engine from the environment; fall back to local SQLite when the
# configured URL cannot be turned into an engine.
DATABASE_URL = normalize_database_url(os.getenv("DATABASE_URL"))
try:
    engine = create_engine(DATABASE_URL, pool_pre_ping=True)
    # Never log the raw URL: it embeds the database password.
    log.info("[DB] Ready: %s", engine.url.render_as_string(hide_password=True))
except Exception as e:
    log.error("[DB] init error: %s", e)
    engine = create_engine("sqlite:////tmp/app.db")
    log.info("[DB] Fallback: sqlite:////tmp/app.db")
|
| 50 |
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
Base = declarative_base()
|
| 53 |
|
| 54 |
-
|
| 55 |
-
# ORM Models (jangan map kolom 'pass_hash' di sini agar query aman)
|
| 56 |
-
# =========================
|
| 57 |
-
class User(Base, UserMixin):
|
| 58 |
__tablename__ = "users"
|
| 59 |
id = Column(Integer, primary_key=True)
|
| 60 |
-
email = Column(
|
| 61 |
-
|
| 62 |
-
created_at = Column(DateTime, server_default=func.now())
|
| 63 |
-
translations = relationship("Translation", back_populates="user")
|
| 64 |
|
| 65 |
class Translation(Base):
|
| 66 |
__tablename__ = "translations"
|
| 67 |
id = Column(Integer, primary_key=True)
|
| 68 |
-
user_id = Column(Integer, ForeignKey("users.id"))
|
| 69 |
-
src = Column(Text)
|
| 70 |
-
mt = Column(Text)
|
| 71 |
-
created_at = Column(DateTime, server_default=func.now())
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
def
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
if c["name"] == col:
|
| 90 |
-
return c.get("nullable", None)
|
| 91 |
-
return None
|
| 92 |
-
|
| 93 |
-
def auto_migrate_users_table():
    """Bring an existing ``users`` table in line with what the app expects.

    Adds the ``password`` and ``created_at`` columns when they are missing
    and, if a ``pass_hash`` column exists, copies ``password`` into every row
    whose ``pass_hash`` is still NULL. Does nothing when the table is absent.
    """
    inspector = inspect(engine)
    if not inspector.has_table("users"):
        log.info("[DB] Table users belum ada; ORM sudah membuatnya via create_all")
        return

    existing = {column["name"] for column in inspector.get_columns("users")}
    wanted = (
        ("password", "ALTER TABLE users ADD COLUMN password VARCHAR(255)"),
        ("created_at", "ALTER TABLE users ADD COLUMN created_at TIMESTAMP DEFAULT NOW()"),
    )
    pending = [ddl for name, ddl in wanted if name not in existing]

    # Run only the ALTER statements that are actually needed.
    if pending:
        with engine.begin() as conn:
            for ddl in pending:
                log.info("[DB] MIGRATE: %s", ddl)
                conn.execute(text(ddl))

    # If a pass_hash column exists, backfill it from password where it is NULL.
    if "pass_hash" in existing:
        with engine.begin() as conn:
            conn.execute(text("""
                UPDATE users
                SET pass_hash = password
                WHERE pass_hash IS NULL AND password IS NOT NULL
            """))
    log.info("[DB] Skema users sudah sesuai")


auto_migrate_users_table()
|
| 125 |
-
|
| 126 |
-
# =========================
|
| 127 |
-
# Auth
|
| 128 |
-
# =========================
|
| 129 |
-
login_manager = LoginManager()
login_manager.login_view = "login_get"
login_manager.init_app(app)


@login_manager.user_loader
def load_user(uid: str):
    """Resolve the user id stored in the login session to a ``User`` row.

    Args:
        uid: The id string stored by Flask-Login.

    Returns:
        The matching ``User``, or ``None`` when the id is not a valid integer
        or no such row exists.
    """
    try:
        user_id = int(uid)
    except (TypeError, ValueError):
        # A malformed id must be reported as "no user" rather than raising.
        return None

    s = Session()
    try:
        return s.get(User, user_id)
    finally:
        s.close()
|
| 140 |
-
|
| 141 |
-
# =========================
|
| 142 |
-
# Pre-norm & Heuristics
|
| 143 |
-
# =========================
|
| 144 |
-
# Feature switches read once at import time.
_PRE_NORM_ON = os.getenv("PRE_NORM", "1") != "0"
_N_BEST = max(1, int(os.getenv("N_BEST", "1")))
_COPY_PENALTY = os.getenv("COPY_PENALTY", "1") != "0"

_PN_SPACE = re.compile(r"\s+")
_PN_REPEAT = re.compile(r"(.)\1{3,}")
# Typographic quotes -> ASCII quotes. Written as escapes so the keys survive
# any re-encoding of this file (mis-decoded keys collapse into one entry).
_PN_SYMBOLS = {
    "\u201c": "\"",
    "\u201d": "\"",
    "\u2018": "'",
    "\u2019": "'",
}
# Dialect word -> standard Indonesian word, applied to lower-cased text.
_PN_RULES = [
    (r"\bsa\b", "saya"),
    (r"\bko\b", "kamu"),
    (r"\btra\b", "tidak"),
    (r"\bsu\b", "sudah"),
    (r"\bbeta\b", "saya"),
    (r"\bdorang\b", "mereka"),
    (r"\bdong\b", "mereka"),
    (r"\bma\b", "tetapi"),
]


def pre_norm(text: str) -> str:
    """Lightly normalize dialect input before it is sent to the model.

    Steps: drop BOM characters, convert curly quotes to ASCII, collapse
    whitespace, shorten runs of 4+ identical characters to 2, lower-case,
    apply the word substitutions in ``_PN_RULES``, then capitalize the
    first character.

    Args:
        text: Raw user input.

    Returns:
        The normalized sentence; falsy input is returned unchanged.
    """
    if not text:
        return text
    t = text.replace("\ufeff", "")
    for symbol, ascii_form in _PN_SYMBOLS.items():
        t = t.replace(symbol, ascii_form)
    t = _PN_SPACE.sub(" ", t.strip())
    t = _PN_REPEAT.sub(r"\1\1", t)
    lower = t.lower()
    for rx, rep in _PN_RULES:
        lower = re.sub(rx, rep, lower)
    return lower.capitalize()
|
| 172 |
-
|
| 173 |
-
def copy_distance(a: str, b: str) -> float:
    """Score how far ``b`` is from being a copy of ``a`` (1.0 = unrelated).

    The comparison is case-insensitive. An exact match contributes 0.7 and
    every position-aligned equal token contributes ``1 / (len(a_tokens) + 1)``;
    the sum is subtracted from 1.0 and clamped at 0.0.

    Args:
        a: Reference text.
        b: Candidate text.

    Returns:
        A value in ``[0.0, 1.0]``; 1.0 when either argument is empty.
    """
    if not (a and b):
        return 1.0

    left, right = a.lower(), b.lower()
    left_tokens = left.split()
    right_tokens = right.split()

    exact = 1 if left == right else 0
    aligned = 0
    for x, y in zip(left_tokens, right_tokens):
        if x == y:
            aligned += 1

    overlap = exact * 0.7 + aligned / (len(left_tokens) + 1)
    return max(0.0, 1.0 - overlap)
|
| 179 |
-
|
| 180 |
-
# =========================
|
| 181 |
-
# Model (lazy)
|
| 182 |
-
# =========================
|
| 183 |
-
DEVICE = "cpu"
_tok = None
_model = None


def load_model_safe():
    """Load tokenizer and model into the module globals on first use.

    Reads ``BASE_MODEL`` and ``LORA_ADAPTER`` from the environment. With no
    base model configured the globals stay ``None`` (dummy mode). A failing
    adapter load is logged and the base model is kept; any other failure is
    logged and both globals are reset to ``None``.
    """
    global _tok, _model, DEVICE
    if _model is not None:
        return

    base_id = os.getenv("BASE_MODEL", "").strip()
    adapter_id = os.getenv("LORA_ADAPTER", "").strip()
    if not base_id:
        log.warning("[MODEL] BASE_MODEL not set -> DUMMY mode")
        return

    try:
        # Imported here so the heavy ML libraries load only when needed.
        from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
        from peft import PeftModel
        import torch

        DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
        log.info("[INFO] Using device: %s", DEVICE)
        log.info("[INFO] Base model: %s", base_id)
        if adapter_id:
            log.info("[INFO] Adapter : %s", adapter_id)

        _tok = AutoTokenizer.from_pretrained(base_id)
        _model = AutoModelForSeq2SeqLM.from_pretrained(base_id)
        if adapter_id:
            try:
                _model = PeftModel.from_pretrained(_model, adapter_id)
            except Exception as exc:
                # Best effort: keep serving with the plain base model.
                log.error("[MODEL] adapter load error: %s", exc)

        _model.eval()
        if DEVICE == "cuda":
            _model.to("cuda")
    except Exception as exc:
        log.error("[MODEL] load error: %s", exc)
        _tok, _model = None, None
|
| 218 |
-
|
| 219 |
-
def generate_n_best(prompt: str, n_best: int, max_new_tokens: int) -> List[str]:
    """Sample up to ``n_best`` candidate outputs for ``prompt``.

    Args:
        prompt: Text fed to the model.
        n_best: Number of sampling rounds; round ``i`` is seeded with ``1234 + i``.
        max_new_tokens: Generation length limit passed to ``generate``.

    Returns:
        The decoded candidates, ``[""]`` when nothing was decoded, or a single
        ``"(dummy) <prompt>"`` entry when no model is loaded.
    """
    load_model_safe()
    if _model is None or _tok is None:
        return [f"(dummy) {prompt}"]

    import torch

    encoded = _tok([prompt], return_tensors="pt", padding=True, truncation=True)
    if DEVICE == "cuda":
        encoded = {name: tensor.to("cuda") for name, tensor in encoded.items()}

    sampling = dict(
        do_sample=True, top_p=0.9, top_k=40, temperature=0.9,
        num_return_sequences=1,
    )
    candidates = []
    for offset in range(n_best):
        # A fixed seed per round keeps the sampled candidates reproducible.
        torch.manual_seed(1234 + offset)
        with torch.no_grad():
            generated = _model.generate(
                **encoded,
                max_new_tokens=max_new_tokens,
                **sampling,
            )
        decoded = _tok.batch_decode(generated, skip_special_tokens=True)
        if decoded:
            candidates.append(decoded[0])
    return candidates if candidates else [""]
|
| 240 |
-
|
| 241 |
-
def score_candidate(src: str, hyp: str) -> float:
    """Heuristically score a hypothesis against its source (higher is better).

    Starts at 1.0, subtracts 0.3 for hypotheses of two tokens or fewer, adds
    half of ``copy_distance(src, hyp)`` when the copy penalty is enabled, and
    subtracts 0.5 when the hypothesis equals the source ignoring case and
    surrounding whitespace.
    """
    total = 1.0
    too_short = len(hyp.split()) <= 2
    if too_short:
        total -= 0.3
    if _COPY_PENALTY:
        total += copy_distance(src, hyp) * 0.5
    is_verbatim = src.strip().lower() == hyp.strip().lower()
    if is_verbatim:
        total -= 0.5
    return total
|
| 247 |
-
|
| 248 |
-
def translate_core(text: str, max_new_tokens: int = 32) -> str:
    """Translate ``text``: pre-normalize, generate candidates, pick the best.

    Args:
        text: Raw user input; ``None`` is treated as an empty string.
        max_new_tokens: Generation length limit.

    Returns:
        The best-scoring hypothesis, stripped, with its first character
        upper-cased and a final "." added when it does not already end in
        ".", "!" or "?". An empty hypothesis yields an empty string.
    """
    raw = text or ""
    src = pre_norm(raw) if _PRE_NORM_ON else raw
    cands = generate_n_best(src, max(1, _N_BEST), max_new_tokens)

    if len(cands) == 1:
        hyp = cands[0]
    else:
        # max() returns the first best-scoring candidate, which matches the
        # tie-breaking of a stable descending sort.
        hyp = max(cands, key=lambda h: score_candidate(src, h))

    hyp = hyp.strip()
    if not hyp:
        # Do not turn an empty model output into a lone ".".
        return ""
    if not hyp.endswith(('.', '!', '?')):
        hyp += '.'
    return hyp[0].upper() + hyp[1:]
|
| 260 |
-
|
| 261 |
-
# =========================
|
| 262 |
-
# Helpers untuk register/login menyesuaikan kolom pass_hash
|
| 263 |
-
# =========================
|
| 264 |
-
def users_has_pass_hash() -> bool:
    """Return the result of ``has_col`` for the ``users.pass_hash`` column."""
    present = has_col("users", "pass_hash")
    return present
|
| 266 |
-
|
| 267 |
-
def insert_user(email: str, hashed: str) -> int:
    """Insert a user row and return its id.

    When the table has a ``pass_hash`` column, both ``password`` and
    ``pass_hash`` are filled with the same hash so that a NOT NULL constraint
    on either column is satisfied.

    Args:
        email: Address to store.
        hashed: Already-hashed password.

    Returns:
        The id reported by the ``RETURNING id`` clause.
    """
    params = {"e": email, "p": hashed}
    with engine.begin() as conn:
        if users_has_pass_hash():
            sql = "INSERT INTO users (email, password, pass_hash) VALUES (:e, :p, :p) RETURNING id"
        else:
            sql = "INSERT INTO users (email, password) VALUES (:e, :p) RETURNING id"
        inserted = conn.execute(text(sql), params).first()
    return int(inserted[0])
|
| 282 |
-
|
| 283 |
-
# =========================
|
| 284 |
-
# Routes
|
| 285 |
-
# =========================
|
| 286 |
-
@app.get("/")
|
| 287 |
-
def home():
|
| 288 |
-
if not current_user.is_authenticated:
|
| 289 |
-
return redirect(url_for("login_get"))
|
| 290 |
-
s = Session()
|
| 291 |
try:
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
return
|
| 303 |
-
|
| 304 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
@app.get("/login")
|
| 307 |
def login_get():
|
| 308 |
-
|
| 309 |
-
return redirect(url_for("home"))
|
| 310 |
-
return render_template("login.html", error=None, logged_in=False)
|
| 311 |
|
| 312 |
@app.post("/login")
|
| 313 |
def login_post():
|
| 314 |
email = (request.form.get("email") or "").strip().lower()
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
|
|
|
|
|
|
|
|
|
| 318 |
u = s.query(User).filter_by(email=email).first()
|
| 319 |
-
if not u or not
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
|
|
|
| 325 |
|
| 326 |
@app.get("/register")
|
| 327 |
def register_get():
|
| 328 |
-
|
| 329 |
-
return redirect(url_for("home"))
|
| 330 |
-
return render_template("register.html", error=None, logged_in=False)
|
| 331 |
|
| 332 |
@app.post("/register")
|
| 333 |
def register_post():
|
| 334 |
email = (request.form.get("email") or "").strip().lower()
|
| 335 |
-
|
| 336 |
-
if not email or not
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
return
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
# Load lagi usernya untuk login_user
|
| 351 |
-
u = s.get(User, new_id)
|
| 352 |
-
if not u:
|
| 353 |
-
# fallback: buat ORM object (harusnya tidak kejadian)
|
| 354 |
-
u = User(id=new_id, email=email, password=hashed)
|
| 355 |
-
s.merge(u); s.commit()
|
| 356 |
-
|
| 357 |
-
login_user(u, remember=True, duration=dt.timedelta(days=7))
|
| 358 |
-
return redirect(url_for("home"))
|
| 359 |
-
finally:
|
| 360 |
-
s.close()
|
| 361 |
|
| 362 |
@app.get("/logout")
|
| 363 |
-
@login_required
|
| 364 |
def logout():
|
| 365 |
-
|
| 366 |
return redirect(url_for("login_get"))
|
| 367 |
|
| 368 |
-
@app.
|
| 369 |
@login_required
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
def api_translate():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 371 |
try:
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
try:
|
| 380 |
-
rec = Translation(user_id=current_user.id, src=text_in, mt=mt)
|
| 381 |
-
s.add(rec); s.commit()
|
| 382 |
-
finally:
|
| 383 |
-
s.close()
|
| 384 |
return jsonify({"mt": mt})
|
| 385 |
except Exception as e:
|
| 386 |
log.exception("translate error: %s", e)
|
| 387 |
return jsonify({"error": "server error"}), 500
|
| 388 |
|
| 389 |
@app.get("/history")
|
| 390 |
-
@login_required
|
| 391 |
def api_history():
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
return
|
| 409 |
-
|
| 410 |
-
@app.get("/health")
def health():
    """Liveness endpoint: always answers with a fixed status payload."""
    payload = {"status": "ok"}
    return payload
|
| 413 |
-
|
| 414 |
-
@app.errorhandler(500)
def err500(e):
    """Log an unhandled server error and answer with the login page (HTTP 500)."""
    log.exception("Unhandled 500: %s", e)
    # Show the login form so the user can retry the step.
    page = render_template("login.html", error="Terjadi kesalahan server. Coba lagi.")
    return page, 500
|
| 419 |
|
| 420 |
if __name__ == "__main__":
    # The server binds to all interfaces, and the Werkzeug debugger lets any
    # client run code on the host, so debug mode must be an explicit opt-in
    # (FLASK_DEBUG=1) instead of always on.
    debug = os.getenv("FLASK_DEBUG", "0") == "1"
    app.run(host="0.0.0.0", port=int(os.getenv("PORT", "7860")), debug=debug)
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
# PapuaTranslate — Flask 3 + SQLAlchemy + Supabase + mT5-LoRA (lazy load) + prenorm
|
| 3 |
+
import os, re, json, logging, threading
|
| 4 |
+
from datetime import datetime, timezone
|
| 5 |
+
from functools import wraps
|
| 6 |
+
from typing import Optional, Tuple, List
|
| 7 |
+
|
| 8 |
+
from flask import (
|
| 9 |
+
Flask, render_template, request, redirect, url_for,
|
| 10 |
+
session, jsonify, flash
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
# ========= Logging =========
|
| 14 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
|
| 15 |
+
log = logging.getLogger("papua-app")
|
| 16 |
|
| 17 |
+
# ========= Flask =========
|
| 18 |
+
app = Flask(
    __name__,
    template_folder="frontend",  # HTML templates live in frontend/
    static_folder="static",
)

_secret_key = os.getenv("SECRET_KEY")
if not _secret_key:
    # Login state lives in the signed session cookie, so anyone who knows the
    # key can forge a session. Keep the development fallback, but make the
    # misconfiguration visible.
    log.warning("[SEC] SECRET_KEY is not set; using an insecure development key")
    _secret_key = "dev-secret-change-me"
app.config["SECRET_KEY"] = _secret_key
app.config["SESSION_COOKIE_SAMESITE"] = "Lax"  # same-origin fetch() still sends the cookie
app.config["SESSION_COOKIE_SECURE"] = False  # set to True when served strictly over HTTPS
|
| 26 |
|
| 27 |
+
# ========= DB: SQLAlchemy (Supabase Postgres) =========
|
| 28 |
from sqlalchemy import (
|
| 29 |
+
create_engine, Column, Integer, Text, DateTime, ForeignKey, func
|
| 30 |
)
|
| 31 |
from sqlalchemy.orm import declarative_base, sessionmaker, scoped_session, relationship
|
| 32 |
|
| 33 |
+
# Connection string: DATABASE_URL wins, DB_URL is accepted as an alias.
DATABASE_URL = (os.getenv("DATABASE_URL") or os.getenv("DB_URL") or "").strip()
if not DATABASE_URL:
    # Safe fallback for local runs (also taken for whitespace-only values).
    DATABASE_URL = "sqlite:////tmp/app.db"
    log.warning("[DB] DATABASE_URL tidak diset; pakai SQLite /tmp/app.db")

# Map the legacy "postgres://" alias onto the "postgresql://" scheme.
if DATABASE_URL.startswith("postgres://"):
    DATABASE_URL = "postgresql://" + DATABASE_URL[len("postgres://"):]

# Add sslmode=require for Postgres unless an sslmode is already present.
if DATABASE_URL.startswith("postgresql") and "sslmode=" not in DATABASE_URL:
    sep = "&" if "?" in DATABASE_URL else "?"
    DATABASE_URL = f"{DATABASE_URL}{sep}sslmode=require"

engine = create_engine(
    DATABASE_URL,
    pool_pre_ping=True,
)
SessionLocal = scoped_session(sessionmaker(bind=engine, autoflush=False, autocommit=False))
Base = declarative_base()
|
| 50 |
|
| 51 |
+
class User(Base):
    """Account row stored in the ``users`` table."""

    __tablename__ = "users"

    id = Column(Integer, primary_key=True)
    # Unique, required login identifier.
    email = Column(Text, unique=True, nullable=False)
    # REQUIRED: the password hash is stored in pass_hash.
    pass_hash = Column(Text, nullable=False)
    # Filled in by the database at insert time.
    created_at = Column(DateTime(timezone=True), server_default=func.now())
|
|
|
|
| 57 |
|
| 58 |
class Translation(Base):
    """One stored translation (source text and model output) of a user."""

    __tablename__ = "translations"

    id = Column(Integer, primary_key=True)
    # Owner of the record.
    user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
    # Source text and machine translation.
    src = Column(Text, nullable=False)
    mt = Column(Text, nullable=False)
    # Filled in by the database at insert time.
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    user = relationship("User")
|
| 67 |
+
|
| 68 |
+
# create_all aman untuk SQLite; untuk Supabase jika tabel sudah ada, ini tidak mengubah skema
|
| 69 |
+
try:
    # create_all only creates missing tables; existing ones are left as-is.
    Base.metadata.create_all(engine)
    # Never log the raw URL: it embeds the database password.
    log.info("[DB] Ready: %s", engine.url.render_as_string(hide_password=True))
except Exception as e:
    # Best effort: the app still starts so the failure shows up in the logs.
    log.exception("[DB] init error: %s", e)
|
| 74 |
+
|
| 75 |
+
# ========= Auth helpers =========
|
| 76 |
+
from werkzeug.security import generate_password_hash, check_password_hash
|
| 77 |
+
|
| 78 |
+
def set_password(user: User, raw: str):
    """Hash ``raw`` and store the result in ``user.pass_hash``."""
    hashed = generate_password_hash(raw)
    user.pass_hash = hashed
|
| 80 |
+
|
| 81 |
+
def verify_password(user: User, raw: str) -> bool:
    """Check ``raw`` against the hash stored on ``user``.

    Args:
        user: Account whose ``pass_hash`` is checked.
        raw: Plain-text password supplied by the client.

    Returns:
        ``True`` only when the stored hash matches. A missing hash, or one
        that cannot be verified, counts as a failed login.
    """
    stored = getattr(user, "pass_hash", None)
    if not stored:
        return False
    try:
        return check_password_hash(stored, raw)
    except Exception:
        # Still a failed login, but leave a trace: a hash that cannot be
        # checked points at corrupted or legacy data.
        log.warning(
            "[AUTH] unusable password hash for user id=%s",
            getattr(user, "id", None),
            exc_info=True,
        )
        return False
|
| 86 |
+
|
| 87 |
+
def login_required(fn):
    """Decorator: run ``fn`` only when the session carries a ``uid``.

    Requests without one are redirected to the login page.
    """
    @wraps(fn)
    def _wrap(*args, **kwargs):
        if session.get("uid"):
            return fn(*args, **kwargs)
        return redirect(url_for("login_get"))

    return _wrap
|
| 94 |
+
|
| 95 |
+
# ========= Prenorm (heuristik ringan agar input lebih bersih) =========
|
| 96 |
+
# Kamu bisa kembangkan sesuai kebutuhanmu.
|
| 97 |
+
# Dialect word (regex) -> standard Indonesian word. Extend as needed.
PAPUA_MAP = {
    r"\bsa\b": "saya",
    r"\bko\b": "kamu",
    r"\btra\b": "tidak",
    r"\bndak\b": "tidak",
    r"\bmo\b": "mau",
    r"\bpu\b": "punya",
    r"\bsu\b": "sudah",
    r"\bkong\b": "kemudian",
}


def prenorm(text: str) -> str:
    """Clean up raw input before it is passed to the model.

    Trims and collapses whitespace, replaces the ellipsis character and the
    en/em dashes with ASCII equivalents, then applies the ``PAPUA_MAP``
    substitutions case-insensitively.

    Args:
        text: Raw user input.

    Returns:
        The normalized text; an empty string for ``None`` or empty input.
    """
    if not text:
        return ""
    t = text.strip()
    t = re.sub(r"\s+", " ", t)
    # Normalize typographic punctuation. Escapes are used so the literals
    # survive any re-encoding of this file.
    t = t.replace("\u2026", "...").replace("\u2013", "-").replace("\u2014", "-")
    # Map common dialect words to their standard form (heuristic).
    for pat, repl in PAPUA_MAP.items():
        t = re.sub(pat, repl, t, flags=re.IGNORECASE)
    return t
|
| 117 |
+
|
| 118 |
+
# ========= Model (lazy-load LoRA) =========
|
| 119 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 120 |
+
from peft import PeftModel
|
| 121 |
+
|
| 122 |
+
BASE_MODEL_ID = os.getenv("BASE_MODEL_ID", "amosnbn/cendol-mt5-base-inst")
ADAPTER_ID = os.getenv("ADAPTER_ID", "amosnbn/papua-lora-ckpt-168")
DEVICE = os.getenv("DEVICE", "cpu")  # "cpu" unless overridden via the environment
TOK = None
MODEL = None
_MODEL_LOCK = threading.Lock()


def _load_model():
    """Load tokenizer, base model and adapter, then publish them as globals.

    Everything is built in local variables first. ``MODEL`` is assigned last
    because ``get_model`` tests it without holding the lock, so another
    thread must never see a model that is not yet in eval mode or not yet on
    its device.
    """
    global TOK, MODEL
    log.info("[MODEL] loading base=%s adapter=%s", BASE_MODEL_ID, ADAPTER_ID)
    tok = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
    base = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL_ID)
    model = PeftModel.from_pretrained(base, ADAPTER_ID)
    model.eval()
    # Honour whatever device was configured, not only "cpu".
    model.to(DEVICE)
    TOK = tok
    MODEL = model
    log.info("[MODEL] ready")


def get_model():
    """Return ``(tokenizer, model)``, loading them on first use.

    The load runs under ``_MODEL_LOCK`` and is re-checked inside the lock so
    concurrent first requests trigger a single load.
    """
    if MODEL is None:
        with _MODEL_LOCK:
            if MODEL is None:
                _load_model()
    return TOK, MODEL


def translate_with_model(text: str, max_new_tokens: int = 48) -> str:
    """Translate ``text`` with beam search and return the decoded output.

    Args:
        text: Pre-normalized source sentence.
        max_new_tokens: Generation length limit.

    Returns:
        The first generated sequence, decoded without special tokens.
    """
    tok, m = get_model()
    # Inputs must live on the same device the model was moved to.
    inputs = tok([text], return_tensors="pt").to(DEVICE)
    outputs = m.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        num_beams=4,
        length_penalty=0.9,
        no_repeat_ngram_size=3,
        early_stopping=True,
    )
    return tok.decode(outputs[0], skip_special_tokens=True)
|
| 161 |
+
|
| 162 |
+
# ========= Hooks & small utils =========
|
| 163 |
+
@app.before_request
def _log_req():
    """Log method and path of each request, except probe and favicon paths."""
    quiet_paths = ("/health", "/ping", "/favicon.ico")
    if request.path in quiet_paths:
        return
    log.info("[REQ] %s %s", request.method, request.path)
|
| 168 |
+
|
| 169 |
+
# ========= Routes =========
|
| 170 |
+
@app.get("/health")
@app.get("/ping")
def health():
    """Liveness endpoint: reports ``ok`` and the current UTC time (ISO 8601)."""
    now_utc = datetime.now(timezone.utc)
    return jsonify({"ok": True, "time": now_utc.isoformat()})
|
| 174 |
|
| 175 |
@app.get("/login")
def login_get():
    """Render the login form."""
    page = render_template("login.html")
    return page
|
|
|
|
|
|
|
| 178 |
|
| 179 |
@app.post("/login")
def login_post():
    """Handle the login form.

    On success the user's id and email are stored in the session and the
    client is redirected to the index page; otherwise an error is flashed
    and the client is sent back to the login form.
    """
    form = request.form
    email = (form.get("email") or "").strip().lower()
    pwd = form.get("password") or ""
    if not (email and pwd):
        flash("Isi email dan password", "error")
        return redirect(url_for("login_get"))

    with SessionLocal() as s:
        account = s.query(User).filter_by(email=email).first()
        authenticated = bool(account) and verify_password(account, pwd)
        if not authenticated:
            # Same message for unknown email and wrong password.
            flash("Email atau password salah", "error")
            return redirect(url_for("login_get"))

        session["uid"] = account.id
        session["email"] = account.email
    return redirect(url_for("index"))
|
| 196 |
|
| 197 |
@app.get("/register")
def register_get():
    """Render the registration form."""
    page = render_template("register.html")
    return page
|
|
|
|
|
|
|
| 200 |
|
| 201 |
@app.post("/register")
def register_post():
    """Handle the registration form.

    Creates the account, stores its id and email in the session and
    redirects to the index page. Missing fields or an already registered
    email flash an error and redirect back to the form.
    """
    # Local import: keeps this handler self-contained.
    from sqlalchemy.exc import IntegrityError

    email = (request.form.get("email") or "").strip().lower()
    pwd = request.form.get("password") or ""
    if not email or not pwd:
        flash("Isi email dan password", "error")
        return redirect(url_for("register_get"))

    with SessionLocal() as s:
        if s.query(User).filter_by(email=email).first():
            flash("Email sudah terdaftar", "error")
            return redirect(url_for("register_get"))

        u = User(email=email)
        set_password(u, pwd)  # the hash is stored in pass_hash
        s.add(u)
        try:
            s.commit()
        except IntegrityError:
            # Two concurrent sign-ups with the same email both pass the check
            # above; the unique constraint rejects the second one.
            s.rollback()
            flash("Email sudah terdaftar", "error")
            return redirect(url_for("register_get"))

        session["uid"] = u.id
        session["email"] = u.email
    return redirect(url_for("index"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
@app.get("/logout")
def logout():
    """Drop everything stored in the session and go back to the login page."""
    session.clear()
    target = url_for("login_get")
    return redirect(target)
|
| 224 |
|
| 225 |
+
@app.get("/")
@login_required
def index():
    """Render the main page with the user's 10 most recent translations."""
    with SessionLocal() as s:
        rows = (
            s.query(Translation)
            .filter(Translation.user_id == session.get("uid"))
            .order_by(Translation.id.desc())
            .limit(10)
            .all()
        )
        recent = []
        for row in rows:
            recent.append({"src": row.src, "mt": row.mt, "created_at": row.created_at})
    return render_template("index.html", logged_in=True, device=DEVICE, recent=recent)
|
| 241 |
+
|
| 242 |
+
@app.post("/translate")
def api_translate():
    """Translate the posted text and record it in the user's history.

    Expects a JSON object with ``text`` (string) and an optional
    ``max_new_tokens`` (integer, default 48, clamped to 1..256).

    Returns:
        ``{"mt": ...}`` on success; 401 without a session, 400 for empty
        text or a non-integer ``max_new_tokens``, 500 on internal errors.
    """
    max_new_tokens_limit = 256  # upper bound so one request cannot hog the model

    uid = session.get("uid")
    if not uid:
        return jsonify({"error": "Unauthorized"}), 401

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # A JSON array or scalar has no .get(); treat it as an empty payload.
        data = {}

    raw_text = data.get("text")
    text = raw_text.strip() if isinstance(raw_text, str) else ""
    if not text:
        return jsonify({"error": "Empty text"}), 400

    try:
        max_new = int(data.get("max_new_tokens", 48))
    except (TypeError, ValueError, OverflowError):
        return jsonify({"error": "Invalid max_new_tokens"}), 400
    max_new = max(1, min(max_new, max_new_tokens_limit))

    try:
        # Step 1: prenorm, step 2: model.
        clean = prenorm(text)
        mt = translate_with_model(clean, max_new_tokens=max_new)
        # Store the history entry (the original text, not the cleaned one).
        with SessionLocal() as s:
            s.add(Translation(user_id=uid, src=text, mt=mt))
            s.commit()
        return jsonify({"mt": mt})
    except Exception as e:
        log.exception("translate error: %s", e)
        return jsonify({"error": "server error"}), 500
|
| 266 |
|
| 267 |
@app.get("/history")
def api_history():
    """Return the user's 10 most recent translations as JSON.

    Returns:
        ``{"items": [...]}`` where each item has ``src``, ``mt`` and
        ``created_at`` (``YYYY-MM-DD HH:MM``, or an empty string when the row
        has no timestamp). Without a session the list is empty.
    """
    uid = session.get("uid")
    if not uid:
        return jsonify({"items": []})

    with SessionLocal() as s:
        items = (
            s.query(Translation)
            .filter(Translation.user_id == uid)
            .order_by(Translation.id.desc())
            .limit(10)
            .all()
        )
        out = [{
            "src": it.src,
            "mt": it.mt,
            # created_at is nullable, so guard rows without a timestamp.
            "created_at": it.created_at.strftime("%Y-%m-%d %H:%M") if it.created_at else "",
        } for it in items]
    return jsonify({"items": out})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
|
| 287 |
if __name__ == "__main__":
    # Local run: python app.py
    # The server binds to all interfaces, and the Werkzeug debugger lets any
    # client run code on the host, so debug mode must be an explicit opt-in
    # (FLASK_DEBUG=1) instead of always on.
    debug = os.getenv("FLASK_DEBUG", "0") == "1"
    app.run(host="0.0.0.0", port=int(os.getenv("PORT", "7860")), debug=debug)
|
frontend/index.html
CHANGED
|
@@ -44,8 +44,6 @@
|
|
| 44 |
</style>
|
| 45 |
</head>
|
| 46 |
<body>
|
| 47 |
-
<script>window.LOGGED_IN = {{ 'true' if logged_in else 'false' }};</script>
|
| 48 |
-
|
| 49 |
<header>
|
| 50 |
<div class="container header-content">
|
| 51 |
<div class="logo"><h1>PapuaTranslate</h1></div>
|
|
@@ -54,13 +52,7 @@
|
|
| 54 |
<li><a href="/">Home</a></li>
|
| 55 |
<li><a href="/about">About</a></li>
|
| 56 |
<li><a href="/#history">History</a></li>
|
| 57 |
-
|
| 58 |
-
<li class="auth-badge">Login: <span class="pill">Aktif</span></li>
|
| 59 |
-
<li><a href="/logout">Logout</a></li>
|
| 60 |
-
{% else %}
|
| 61 |
-
<li><a href="/login">Login</a></li>
|
| 62 |
-
<li><a href="/register">Daftar</a></li>
|
| 63 |
-
{% endif %}
|
| 64 |
</ul>
|
| 65 |
</nav>
|
| 66 |
</div>
|
|
@@ -69,7 +61,7 @@
|
|
| 69 |
<section class="hero">
|
| 70 |
<div class="container">
|
| 71 |
<h2>Translasi Dialek Papua → Bahasa Indonesia Baku</h2>
|
| 72 |
-
<p>
|
| 73 |
</div>
|
| 74 |
</section>
|
| 75 |
|
|
@@ -112,22 +104,15 @@
|
|
| 112 |
</div>
|
| 113 |
</section>
|
| 114 |
|
| 115 |
-
<section class="info">
|
| 116 |
-
<div class="container">
|
| 117 |
-
<h3>Tentang Aplikasi</h3>
|
| 118 |
-
<p>PapuaTranslate menerapkan arsitektur CENDOL berbasis mT5 dengan LoRA adapter untuk menerjemahkan dialek Papua ke Bahasa Indonesia baku secara ringkas dan aman.</p>
|
| 119 |
-
</div>
|
| 120 |
-
</section>
|
| 121 |
-
|
| 122 |
<footer>
|
| 123 |
<div class="container">
|
| 124 |
<div class="footer-content">
|
| 125 |
<div class="footer-section">
|
| 126 |
<h3>PapuaTranslate</h3>
|
| 127 |
-
<p>Yogotak Hubuluk, Motok Hanorogo
|
| 128 |
</div>
|
| 129 |
<div class="footer-section">
|
| 130 |
-
<h3>
|
| 131 |
<p>Model: mT5 Base + LoRA</p>
|
| 132 |
<p>Device: {{ device }}</p>
|
| 133 |
</div>
|
|
@@ -140,14 +125,10 @@
|
|
| 140 |
|
| 141 |
<script>
|
| 142 |
async function translateText() {
|
| 143 |
-
if (window.LOGGED_IN !== true && window.LOGGED_IN !== 'true') {
|
| 144 |
-
window.location.href = '/login';
|
| 145 |
-
return;
|
| 146 |
-
}
|
| 147 |
const inputText = document.getElementById('papua-input').value.trim();
|
| 148 |
const outputElement = document.getElementById('indonesia-output');
|
| 149 |
const translateBtn = document.getElementById('translate-btn');
|
| 150 |
-
if (!inputText) { outputElement.textContent = "Silakan masukkan teks
|
| 151 |
outputElement.innerHTML = '<span class="loading">Menerjemahkan...</span>';
|
| 152 |
translateBtn.disabled = true; translateBtn.textContent = 'Menerjemahkan...';
|
| 153 |
try {
|
|
@@ -155,7 +136,7 @@
|
|
| 155 |
method: 'POST',
|
| 156 |
headers: { 'Content-Type': 'application/json' },
|
| 157 |
credentials: 'same-origin',
|
| 158 |
-
body: JSON.stringify({ text: inputText, max_new_tokens:
|
| 159 |
});
|
| 160 |
const data = await r.json();
|
| 161 |
if (r.ok) {
|
|
|
|
| 44 |
</style>
|
| 45 |
</head>
|
| 46 |
<body>
|
|
|
|
|
|
|
| 47 |
<header>
|
| 48 |
<div class="container header-content">
|
| 49 |
<div class="logo"><h1>PapuaTranslate</h1></div>
|
|
|
|
| 52 |
<li><a href="/">Home</a></li>
|
| 53 |
<li><a href="/about">About</a></li>
|
| 54 |
<li><a href="/#history">History</a></li>
|
| 55 |
+
<li><a href="/logout">Logout</a></li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
</ul>
|
| 57 |
</nav>
|
| 58 |
</div>
|
|
|
|
| 61 |
<section class="hero">
|
| 62 |
<div class="container">
|
| 63 |
<h2>Translasi Dialek Papua → Bahasa Indonesia Baku</h2>
|
| 64 |
+
<p>mT5 + LoRA (CENDOL) dengan prenorm & n-best reranking ringan.</p>
|
| 65 |
</div>
|
| 66 |
</section>
|
| 67 |
|
|
|
|
| 104 |
</div>
|
| 105 |
</section>
|
| 106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
<footer>
|
| 108 |
<div class="container">
|
| 109 |
<div class="footer-content">
|
| 110 |
<div class="footer-section">
|
| 111 |
<h3>PapuaTranslate</h3>
|
| 112 |
+
<p>Yogotak Hubuluk, Motok Hanorogo.</p>
|
| 113 |
</div>
|
| 114 |
<div class="footer-section">
|
| 115 |
+
<h3>Info</h3>
|
| 116 |
<p>Model: mT5 Base + LoRA</p>
|
| 117 |
<p>Device: {{ device }}</p>
|
| 118 |
</div>
|
|
|
|
| 125 |
|
| 126 |
<script>
|
| 127 |
async function translateText() {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
const inputText = document.getElementById('papua-input').value.trim();
|
| 129 |
const outputElement = document.getElementById('indonesia-output');
|
| 130 |
const translateBtn = document.getElementById('translate-btn');
|
| 131 |
+
if (!inputText) { outputElement.textContent = "Silakan masukkan teks."; return; }
|
| 132 |
outputElement.innerHTML = '<span class="loading">Menerjemahkan...</span>';
|
| 133 |
translateBtn.disabled = true; translateBtn.textContent = 'Menerjemahkan...';
|
| 134 |
try {
|
|
|
|
| 136 |
method: 'POST',
|
| 137 |
headers: { 'Content-Type': 'application/json' },
|
| 138 |
credentials: 'same-origin',
|
| 139 |
+
body: JSON.stringify({ text: inputText, max_new_tokens: 48 })
|
| 140 |
});
|
| 141 |
const data = await r.json();
|
| 142 |
if (r.ok) {
|