amosnbn committed on
Commit
9fd8fc8
·
1 Parent(s): c480756

auth: use pass_hash + prenorm + lazy LoRA load + translate/history

Browse files
Files changed (2) hide show
  1. app.py +239 -371
  2. frontend/index.html +6 -25
app.py CHANGED
@@ -1,421 +1,289 @@
1
- import os, re, json, logging, datetime as dt
2
- from typing import Optional, List
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- from flask import Flask, request, render_template, redirect, url_for, jsonify
5
- from flask_login import (
6
- LoginManager, UserMixin, login_user, logout_user,
7
- login_required, current_user
 
8
  )
9
- from werkzeug.security import generate_password_hash, check_password_hash
 
 
10
 
 
11
  from sqlalchemy import (
12
- create_engine, Column, Integer, String, Text, DateTime, ForeignKey, func, text, inspect
13
  )
14
  from sqlalchemy.orm import declarative_base, sessionmaker, scoped_session, relationship
15
 
16
- # =========================
17
- # Logging
18
- # =========================
19
- logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
20
- log = logging.getLogger("papuatranslate")
21
-
22
- # =========================
23
- # Flask
24
- # =========================
25
- app = Flask(__name__, static_folder="static", template_folder="frontend")
26
- app.config["SECRET_KEY"] = os.getenv("SECRET_KEY", "dev-secret")
27
-
28
- # =========================
29
- # Database (Supabase or fallback SQLite)
30
- # =========================
31
- def normalize_database_url(url: Optional[str]) -> str:
32
- if not url:
33
- return "sqlite:////tmp/app.db"
34
- url = url.strip()
35
- if url.startswith("postgresql://") and "psycopg2" not in url:
36
- url = "postgresql+psycopg2://" + url.split("postgresql://", 1)[1]
37
- if url.startswith("postgresql+psycopg2://") and "sslmode=" not in url:
38
- sep = "&" if "?" in url else "?"
39
- url = f"{url}{sep}sslmode=require"
40
- return url
41
-
42
- DATABASE_URL = normalize_database_url(os.getenv("DATABASE_URL"))
43
- try:
44
- engine = create_engine(DATABASE_URL, pool_pre_ping=True)
45
- log.info("[DB] Ready: %s", DATABASE_URL)
46
- except Exception as e:
47
- log.error("[DB] init error: %s", e)
48
- engine = create_engine("sqlite:////tmp/app.db")
49
- log.info("[DB] Fallback: sqlite:////tmp/app.db")
50
 
51
- Session = scoped_session(sessionmaker(bind=engine, autoflush=False, expire_on_commit=False))
 
 
 
 
52
  Base = declarative_base()
53
 
54
- # =========================
55
- # ORM Models (jangan map kolom 'pass_hash' di sini agar query aman)
56
- # =========================
57
- class User(Base, UserMixin):
58
  __tablename__ = "users"
59
  id = Column(Integer, primary_key=True)
60
- email = Column(String(255), unique=True, nullable=False)
61
- password = Column(String(255), nullable=True) # bisa nullable karena beberapa DB lama pakai pass_hash
62
- created_at = Column(DateTime, server_default=func.now())
63
- translations = relationship("Translation", back_populates="user")
64
 
65
  class Translation(Base):
66
  __tablename__ = "translations"
67
  id = Column(Integer, primary_key=True)
68
- user_id = Column(Integer, ForeignKey("users.id"))
69
- src = Column(Text)
70
- mt = Column(Text)
71
- created_at = Column(DateTime, server_default=func.now())
72
- user = relationship("User", back_populates="translations")
73
-
74
- # create jika belum ada
75
- Base.metadata.create_all(engine)
76
-
77
- # =========================
78
- # MIGRASI RINGAN: sinkron kolom users (password/created_at) dan handle pass_hash
79
- # =========================
80
- def has_col(table: str, col: str) -> bool:
81
- insp = inspect(engine)
82
- if not insp.has_table(table): return False
83
- return any(c["name"] == col for c in insp.get_columns(table))
84
-
85
- def col_nullable(table: str, col: str) -> Optional[bool]:
86
- insp = inspect(engine)
87
- if not insp.has_table(table): return None
88
- for c in insp.get_columns(table):
89
- if c["name"] == col:
90
- return c.get("nullable", None)
91
- return None
92
-
93
- def auto_migrate_users_table():
94
- insp = inspect(engine)
95
- if not insp.has_table("users"):
96
- log.info("[DB] Table users belum ada; ORM sudah membuatnya via create_all")
97
- return
98
-
99
- cols = {c["name"] for c in insp.get_columns("users")}
100
- alters = []
101
- if "password" not in cols:
102
- alters.append("ALTER TABLE users ADD COLUMN password VARCHAR(255)")
103
- if "created_at" not in cols:
104
- alters.append("ALTER TABLE users ADD COLUMN created_at TIMESTAMP DEFAULT NOW()")
105
-
106
- # Eksekusi ALTER yang perlu
107
- if alters:
108
- with engine.begin() as conn:
109
- for stmt in alters:
110
- log.info("[DB] MIGRATE: %s", stmt)
111
- conn.execute(text(stmt))
112
-
113
- # Jika ada kolom pass_hash, pastikan terisi (backfill dari password)
114
- if "pass_hash" in cols:
115
- with engine.begin() as conn:
116
- # Backfill: kalau pass_hash NULL & password IS NOT NULL → copy
117
- conn.execute(text("""
118
- UPDATE users
119
- SET pass_hash = password
120
- WHERE pass_hash IS NULL AND password IS NOT NULL
121
- """))
122
- log.info("[DB] Skema users sudah sesuai")
123
-
124
- auto_migrate_users_table()
125
-
126
- # =========================
127
- # Auth
128
- # =========================
129
- login_manager = LoginManager()
130
- login_manager.login_view = "login_get"
131
- login_manager.init_app(app)
132
-
133
- @login_manager.user_loader
134
- def load_user(uid: str):
135
- s = Session()
136
- try:
137
- return s.get(User, int(uid))
138
- finally:
139
- s.close()
140
-
141
- # =========================
142
- # Pre-norm & Heuristics
143
- # =========================
144
- _PRE_NORM_ON = os.getenv("PRE_NORM", "1") != "0"
145
- _N_BEST = max(1, int(os.getenv("N_BEST", "1")))
146
- _COPY_PENALTY = os.getenv("COPY_PENALTY", "1") != "0"
147
-
148
- _PN_SPACE = re.compile(r"\s+")
149
- _PN_REPEAT = re.compile(r"(.)\1{3,}")
150
- _PN_SYMBOLS = { "β€œ":"\"", "”":"\"", "β€˜":"'", "’":"'" }
151
- _PN_RULES = [
152
- (r"\bsa\b", "saya"),
153
- (r"\bko\b", "kamu"),
154
- (r"\btra\b", "tidak"),
155
- (r"\bsu\b", "sudah"),
156
- (r"\bbeta\b", "saya"),
157
- (r"\bdorang\b", "mereka"),
158
- (r"\bdong\b", "mereka"),
159
- (r"\bma\b", "tetapi"),
160
- ]
161
-
162
- def pre_norm(text: str) -> str:
163
- if not text: return text
164
- t = text.replace("\ufeff", "")
165
- for k, v in _PN_SYMBOLS.items(): t = t.replace(k, v)
166
- t = _PN_SPACE.sub(" ", t.strip())
167
- t = _PN_REPEAT.sub(r"\1\1", t)
168
- lower = t.lower()
169
- for rx, rep in _PN_RULES:
170
- lower = re.sub(rx, rep, lower)
171
- return lower.capitalize()
172
-
173
- def copy_distance(a: str, b: str) -> float:
174
- if not a or not b: return 1.0
175
- a, b = a.lower(), b.lower()
176
- same = int(a == b)
177
- sub = sum(1 for x, y in zip(a.split(), b.split()) if x == y)
178
- return max(0.0, 1.0 - (same * 0.7 + sub / (len(a.split()) + 1)))
179
-
180
- # =========================
181
- # Model (lazy)
182
- # =========================
183
- DEVICE = "cpu"
184
- _tok = None
185
- _model = None
186
-
187
- def load_model_safe():
188
- global _tok, _model, DEVICE
189
- if _model is not None: return
190
- base = os.getenv("BASE_MODEL", "").strip()
191
- adapter = os.getenv("LORA_ADAPTER", "").strip()
192
- if not base:
193
- log.warning("[MODEL] BASE_MODEL not set -> DUMMY mode")
194
- return
195
- try:
196
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
197
- from peft import PeftModel
198
- import torch
199
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
200
- log.info("[INFO] Using device: %s", DEVICE)
201
- log.info("[INFO] Base model: %s", base)
202
- if adapter: log.info("[INFO] Adapter : %s", adapter)
203
-
204
- _tok = AutoTokenizer.from_pretrained(base)
205
- _model = AutoModelForSeq2SeqLM.from_pretrained(base)
206
- if adapter:
207
- try:
208
- _model = PeftModel.from_pretrained(_model, adapter)
209
- except Exception as e:
210
- log.error("[MODEL] adapter load error: %s", e)
211
-
212
- _model.eval()
213
- if DEVICE == "cuda":
214
- _model.to("cuda")
215
- except Exception as e:
216
- log.error("[MODEL] load error: %s", e)
217
- _tok, _model = None, None
218
-
219
- def generate_n_best(prompt: str, n_best: int, max_new_tokens: int) -> List[str]:
220
- load_model_safe()
221
- if _model is None or _tok is None:
222
- return [f"(dummy) {prompt}"]
223
- import torch
224
- inputs = _tok([prompt], return_tensors="pt", padding=True, truncation=True)
225
- if DEVICE == "cuda":
226
- inputs = {k: v.to("cuda") for k, v in inputs.items()}
227
- outs = []
228
- for seed in range(n_best):
229
- torch.manual_seed(1234 + seed)
230
- with torch.no_grad():
231
- out = _model.generate(
232
- **inputs,
233
- max_new_tokens=max_new_tokens,
234
- do_sample=True, top_p=0.9, top_k=40, temperature=0.9,
235
- num_return_sequences=1
236
- )
237
- detok = _tok.batch_decode(out, skip_special_tokens=True)
238
- if detok: outs.append(detok[0])
239
- return outs or [""]
240
-
241
- def score_candidate(src: str, hyp: str) -> float:
242
- score = 1.0
243
- if len(hyp.split()) <= 2: score -= 0.3
244
- if _COPY_PENALTY: score += copy_distance(src, hyp) * 0.5
245
- if src.strip().lower() == hyp.strip().lower(): score -= 0.5
246
- return score
247
-
248
- def translate_core(text: str, max_new_tokens: int = 32) -> str:
249
- raw = text or ""
250
- src = pre_norm(raw) if _PRE_NORM_ON else raw
251
- cands = generate_n_best(src, max(1, _N_BEST), max_new_tokens)
252
- hyp = cands[0] if len(cands) == 1 else sorted(
253
- ((score_candidate(src, h), h) for h in cands),
254
- key=lambda x: x[0], reverse=True
255
- )[0][1]
256
- hyp = hyp.strip()
257
- if not hyp.endswith(('.', '!', '?')): hyp += '.'
258
- if hyp: hyp = hyp[0].upper() + hyp[1:]
259
- return hyp
260
-
261
- # =========================
262
- # Helpers untuk register/login menyesuaikan kolom pass_hash
263
- # =========================
264
- def users_has_pass_hash() -> bool:
265
- return has_col("users", "pass_hash")
266
-
267
- def insert_user(email: str, hashed: str) -> int:
268
- """Insert user. Jika ada kolom pass_hash, isi keduanya."""
269
- with engine.begin() as conn:
270
- if users_has_pass_hash():
271
- # Isi keduanya agar constraint NOT NULL terpenuhi
272
- row = conn.execute(
273
- text("INSERT INTO users (email, password, pass_hash) VALUES (:e, :p, :p) RETURNING id"),
274
- {"e": email, "p": hashed}
275
- ).first()
276
- else:
277
- row = conn.execute(
278
- text("INSERT INTO users (email, password) VALUES (:e, :p) RETURNING id"),
279
- {"e": email, "p": hashed}
280
- ).first()
281
- return int(row[0])
282
-
283
- # =========================
284
- # Routes
285
- # =========================
286
- @app.get("/")
287
- def home():
288
- if not current_user.is_authenticated:
289
- return redirect(url_for("login_get"))
290
- s = Session()
291
  try:
292
- q = (s.query(Translation)
293
- .filter(Translation.user_id == current_user.id)
294
- .order_by(Translation.id.desc())
295
- .limit(10))
296
- recent = [
297
- {"src": r.src, "mt": r.mt, "created_at": (r.created_at.isoformat() if r.created_at else "")}
298
- for r in q.all()
299
- ]
300
- finally:
301
- s.close()
302
- return render_template("index.html",
303
- device=("cuda" if os.getenv("CUDA_VISIBLE_DEVICES") else "cpu"),
304
- recent=recent, logged_in=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
 
306
  @app.get("/login")
307
  def login_get():
308
- if current_user.is_authenticated:
309
- return redirect(url_for("home"))
310
- return render_template("login.html", error=None, logged_in=False)
311
 
312
  @app.post("/login")
313
  def login_post():
314
  email = (request.form.get("email") or "").strip().lower()
315
- password = request.form.get("password") or ""
316
- s = Session()
317
- try:
 
 
 
318
  u = s.query(User).filter_by(email=email).first()
319
- if not u or not u.password or not check_password_hash(u.password, password):
320
- return render_template("login.html", error="Email atau password salah.", logged_in=False), 401
321
- login_user(u, remember=True, duration=dt.timedelta(days=7))
322
- return redirect(url_for("home"))
323
- finally:
324
- s.close()
 
325
 
326
  @app.get("/register")
327
  def register_get():
328
- if current_user.is_authenticated:
329
- return redirect(url_for("home"))
330
- return render_template("register.html", error=None, logged_in=False)
331
 
332
  @app.post("/register")
333
  def register_post():
334
  email = (request.form.get("email") or "").strip().lower()
335
- password = request.form.get("password") or ""
336
- if not email or not password:
337
- return render_template("register.html", error="Email & password wajib diisi.", logged_in=False), 400
338
-
339
- s = Session()
340
- try:
341
- exists = s.query(User).filter_by(email=email).first()
342
- if exists:
343
- return render_template("register.html", error="Email sudah terpakai.", logged_in=False), 400
344
-
345
- hashed = generate_password_hash(password)
346
-
347
- # Gunakan insert manual agar bisa isi pass_hash jika diperlukan
348
- new_id = insert_user(email, hashed)
349
-
350
- # Load lagi usernya untuk login_user
351
- u = s.get(User, new_id)
352
- if not u:
353
- # fallback: buat ORM object (harusnya tidak kejadian)
354
- u = User(id=new_id, email=email, password=hashed)
355
- s.merge(u); s.commit()
356
-
357
- login_user(u, remember=True, duration=dt.timedelta(days=7))
358
- return redirect(url_for("home"))
359
- finally:
360
- s.close()
361
 
362
  @app.get("/logout")
363
- @login_required
364
  def logout():
365
- logout_user()
366
  return redirect(url_for("login_get"))
367
 
368
- @app.post("/translate")
369
  @login_required
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  def api_translate():
 
 
 
 
 
 
 
 
 
 
371
  try:
372
- payload = request.get_json(force=True, silent=True) or {}
373
- text_in = (payload.get("text") or "").strip()
374
- max_new = int(payload.get("max_new_tokens") or 32)
375
- if not text_in:
376
- return jsonify({"error": "text kosong"}), 400
377
- mt = translate_core(text_in, max_new_tokens=max_new)
378
- s = Session()
379
- try:
380
- rec = Translation(user_id=current_user.id, src=text_in, mt=mt)
381
- s.add(rec); s.commit()
382
- finally:
383
- s.close()
384
  return jsonify({"mt": mt})
385
  except Exception as e:
386
  log.exception("translate error: %s", e)
387
  return jsonify({"error": "server error"}), 500
388
 
389
  @app.get("/history")
390
- @login_required
391
  def api_history():
392
- s = Session()
393
- try:
394
- q = (s.query(Translation)
395
- .filter(Translation.user_id == current_user.id)
396
- .order_by(Translation.id.desc())
397
- .limit(10))
398
- items = [
399
- {"src": r.src, "mt": r.mt, "created_at": (r.created_at.isoformat() if r.created_at else "")}
400
- for r in q.all()
401
- ]
402
- return jsonify({"items": items})
403
- finally:
404
- s.close()
405
-
406
- @app.get("/about")
407
- def about():
408
- return render_template("about.html", logged_in=current_user.is_authenticated)
409
-
410
- @app.get("/health")
411
- def health():
412
- return {"status": "ok"}
413
-
414
- @app.errorhandler(500)
415
- def err500(e):
416
- log.exception("Unhandled 500: %s", e)
417
- # Tampilkan form login agar user bisa ulangi langkah
418
- return render_template("login.html", error="Terjadi kesalahan server. Coba lagi."), 500
419
 
420
  if __name__ == "__main__":
 
421
  app.run(host="0.0.0.0", port=int(os.getenv("PORT", "7860")), debug=True)
 
1
+ # app.py
2
+ # PapuaTranslate — Flask 3 + SQLAlchemy + Supabase + mT5-LoRA (lazy load) + prenorm
3
+ import os, re, json, logging, threading
4
+ from datetime import datetime, timezone
5
+ from functools import wraps
6
+ from typing import Optional, Tuple, List
7
+
8
+ from flask import (
9
+ Flask, render_template, request, redirect, url_for,
10
+ session, jsonify, flash
11
+ )
12
+
13
+ # ========= Logging =========
14
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
15
+ log = logging.getLogger("papua-app")
16
 
17
+ # ========= Flask =========
18
+ app = Flask(
19
+ __name__,
20
+ template_folder="frontend", # folder HTML kamu
21
+ static_folder="static"
22
  )
23
+ app.config["SECRET_KEY"] = os.getenv("SECRET_KEY", "dev-secret-change-me")
24
+ app.config["SESSION_COOKIE_SAMESITE"] = "Lax" # biar fetch same-origin ikut kirim cookie
25
+ app.config["SESSION_COOKIE_SECURE"] = False # True kalau pakai https strict
26
 
27
+ # ========= DB: SQLAlchemy (Supabase Postgres) =========
28
  from sqlalchemy import (
29
+ create_engine, Column, Integer, Text, DateTime, ForeignKey, func
30
  )
31
  from sqlalchemy.orm import declarative_base, sessionmaker, scoped_session, relationship
32
 
33
# Connection string: DATABASE_URL wins, DB_URL is accepted as an alias.
# Surrounding whitespace (easy to paste into a secrets UI) is trimmed.
DATABASE_URL = (os.getenv("DATABASE_URL") or os.getenv("DB_URL") or "").strip()
if not DATABASE_URL:
    # Safe fallback for local runs.
    DATABASE_URL = "sqlite:////tmp/app.db"
    log.warning("[DB] DATABASE_URL tidak diset; pakai SQLite /tmp/app.db")

# The legacy "postgres://" scheme is rewritten to "postgresql://" so that the
# sslmode handling below (which keys off the "postgresql" prefix) also applies.
if DATABASE_URL.startswith("postgres://"):
    DATABASE_URL = "postgresql://" + DATABASE_URL[len("postgres://"):]

# Append sslmode=require for Postgres URLs that do not already set an sslmode.
if DATABASE_URL.startswith("postgresql") and "sslmode=" not in DATABASE_URL:
    sep = "&" if "?" in DATABASE_URL else "?"
    DATABASE_URL = f"{DATABASE_URL}{sep}sslmode=require"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ engine = create_engine(
45
+ DATABASE_URL,
46
+ pool_pre_ping=True,
47
+ )
48
+ SessionLocal = scoped_session(sessionmaker(bind=engine, autoflush=False, autocommit=False))
49
  Base = declarative_base()
50
 
51
+ class User(Base):
 
 
 
52
  __tablename__ = "users"
53
  id = Column(Integer, primary_key=True)
54
+ email = Column(Text, unique=True, nullable=False)
55
+ pass_hash = Column(Text, nullable=False) # WAJIB: pakai pass_hash
56
+ created_at = Column(DateTime(timezone=True), server_default=func.now())
 
57
 
58
  class Translation(Base):
59
  __tablename__ = "translations"
60
  id = Column(Integer, primary_key=True)
61
+ user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
62
+ src = Column(Text, nullable=False)
63
+ mt = Column(Text, nullable=False)
64
+ created_at = Column(DateTime(timezone=True), server_default=func.now())
65
+
66
+ user = relationship("User")
67
+
68
+ # create_all aman untuk SQLite; untuk Supabase jika tabel sudah ada, ini tidak mengubah skema
69
try:
    Base.metadata.create_all(engine)
    # Log the URL with the password masked so credentials never reach the logs.
    log.info("[DB] Ready: %s", engine.url.render_as_string(hide_password=True))
except Exception as e:
    # Best effort: a failed init is logged and the app keeps starting.
    log.exception("[DB] init error: %s", e)
74
+
75
+ # ========= Auth helpers =========
76
+ from werkzeug.security import generate_password_hash, check_password_hash
77
+
78
def set_password(user: User, raw: str):
    """Hash ``raw`` and store the resulting hash on ``user.pass_hash``."""
    hashed = generate_password_hash(raw)
    user.pass_hash = hashed
80
+
81
def verify_password(user: "User", raw: str) -> bool:
    """Return True when ``raw`` matches the hash stored in ``user.pass_hash``.

    A missing user, a missing or empty stored hash, or a ``None`` password is a
    failed verification. Any error raised while checking the hash is also
    treated as a failure, but it is logged instead of being dropped silently.
    """
    stored = getattr(user, "pass_hash", None)
    if not stored or raw is None:
        return False
    try:
        return check_password_hash(stored, raw)
    except Exception:
        # Never log the password or the hash itself; the traceback is enough.
        log.exception("[AUTH] password hash check failed")
        return False
86
+
87
def login_required(fn):
    """Decorator that redirects to the login page unless the session has a ``uid``."""
    @wraps(fn)
    def _guarded(*args, **kwargs):
        if session.get("uid"):
            return fn(*args, **kwargs)
        return redirect(url_for("login_get"))

    return _guarded
94
+
95
+ # ========= Prenorm (heuristik ringan agar input lebih bersih) =========
96
+ # Kamu bisa kembangkan sesuai kebutuhanmu.
97
# Regex pattern (whole word) -> standard Indonesian replacement.
PAPUA_MAP = {
    r"\bsa\b": "saya",
    r"\bko\b": "kamu",
    r"\btra\b": "tidak",
    r"\bndak\b": "tidak",
    r"\bmo\b": "mau",
    r"\bpu\b": "punya",
    r"\bsu\b": "sudah",
    r"\bkong\b": "kemudian",
}

# Unusual punctuation folded to ASCII: ellipsis, en dash, em dash.
# Written as escapes so the table cannot be damaged by a bad re-encoding.
_PRENORM_PUNCT = str.maketrans({"\u2026": "...", "\u2013": "-", "\u2014": "-"})


def prenorm(text: str) -> str:
    """Lightly clean dialect input before it is sent to the model.

    Steps, in order: trim, collapse whitespace runs to one space, fold unusual
    punctuation to ASCII, then replace the dialect words in ``PAPUA_MAP`` with
    their standard forms (case-insensitive, whole words only).

    Returns an empty string for ``None`` or empty input.
    """
    if not text:
        return ""
    t = re.sub(r"\s+", " ", text.strip())
    t = t.translate(_PRENORM_PUNCT)
    # Heuristic dialect -> standard word mapping.
    for pat, repl in PAPUA_MAP.items():
        t = re.sub(pat, repl, t, flags=re.IGNORECASE)
    return t
117
+
118
+ # ========= Model (lazy-load LoRA) =========
119
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
120
+ from peft import PeftModel
121
+
122
+ BASE_MODEL_ID = os.getenv("BASE_MODEL_ID", "amosnbn/cendol-mt5-base-inst")
123
+ ADAPTER_ID = os.getenv("ADAPTER_ID", "amosnbn/papua-lora-ckpt-168")
124
+ DEVICE = os.getenv("DEVICE", "cpu") # di Spaces CPU
125
+ TOK = None
126
+ MODEL = None
127
+ _MODEL_LOCK = threading.Lock()
128
+
129
def _load_model():
    """Load the tokenizer, the base model and the LoRA adapter into ``TOK``/``MODEL``.

    The model is built in local variables and the globals are assigned only at
    the very end, so code that checks ``MODEL is not None`` without holding the
    lock never sees a model that has not yet been put in eval mode or moved to
    its device.
    """
    global TOK, MODEL
    log.info("[MODEL] loading base=%s adapter=%s", BASE_MODEL_ID, ADAPTER_ID)
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
    base = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL_ID)
    model = PeftModel.from_pretrained(base, ADAPTER_ID)
    model.eval()
    try:
        # Honour the configured DEVICE instead of only handling "cpu".
        model.to(DEVICE)
    except (RuntimeError, AssertionError):
        # Requested device is unavailable: keep the model where it was loaded.
        log.exception("[MODEL] cannot move model to %s; leaving it in place", DEVICE)
    # Publish last: tokenizer first, then the model that readers test for.
    TOK = tokenizer
    MODEL = model
    log.info("[MODEL] ready")
139
+
140
def get_model():
    """Return ``(TOK, MODEL)``, loading them on first use.

    The unlocked check keeps the common path cheap; the check is repeated under
    ``_MODEL_LOCK`` so that only one caller runs ``_load_model``.
    """
    global MODEL
    if MODEL is not None:
        return TOK, MODEL
    with _MODEL_LOCK:
        if MODEL is None:
            _load_model()
    return TOK, MODEL
147
+
148
def translate_with_model(
    text: str,
    max_new_tokens: int = 48,
    max_input_tokens: int = 512,
) -> str:
    """Translate ``text`` with the lazily loaded model using beam search.

    Args:
        text: Input sentence (already pre-normalised by the caller).
        max_new_tokens: Upper bound on generated tokens.
        max_input_tokens: Inputs longer than this many tokens are truncated, so
            one very long request cannot exhaust memory.

    Returns:
        The decoded first beam with special tokens removed.
    """
    tok, m = get_model()
    inputs = tok(
        [text],
        return_tensors="pt",
        truncation=True,
        max_length=max_input_tokens,
    )
    # Put the inputs on whatever device the model's weights are on.
    inputs = inputs.to(next(m.parameters()).device)
    outputs = m.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        num_beams=4,
        length_penalty=0.9,
        no_repeat_ngram_size=3,
        early_stopping=True,
    )
    return tok.decode(outputs[0], skip_special_tokens=True)
161
+
162
+ # ========= Hooks & small utils =========
163
@app.before_request
def _log_req():
    """Log method and path of each request, except the probe/icon paths."""
    if request.path in ("/health", "/ping", "/favicon.ico"):
        return
    log.info("[REQ] %s %s", request.method, request.path)
168
+
169
+ # ========= Routes =========
170
@app.get("/health")
@app.get("/ping")
def health():
    """Liveness probe: report ``ok`` and the current UTC time in ISO format."""
    now_utc = datetime.now(timezone.utc)
    payload = {"ok": True, "time": now_utc.isoformat()}
    return jsonify(payload)
174
 
175
@app.get("/login")
def login_get():
    """Render the login form."""
    page = render_template("login.html")
    return page
 
 
178
 
179
@app.post("/login")
def login_post():
    """Check the submitted credentials and start a session on success.

    On missing fields or wrong credentials an error is flashed and the user is
    redirected back to the login form.
    """
    form = request.form
    email = (form.get("email") or "").strip().lower()
    pwd = form.get("password") or ""
    if not (email and pwd):
        flash("Isi email dan password", "error")
        return redirect(url_for("login_get"))

    with SessionLocal() as s:
        u = s.query(User).filter_by(email=email).first()
        if u is None or not verify_password(u, pwd):
            flash("Email atau password salah", "error")
            return redirect(url_for("login_get"))
        uid, user_email = u.id, u.email

    session["uid"] = uid
    session["email"] = user_email
    return redirect(url_for("index"))
196
 
197
@app.get("/register")
def register_get():
    """Render the registration form."""
    page = render_template("register.html")
    return page
 
 
200
 
201
@app.post("/register")
def register_post():
    """Create a user from the submitted form and log them in.

    Missing fields or an already registered email flash an error and redirect
    back to the registration form. A duplicate inserted by a concurrent request
    between the existence check and the commit is reported the same way.
    """
    from sqlalchemy.exc import IntegrityError

    email = (request.form.get("email") or "").strip().lower()
    pwd = request.form.get("password") or ""
    if not email or not pwd:
        flash("Isi email dan password", "error")
        return redirect(url_for("register_get"))

    with SessionLocal() as s:
        if s.query(User).filter_by(email=email).first():
            flash("Email sudah terdaftar", "error")
            return redirect(url_for("register_get"))
        u = User(email=email)
        set_password(u, pwd)  # stored in pass_hash
        s.add(u)
        try:
            s.commit()
        except IntegrityError:
            # Lost the race against another request registering the same email.
            s.rollback()
            flash("Email sudah terdaftar", "error")
            return redirect(url_for("register_get"))
        # Read the values while the session is still open: after commit the
        # attributes may be expired and cannot be refreshed once it is closed.
        uid, user_email = u.id, u.email

    session["uid"] = uid
    session["email"] = user_email
    return redirect(url_for("index"))
 
 
 
 
 
 
 
 
 
 
 
219
 
220
@app.get("/logout")
def logout():
    """Drop everything stored in the session and go back to the login page."""
    session.clear()
    login_url = url_for("login_get")
    return redirect(login_url)
224
 
225
@app.get("/")
@login_required
def index():
    """Render the main page with the user's 10 most recent translations."""
    recent = []
    with SessionLocal() as s:
        latest = (
            s.query(Translation)
            .filter(Translation.user_id == session.get("uid"))
            .order_by(Translation.id.desc())
            .limit(10)
        )
        for row in latest.all():
            recent.append({"src": row.src, "mt": row.mt, "created_at": row.created_at})
    return render_template("index.html", logged_in=True, device=DEVICE, recent=recent)
241
+
242
@app.post("/translate")
def api_translate():
    """Translate the posted text and store the result in the user's history.

    Expects a JSON object with ``text`` and optional ``max_new_tokens``.
    Responses: 401 when not logged in, 400 for empty text or an invalid
    ``max_new_tokens``, 500 when translation or saving fails, otherwise
    ``{"mt": <translation>}``.
    """
    # Login is required for this endpoint.
    if not session.get("uid"):
        return jsonify({"error": "Unauthorized"}), 401

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Missing, invalid, or non-object JSON is treated as an empty payload.
        data = {}

    raw_text = data.get("text")
    text = raw_text.strip() if isinstance(raw_text, str) else ""
    if not text:
        return jsonify({"error": "Empty text"}), 400

    raw_max = data.get("max_new_tokens", 48)
    try:
        max_new = int(48 if raw_max is None else raw_max)
    except (TypeError, ValueError):
        return jsonify({"error": "Invalid max_new_tokens"}), 400
    # Bound the generation length so one request cannot monopolise the model.
    max_new = max(1, min(max_new, 256))

    try:
        # Pre-normalise, then run the model.
        clean = prenorm(text)
        mt = translate_with_model(clean, max_new_tokens=max_new)
        # Save to history (the original, un-normalised text is stored as src).
        with SessionLocal() as s:
            s.add(Translation(user_id=session["uid"], src=text, mt=mt))
            s.commit()
        return jsonify({"mt": mt})
    except Exception as e:
        log.exception("translate error: %s", e)
        return jsonify({"error": "server error"}), 500
266
 
267
@app.get("/history")
def api_history():
    """Return the logged-in user's 10 most recent translations as JSON.

    Without a session the response is an empty ``items`` list. ``created_at``
    is formatted as ``YYYY-MM-DD HH:MM``, or ``""`` when the row has no
    timestamp.
    """
    if not session.get("uid"):
        return jsonify({"items": []})
    with SessionLocal() as s:
        uid = session["uid"]
        items = (
            s.query(Translation)
            .filter(Translation.user_id == uid)
            .order_by(Translation.id.desc())
            .limit(10)
            .all()
        )
        out = [
            {
                "src": it.src,
                "mt": it.mt,
                # The column is nullable; do not crash on rows without a timestamp.
                "created_at": it.created_at.strftime("%Y-%m-%d %H:%M") if it.created_at else "",
            }
            for it in items
        ]
    return jsonify({"items": out})
 
 
 
 
 
 
 
 
 
 
286
 
287
  if __name__ == "__main__":
288
+ # untuk run lokal: python app.py
289
  app.run(host="0.0.0.0", port=int(os.getenv("PORT", "7860")), debug=True)
frontend/index.html CHANGED
@@ -44,8 +44,6 @@
44
  </style>
45
  </head>
46
  <body>
47
- <script>window.LOGGED_IN = {{ 'true' if logged_in else 'false' }};</script>
48
-
49
  <header>
50
  <div class="container header-content">
51
  <div class="logo"><h1>PapuaTranslate</h1></div>
@@ -54,13 +52,7 @@
54
  <li><a href="/">Home</a></li>
55
  <li><a href="/about">About</a></li>
56
  <li><a href="/#history">History</a></li>
57
- {% if logged_in %}
58
- <li class="auth-badge">Login: <span class="pill">Aktif</span></li>
59
- <li><a href="/logout">Logout</a></li>
60
- {% else %}
61
- <li><a href="/login">Login</a></li>
62
- <li><a href="/register">Daftar</a></li>
63
- {% endif %}
64
  </ul>
65
  </nav>
66
  </div>
@@ -69,7 +61,7 @@
69
  <section class="hero">
70
  <div class="container">
71
  <h2>Translasi Dialek Papua → Bahasa Indonesia Baku</h2>
72
- <p>Model mT5 + LoRA (CENDOL) dengan prenorm & reranking n-best.</p>
73
  </div>
74
  </section>
75
 
@@ -112,22 +104,15 @@
112
  </div>
113
  </section>
114
 
115
- <section class="info">
116
- <div class="container">
117
- <h3>Tentang Aplikasi</h3>
118
- <p>PapuaTranslate menerapkan arsitektur CENDOL berbasis mT5 dengan LoRA adapter untuk menerjemahkan dialek Papua ke Bahasa Indonesia baku secara ringkas dan aman.</p>
119
- </div>
120
- </section>
121
-
122
  <footer>
123
  <div class="container">
124
  <div class="footer-content">
125
  <div class="footer-section">
126
  <h3>PapuaTranslate</h3>
127
- <p>Yogotak Hubuluk, Motok Hanorogo — Hari esok harus lebih baik dari hari ini.</p>
128
  </div>
129
  <div class="footer-section">
130
- <h3>Informasi Teknis</h3>
131
  <p>Model: mT5 Base + LoRA</p>
132
  <p>Device: {{ device }}</p>
133
  </div>
@@ -140,14 +125,10 @@
140
 
141
  <script>
142
  async function translateText() {
143
- if (window.LOGGED_IN !== true && window.LOGGED_IN !== 'true') {
144
- window.location.href = '/login';
145
- return;
146
- }
147
  const inputText = document.getElementById('papua-input').value.trim();
148
  const outputElement = document.getElementById('indonesia-output');
149
  const translateBtn = document.getElementById('translate-btn');
150
- if (!inputText) { outputElement.textContent = "Silakan masukkan teks logat Papua."; return; }
151
  outputElement.innerHTML = '<span class="loading">Menerjemahkan...</span>';
152
  translateBtn.disabled = true; translateBtn.textContent = 'Menerjemahkan...';
153
  try {
@@ -155,7 +136,7 @@
155
  method: 'POST',
156
  headers: { 'Content-Type': 'application/json' },
157
  credentials: 'same-origin',
158
- body: JSON.stringify({ text: inputText, max_new_tokens: 24 })
159
  });
160
  const data = await r.json();
161
  if (r.ok) {
 
44
  </style>
45
  </head>
46
  <body>
 
 
47
  <header>
48
  <div class="container header-content">
49
  <div class="logo"><h1>PapuaTranslate</h1></div>
 
52
  <li><a href="/">Home</a></li>
53
  <li><a href="/about">About</a></li>
54
  <li><a href="/#history">History</a></li>
55
+ <li><a href="/logout">Logout</a></li>
 
 
 
 
 
 
56
  </ul>
57
  </nav>
58
  </div>
 
61
  <section class="hero">
62
  <div class="container">
63
  <h2>Translasi Dialek Papua → Bahasa Indonesia Baku</h2>
64
+ <p>mT5 + LoRA (CENDOL) dengan prenorm & n-best reranking ringan.</p>
65
  </div>
66
  </section>
67
 
 
104
  </div>
105
  </section>
106
 
 
 
 
 
 
 
 
107
  <footer>
108
  <div class="container">
109
  <div class="footer-content">
110
  <div class="footer-section">
111
  <h3>PapuaTranslate</h3>
112
+ <p>Yogotak Hubuluk, Motok Hanorogo.</p>
113
  </div>
114
  <div class="footer-section">
115
+ <h3>Info</h3>
116
  <p>Model: mT5 Base + LoRA</p>
117
  <p>Device: {{ device }}</p>
118
  </div>
 
125
 
126
  <script>
127
  async function translateText() {
 
 
 
 
128
  const inputText = document.getElementById('papua-input').value.trim();
129
  const outputElement = document.getElementById('indonesia-output');
130
  const translateBtn = document.getElementById('translate-btn');
131
+ if (!inputText) { outputElement.textContent = "Silakan masukkan teks."; return; }
132
  outputElement.innerHTML = '<span class="loading">Menerjemahkan...</span>';
133
  translateBtn.disabled = true; translateBtn.textContent = 'Menerjemahkan...';
134
  try {
 
136
  method: 'POST',
137
  headers: { 'Content-Type': 'application/json' },
138
  credentials: 'same-origin',
139
+ body: JSON.stringify({ text: inputText, max_new_tokens: 48 })
140
  });
141
  const data = await r.json();
142
  if (r.ok) {