amosnbn committed on
Commit
9031ea7
·
1 Parent(s): 9fd8fc8

fix(auth): dual columns pass_hash/password + sticky sessions + prenorm + lazy LoRA

Browse files
Files changed (1) hide show
  1. app.py +87 -109
app.py CHANGED
@@ -1,99 +1,83 @@
1
- # app.py
2
- # PapuaTranslate Flask 3 + SQLAlchemy + Supabase + mT5-LoRA (lazy load) + prenorm
3
- import os, re, json, logging, threading
4
  from datetime import datetime, timezone
5
  from functools import wraps
6
- from typing import Optional, Tuple, List
7
 
8
  from flask import (
9
  Flask, render_template, request, redirect, url_for,
10
  session, jsonify, flash
11
  )
12
 
13
- # ========= Logging =========
14
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
15
  log = logging.getLogger("papua-app")
16
 
17
- # ========= Flask =========
18
- app = Flask(
19
- __name__,
20
- template_folder="frontend", # folder HTML kamu
21
- static_folder="static"
22
- )
23
  app.config["SECRET_KEY"] = os.getenv("SECRET_KEY", "dev-secret-change-me")
24
- app.config["SESSION_COOKIE_SAMESITE"] = "Lax" # biar fetch same-origin ikut kirim cookie
25
- app.config["SESSION_COOKIE_SECURE"] = False # True kalau pakai https strict
 
26
 
27
- # ========= DB: SQLAlchemy (Supabase Postgres) =========
28
- from sqlalchemy import (
29
- create_engine, Column, Integer, Text, DateTime, ForeignKey, func
30
- )
31
  from sqlalchemy.orm import declarative_base, sessionmaker, scoped_session, relationship
32
 
33
- DATABASE_URL = os.getenv("DATABASE_URL") or os.getenv("DB_URL")
34
- if not DATABASE_URL:
35
- # fallback aman untuk run lokal
36
- DATABASE_URL = "sqlite:////tmp/app.db"
37
- log.warning("[DB] DATABASE_URL tidak diset; pakai SQLite /tmp/app.db")
38
-
39
- # tambahkan sslmode=require kalau Postgres dan belum ada
40
  if DATABASE_URL.startswith("postgresql") and "sslmode=" not in DATABASE_URL:
41
- sep = "&" if "?" in DATABASE_URL else "?"
42
- DATABASE_URL = f"{DATABASE_URL}{sep}sslmode=require"
43
 
44
- engine = create_engine(
45
- DATABASE_URL,
46
- pool_pre_ping=True,
47
- )
48
  SessionLocal = scoped_session(sessionmaker(bind=engine, autoflush=False, autocommit=False))
49
  Base = declarative_base()
50
 
51
  class User(Base):
52
  __tablename__ = "users"
53
- id = Column(Integer, primary_key=True)
54
- email = Column(Text, unique=True, nullable=False)
55
- pass_hash = Column(Text, nullable=False) # WAJIB: pakai pass_hash
 
 
56
  created_at = Column(DateTime(timezone=True), server_default=func.now())
57
 
58
  class Translation(Base):
59
  __tablename__ = "translations"
60
- id = Column(Integer, primary_key=True)
61
- user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
62
- src = Column(Text, nullable=False)
63
- mt = Column(Text, nullable=False)
64
  created_at = Column(DateTime(timezone=True), server_default=func.now())
 
65
 
66
- user = relationship("User")
67
-
68
- # create_all aman untuk SQLite; untuk Supabase jika tabel sudah ada, ini tidak mengubah skema
69
  try:
70
  Base.metadata.create_all(engine)
71
  log.info("[DB] Ready: %s", DATABASE_URL)
72
  except Exception as e:
73
  log.exception("[DB] init error: %s", e)
74
 
75
- # ========= Auth helpers =========
76
  from werkzeug.security import generate_password_hash, check_password_hash
77
 
78
- def set_password(user: User, raw: str):
79
- user.pass_hash = generate_password_hash(raw)
 
 
80
 
81
- def verify_password(user: User, raw: str) -> bool:
82
- try:
83
- return check_password_hash(user.pass_hash, raw)
84
- except Exception:
85
- return False
 
 
86
 
87
  def login_required(fn):
88
  @wraps(fn)
89
- def _wrap(*args, **kwargs):
90
  if not session.get("uid"):
91
  return redirect(url_for("login_get"))
92
- return fn(*args, **kwargs)
93
- return _wrap
94
 
95
- # ========= Prenorm (heuristik ringan agar input lebih bersih) =========
96
- # Kamu bisa kembangkan sesuai kebutuhanmu.
97
  PAPUA_MAP = {
98
  r"\bsa\b": "saya",
99
  r"\bko\b": "kamu",
@@ -104,27 +88,23 @@ PAPUA_MAP = {
104
  r"\bsu\b": "sudah",
105
  r"\bkong\b": "kemudian",
106
  }
107
-
108
- def prenorm(text: str) -> str:
109
- t = text.strip()
110
  t = re.sub(r"\s+", " ", t)
111
- # normalisasi tanda baca aneh
112
- t = t.replace("…", "...").replace("–", "-").replace("—", "-")
113
- # map kata umum dialek → baku (heuristik)
114
  for pat, repl in PAPUA_MAP.items():
115
  t = re.sub(pat, repl, t, flags=re.IGNORECASE)
116
  return t
117
 
118
- # ========= Model (lazy-load LoRA) =========
119
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
120
  from peft import PeftModel
121
 
122
  BASE_MODEL_ID = os.getenv("BASE_MODEL_ID", "amosnbn/cendol-mt5-base-inst")
123
  ADAPTER_ID = os.getenv("ADAPTER_ID", "amosnbn/papua-lora-ckpt-168")
124
- DEVICE = os.getenv("DEVICE", "cpu") # di Spaces CPU
125
  TOK = None
126
  MODEL = None
127
- _MODEL_LOCK = threading.Lock()
128
 
129
  def _load_model():
130
  global TOK, MODEL
@@ -140,38 +120,46 @@ def _load_model():
140
  def get_model():
141
  global MODEL
142
  if MODEL is None:
143
- with _MODEL_LOCK:
144
  if MODEL is None:
145
  _load_model()
146
  return TOK, MODEL
147
 
148
- def translate_with_model(text: str, max_new_tokens: int = 48) -> str:
149
  tok, m = get_model()
150
- inputs = tok([text], return_tensors="pt")
151
- outputs = m.generate(
152
- **inputs,
153
- max_new_tokens=max_new_tokens,
154
  num_beams=4,
155
  length_penalty=0.9,
156
  no_repeat_ngram_size=3,
157
  early_stopping=True,
158
  )
159
- mt = tok.decode(outputs[0], skip_special_tokens=True)
160
- return mt
161
 
162
- # ========= Hooks & small utils =========
163
  @app.before_request
164
  def _log_req():
165
- path = request.path
166
- if path not in ("/health", "/ping", "/favicon.ico"):
167
- log.info("[REQ] %s %s", request.method, path)
168
 
169
- # ========= Routes =========
170
  @app.get("/health")
171
  @app.get("/ping")
172
  def health():
173
- return jsonify({"ok": True, "time": datetime.now(timezone.utc).isoformat()})
174
-
 
 
 
 
 
 
 
 
 
 
175
  @app.get("/login")
176
  def login_get():
177
  return render_template("login.html")
@@ -179,20 +167,19 @@ def login_get():
179
  @app.post("/login")
180
  def login_post():
181
  email = (request.form.get("email") or "").strip().lower()
182
- pwd = request.form.get("password") or ""
 
183
  if not email or not pwd:
184
  flash("Isi email dan password", "error")
185
  return redirect(url_for("login_get"))
186
-
187
  with SessionLocal() as s:
188
  u = s.query(User).filter_by(email=email).first()
189
- if not u or not verify_password(u, pwd):
190
  flash("Email atau password salah", "error")
191
  return redirect(url_for("login_get"))
192
-
193
  session["uid"] = u.id
194
  session["email"] = u.email
195
- return redirect(url_for("index"))
196
 
197
  @app.get("/register")
198
  def register_get():
@@ -200,19 +187,22 @@ def register_get():
200
 
201
  @app.post("/register")
202
  def register_post():
 
203
  email = (request.form.get("email") or "").strip().lower()
204
- pwd = request.form.get("password") or ""
 
205
  if not email or not pwd:
206
  flash("Isi email dan password", "error")
207
  return redirect(url_for("register_get"))
208
-
209
  with SessionLocal() as s:
210
  if s.query(User).filter_by(email=email).first():
211
  flash("Email sudah terdaftar", "error")
212
- return redirect(url_for("register_get"))
213
  u = User(email=email)
214
- set_password(u, pwd) # SIMPAN di pass_hash
215
- s.add(u); s.commit()
 
 
216
  session["uid"] = u.id
217
  session["email"] = u.email
218
  return redirect(url_for("index"))
@@ -222,47 +212,41 @@ def logout():
222
  session.clear()
223
  return redirect(url_for("login_get"))
224
 
 
225
  @app.get("/")
226
  @login_required
227
  def index():
228
  device = DEVICE
229
- # ambil 10 history terakhir
230
  with SessionLocal() as s:
231
- uid = session.get("uid")
232
  items = (
233
  s.query(Translation)
234
  .filter(Translation.user_id == uid)
235
  .order_by(Translation.id.desc())
236
- .limit(10)
237
- .all()
238
  )
239
  recent = [{"src": it.src, "mt": it.mt, "created_at": it.created_at} for it in items]
240
  return render_template("index.html", logged_in=True, device=device, recent=recent)
241
 
242
  @app.post("/translate")
243
  def api_translate():
244
- # jika ingin wajib login:
245
- if not session.get("uid"):
246
  return jsonify({"error": "Unauthorized"}), 401
247
-
248
  data = request.get_json(silent=True) or {}
249
  text = (data.get("text") or "").strip()
250
- max_new = int(data.get("max_new_tokens", 48))
251
  if not text:
252
- return jsonify({"error": "Empty text"}), 400
253
-
254
  try:
255
- # langkah prenorm → model
256
  clean = prenorm(text)
257
- mt = translate_with_model(clean, max_new_tokens=max_new)
258
- # simpan riwayat
259
  with SessionLocal() as s:
260
  s.add(Translation(user_id=session["uid"], src=text, mt=mt))
261
  s.commit()
262
  return jsonify({"mt": mt})
263
  except Exception as e:
264
  log.exception("translate error: %s", e)
265
- return jsonify({"error": "server error"}), 500
266
 
267
  @app.get("/history")
268
  def api_history():
@@ -274,16 +258,10 @@ def api_history():
274
  s.query(Translation)
275
  .filter(Translation.user_id == uid)
276
  .order_by(Translation.id.desc())
277
- .limit(10)
278
- .all()
279
  )
280
- out = [{
281
- "src": it.src,
282
- "mt": it.mt,
283
- "created_at": it.created_at.strftime("%Y-%m-%d %H:%M")
284
- } for it in items]
285
  return jsonify({"items": out})
286
 
287
  if __name__ == "__main__":
288
- # untuk run lokal: python app.py
289
  app.run(host="0.0.0.0", port=int(os.getenv("PORT", "7860")), debug=True)
 
1
+ # app.py — PapuaTranslate (Flask 3 + SQLAlchemy + Supabase + mT5-LoRA + prenorm)
2
+ import os, re, logging, threading
 
3
  from datetime import datetime, timezone
4
  from functools import wraps
 
5
 
6
  from flask import (
7
  Flask, render_template, request, redirect, url_for,
8
  session, jsonify, flash
9
  )
10
 
 
11
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
12
  log = logging.getLogger("papua-app")
13
 
14
# Flask application object; templates are looked up in "frontend" and static files in "static".
app = Flask(__name__, template_folder="frontend", static_folder="static")
# Session-signing key, read from the SECRET_KEY environment variable.
# NOTE(review): the fallback is a fixed, publicly visible string — confirm
# SECRET_KEY is always set wherever this is deployed.
app.config["SECRET_KEY"] = os.getenv("SECRET_KEY", "dev-secret-change-me")
# Cookie settings intended to be safe for Spaces.
app.config["SESSION_COOKIE_SAMESITE"] = "Lax"
app.config["SESSION_COOKIE_SECURE"] = True  # Spaces is served over HTTPS, so the cookie is sent
19
 
20
+ # ================= DB =================
21
+ from sqlalchemy import create_engine, Column, Integer, Text, DateTime, ForeignKey, func
 
 
22
  from sqlalchemy.orm import declarative_base, sessionmaker, scoped_session, relationship
23
 
24
# Connection string: DATABASE_URL first, then DB_URL, then a local SQLite file.
DATABASE_URL = os.getenv("DATABASE_URL") or os.getenv("DB_URL") or "sqlite:////tmp/app.db"
# For PostgreSQL URLs that do not mention sslmode, append sslmode=require,
# joined with "&" when a query string already exists and "?" otherwise.
if DATABASE_URL.startswith("postgresql") and "sslmode=" not in DATABASE_URL:
    DATABASE_URL += ("&" if "?" in DATABASE_URL else "?") + "sslmode=require"

# Engine created with pool_pre_ping enabled.
engine = create_engine(DATABASE_URL, pool_pre_ping=True)
# Scoped session factory bound to the engine, with autoflush and autocommit off.
SessionLocal = scoped_session(sessionmaker(bind=engine, autoflush=False, autocommit=False))
# Declarative base class that the ORM models below inherit from.
Base = declarative_base()
31
 
32
  class User(Base):
      """ORM model mapped to the ``users`` table."""

      __tablename__ = "users"
      id = Column(Integer, primary_key=True)
      email = Column(Text, unique=True, nullable=False)
      # Two columns are kept so the model stays compatible with the older schema.
      pass_hash = Column(Text, nullable=True)  # new column (primary target)
      password = Column(Text, nullable=True)  # old column (legacy)
      # Filled in by the database through a server-side default of now().
      created_at = Column(DateTime(timezone=True), server_default=func.now())
40
 
41
  class Translation(Base):
      """ORM model mapped to the ``translations`` table."""

      __tablename__ = "translations"
      id = Column(Integer, primary_key=True)
      # Owning user; foreign key to users.id.
      user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
      src = Column(Text, nullable=False)  # source text
      mt = Column(Text, nullable=False)  # translated text
      # Filled in by the database through a server-side default of now().
      created_at = Column(DateTime(timezone=True), server_default=func.now())
      # ORM relationship to the owning User row.
      user = relationship("User")
49
 
 
 
 
50
  try:
51
  Base.metadata.create_all(engine)
52
  log.info("[DB] Ready: %s", DATABASE_URL)
53
  except Exception as e:
54
  log.exception("[DB] init error: %s", e)
55
 
56
+ # ================= Auth =================
57
  from werkzeug.security import generate_password_hash, check_password_hash
58
 
59
def _set_hash_fields(u: User, raw: str):
    """Hash ``raw`` once and store the result in both password columns of ``u``.

    The legacy ``password`` column receives the same hash so that it is
    never left NULL.
    """
    u.pass_hash = u.password = generate_password_hash(raw)
63
 
64
def _get_hash(u: User) -> str | None:
    """Return the stored hash of ``u``: ``pass_hash`` when set, else ``password``."""
    primary = u.pass_hash
    return primary if primary else u.password
67
+
68
def _verify(u: User, raw: str) -> bool:
    """Return True when ``raw`` matches the password hash stored for ``u``.

    Args:
        u: The user whose stored hash is checked.
        raw: The plain-text password submitted by the client.

    Returns:
        False when the user has no stored hash, when the password does not
        match, or when the stored value is not a hash Werkzeug can evaluate.
    """
    h = _get_hash(u)
    if not h:
        return False
    try:
        return check_password_hash(h, raw)
    except (ValueError, TypeError):
        # The legacy column may hold values that were not produced by
        # generate_password_hash (e.g. a hash with an unknown method). Such a
        # value must fail closed rather than crash the login request.
        log.warning("[AUTH] stored hash for user id=%s is not a usable Werkzeug hash", u.id)
        return False
71
 
72
  def login_required(fn):
73
  @wraps(fn)
74
+ def wrap(*a, **kw):
75
  if not session.get("uid"):
76
  return redirect(url_for("login_get"))
77
+ return fn(*a, **kw)
78
+ return wrap
79
 
80
+ # ================= Prenorm =================
 
81
  PAPUA_MAP = {
82
  r"\bsa\b": "saya",
83
  r"\bko\b": "kamu",
 
88
  r"\bsu\b": "sudah",
89
  r"\bkong\b": "kemudian",
90
  }
91
def prenorm(t: str) -> str:
    """Normalise input text before it is passed to the model.

    Trims the ends, collapses every whitespace run into a single space, then
    applies each ``PAPUA_MAP`` pattern case-insensitively, in mapping order.
    """
    cleaned = re.sub(r"\s+", " ", t.strip())
    for pattern in PAPUA_MAP:
        cleaned = re.sub(pattern, PAPUA_MAP[pattern], cleaned, flags=re.IGNORECASE)
    return cleaned
97
 
98
+ # ================= Model (lazy LoRA) =================
99
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
100
  from peft import PeftModel
101
 
102
  # Identifiers of the base model and of the adapter; each can be overridden
  # through the environment variable of the same name.
  BASE_MODEL_ID = os.getenv("BASE_MODEL_ID", "amosnbn/cendol-mt5-base-inst")
  ADAPTER_ID = os.getenv("ADAPTER_ID", "amosnbn/papua-lora-ckpt-168")
  # Device name, "cpu" unless DEVICE is set in the environment.
  DEVICE = os.getenv("DEVICE", "cpu")
  # Module-level tokenizer and model slots; both start out empty.
  # NOTE(review): presumably filled by _load_model(), which declares them
  # global — its body is not visible here, confirm.
  TOK = None
  MODEL = None
  # Lock held by get_model() while it triggers the first load.
  _LOCK = threading.Lock()
108
 
109
  def _load_model():
110
  global TOK, MODEL
 
120
  def get_model():
121
  global MODEL
122
  if MODEL is None:
123
+ with _LOCK:
124
  if MODEL is None:
125
  _load_model()
126
  return TOK, MODEL
127
 
128
def translate_core(text: str, max_new: int = 48) -> str:
    """Translate ``text`` with the lazily loaded model and return the decoded string.

    Args:
        text: Input sentence, tokenised as a batch of one.
        max_new: Value passed to ``generate`` as ``max_new_tokens``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    tokenizer, model = get_model()
    encoded = tokenizer([text], return_tensors="pt")
    generation_args = {
        "max_new_tokens": max_new,
        "num_beams": 4,
        "length_penalty": 0.9,
        "no_repeat_ngram_size": 3,
        "early_stopping": True,
    }
    sequences = model.generate(**encoded, **generation_args)
    return tokenizer.decode(sequences[0], skip_special_tokens=True)
 
140
 
141
+ # ================ Utils & Hooks ================
142
  @app.before_request
143
  def _log_req():
144
+ p = request.path
145
+ if p not in ("/health", "/ping", "/favicon.ico"):
146
+ log.info("[REQ] %s %s", request.method, p)
147
 
 
148
  @app.get("/health")
149
  @app.get("/ping")
150
  def health():
151
+ return jsonify(ok=True, time=datetime.now(timezone.utc).isoformat())
152
+
153
@app.get("/debug/session")
def debug_session():
    """Return the caller's session ``uid``/``email`` and the cookie settings as JSON."""
    snapshot = {key: session.get(key) for key in ("uid", "email")}
    snapshot["cookie_secure"] = app.config["SESSION_COOKIE_SECURE"]
    snapshot["cookie_samesite"] = app.config["SESSION_COOKIE_SAMESITE"]
    return jsonify(snapshot)
161
+
162
+ # ================ Auth routes ================
163
  @app.get("/login")
164
  def login_get():
165
  return render_template("login.html")
 
167
  @app.post("/login")
168
  def login_post():
169
  email = (request.form.get("email") or "").strip().lower()
170
+ pwd = request.form.get("password") or request.form.get("pass") or ""
171
+ log.info("[AUTH] login attempt email=%s len(pwd)=%d", email, len(pwd))
172
  if not email or not pwd:
173
  flash("Isi email dan password", "error")
174
  return redirect(url_for("login_get"))
 
175
  with SessionLocal() as s:
176
  u = s.query(User).filter_by(email=email).first()
177
+ if not u or not _verify(u, pwd):
178
  flash("Email atau password salah", "error")
179
  return redirect(url_for("login_get"))
 
180
  session["uid"] = u.id
181
  session["email"] = u.email
182
+ return redirect(url_for("index"))
183
 
184
  @app.get("/register")
185
  def register_get():
 
187
 
188
  @app.post("/register")
189
  def register_post():
190
+ # TERIMA beberapa kemungkinan nama field agar match file HTML-mu
191
  email = (request.form.get("email") or "").strip().lower()
192
+ pwd = (request.form.get("password") or request.form.get("pass") or "").strip()
193
+ log.info("[AUTH] register email=%s len(pwd)=%d", email, len(pwd))
194
  if not email or not pwd:
195
  flash("Isi email dan password", "error")
196
  return redirect(url_for("register_get"))
 
197
  with SessionLocal() as s:
198
  if s.query(User).filter_by(email=email).first():
199
  flash("Email sudah terdaftar", "error")
200
+ return redirect(url_for("login_get"))
201
  u = User(email=email)
202
+ _set_hash_fields(u, pwd) # isi pass_hash & password
203
+ s.add(u); s.commit(); s.refresh(u)
204
+ log.info("[AUTH] user created id=%s pass_hash_set=%s password_set=%s",
205
+ u.id, bool(u.pass_hash), bool(u.password))
206
  session["uid"] = u.id
207
  session["email"] = u.email
208
  return redirect(url_for("index"))
 
212
  session.clear()
213
  return redirect(url_for("login_get"))
214
 
215
+ # ================ App routes ================
216
  @app.get("/")
217
  @login_required
218
  def index():
219
  device = DEVICE
 
220
  with SessionLocal() as s:
221
+ uid = session["uid"]
222
  items = (
223
  s.query(Translation)
224
  .filter(Translation.user_id == uid)
225
  .order_by(Translation.id.desc())
226
+ .limit(10).all()
 
227
  )
228
  recent = [{"src": it.src, "mt": it.mt, "created_at": it.created_at} for it in items]
229
  return render_template("index.html", logged_in=True, device=device, recent=recent)
230
 
231
  @app.post("/translate")
232
  def api_translate():
233
+ if not session.get("uid"): # keamanan
 
234
  return jsonify({"error": "Unauthorized"}), 401
 
235
  data = request.get_json(silent=True) or {}
236
  text = (data.get("text") or "").strip()
237
+ maxn = int(data.get("max_new_tokens", 48))
238
  if not text:
239
+ return jsonify({"error":"Empty text"}), 400
 
240
  try:
 
241
  clean = prenorm(text)
242
+ mt = translate_core(clean, max_new=maxn)
 
243
  with SessionLocal() as s:
244
  s.add(Translation(user_id=session["uid"], src=text, mt=mt))
245
  s.commit()
246
  return jsonify({"mt": mt})
247
  except Exception as e:
248
  log.exception("translate error: %s", e)
249
+ return jsonify({"error":"server error"}), 500
250
 
251
  @app.get("/history")
252
  def api_history():
 
258
  s.query(Translation)
259
  .filter(Translation.user_id == uid)
260
  .order_by(Translation.id.desc())
261
+ .limit(10).all()
 
262
  )
263
+ out = [{"src": i.src, "mt": i.mt, "created_at": i.created_at.strftime("%Y-%m-%d %H:%M")} for i in items]
 
 
 
 
264
  return jsonify({"items": out})
265
 
266
  if __name__ == "__main__":
 
267
  app.run(host="0.0.0.0", port=int(os.getenv("PORT", "7860")), debug=True)