amosnbn committed on
Commit
f7bc2d9
·
1 Parent(s): ff8a965
Files changed (1) hide show
  1. app.py +197 -133
app.py CHANGED
@@ -1,149 +1,213 @@
1
- # app.py — PapuaTranslate (HF Spaces, Flask)
2
- import os, logging
3
- from functools import lru_cache
4
- from typing import Optional
5
- from flask import (
6
- Flask, render_template, request, redirect, url_for,
7
- session, flash, jsonify
8
- )
9
- from werkzeug.security import check_password_hash, generate_password_hash
10
-
11
- # --- SQLAlchemy (opsional: Supabase Postgres) ---
12
- from sqlalchemy import create_engine, text
13
- from sqlalchemy.exc import SQLAlchemyError
14
-
15
- # --- Model (lazy load) ---
16
- import torch
17
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
18
- from peft import PeftModel
19
 
20
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
21
- log = logging.getLogger("papua-translate")
22
-
23
- # ================== ENV ==================
24
- SECRET_KEY = os.getenv("SECRET_KEY", "dev-secret-please-set")
25
- DATABASE_URL = os.getenv("DATABASE_URL", "") # e.g. postgresql+psycopg2://user:pass@host:6543/postgres?sslmode=require
26
- BASE_MODEL_ID = os.getenv("BASE_MODEL_ID", "google/mt5-small")
27
- ADAPTER_ID = os.getenv("ADAPTER_ID", "") # e.g. your LoRA on HF
28
- DISABLE_AUTH = os.getenv("DISABLE_AUTH", "false").lower() in ("1", "true", "yes")
29
 
30
- # ================== APP ==================
31
  app = Flask(__name__, template_folder="templates", static_folder="static")
32
- app.secret_key = SECRET_KEY
33
 
34
- # Cookie settings aman untuk HF (HTTPS)
 
35
  app.config.update(
36
- SESSION_COOKIE_HTTPONLY=True,
37
  SESSION_COOKIE_SAMESITE="Lax",
38
- SESSION_COOKIE_SECURE=True, # HF pakai HTTPS
39
- PERMANENT_SESSION_LIFETIME=60*60*8, # 8 jam
40
  )
41
 
42
- # ================== DB (opsional) ==================
43
- engine = None
44
- if DATABASE_URL:
45
- try:
46
- engine = create_engine(DATABASE_URL, pool_pre_ping=True, pool_recycle=1800)
47
- with engine.connect() as conn:
48
- conn.execute(text("SELECT 1"))
49
- log.info("[DB] Connected OK")
50
- except SQLAlchemyError as e:
51
- log.error(f"[DB] Failed: {e}")
52
-
53
- # ================== AUTH MOCK ==================
54
- # Untuk demo: 1 user statis via ENV (atau bisa kamu simpan di DB)
55
- ADMIN_USER = os.getenv("ADMIN_USER", "admin@example.com")
56
- ADMIN_PASS_HASH = os.getenv("ADMIN_PASS_HASH", generate_password_hash(os.getenv("ADMIN_PASS", "admin123")))
57
-
58
- def is_logged_in() -> bool:
59
- return bool(session.get("uid"))
60
-
61
- # ================== LAZY MODEL ==================
62
- @lru_cache(maxsize=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def _load_model():
64
- """
65
- Load base mT5 + attach LoRA (jika ada). Diload sekali saat dipakai pertama kali.
66
- """
67
- log.info(f"[MODEL] Loading base: {BASE_MODEL_ID}")
68
- tok = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
69
  base = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL_ID)
70
- if ADAPTER_ID:
71
- log.info(f"[MODEL] Attaching LoRA: {ADAPTER_ID}")
72
- base = PeftModel.from_pretrained(base, ADAPTER_ID)
73
- device = "cuda" if torch.cuda.is_available() else "cpu"
74
- base.to(device)
75
- base.eval()
76
- log.info(f"[MODEL] Ready on {device}")
77
- return tok, base, device
78
-
79
- def translate_text(input_text: str, max_new_tokens: int = 64) -> str:
80
- tok, model, device = _load_model()
81
- inputs = tok(input_text, return_tensors="pt", truncation=True).to(device)
82
- with torch.no_grad():
83
- out = model.generate(
84
- **inputs,
85
- max_new_tokens=max_new_tokens,
86
- num_beams=4,
87
- length_penalty=1.0
88
- )
89
- return tok.decode(out[0], skip_special_tokens=True)
90
-
91
- # ================== ROUTES ==================
92
- @app.route("/")
93
- def home():
94
- # Jika auth dimatikan, langsung ke dashboard
95
- if DISABLE_AUTH or is_logged_in():
96
- return redirect(url_for("dashboard"))
97
- return redirect(url_for("login"))
98
-
99
- @app.route("/login", methods=["GET", "POST"])
100
- def login():
101
- if DISABLE_AUTH:
102
- session["uid"] = "debug-user"
103
- return redirect(url_for("dashboard"))
104
-
105
- if request.method == "POST":
106
- email = request.form.get("email", "").strip().lower()
107
- password = request.form.get("password", "")
108
- if email == ADMIN_USER and check_password_hash(ADMIN_PASS_HASH, password):
109
- session["uid"] = email
110
- return redirect(url_for("dashboard"))
111
- flash("Email atau password salah.", "error")
112
- return render_template("login.html")
113
- return render_template("login.html")
114
-
115
- @app.route("/logout")
116
- def logout():
117
- session.clear()
118
- flash("Anda telah keluar.", "info")
119
- return redirect(url_for("login"))
120
-
121
- @app.route("/dashboard")
122
- def dashboard():
123
- if not (DISABLE_AUTH or is_logged_in()):
124
- return redirect(url_for("login"))
125
- return render_template("dashboard.html")
126
-
127
- @app.route("/translate", methods=["POST"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  def api_translate():
129
- if not (DISABLE_AUTH or is_logged_in()):
130
- return jsonify({"ok": False, "error": "Unauthorized"}), 401
131
- data = request.get_json(silent=True) or {}
132
- text = (data.get("text") or "").strip()
133
- if not text:
134
- return jsonify({"ok": False, "error": "Text kosong"}), 400
135
  try:
136
- result = translate_text(text)
137
- return jsonify({"ok": True, "result": result})
 
 
138
  except Exception as e:
139
- log.exception("Translate error")
140
- return jsonify({"ok": False, "error": str(e)}), 500
141
-
142
- @app.route("/about")
143
- def about():
144
- return render_template("about.html")
145
 
146
- # --- local run (Spaces akan pakai gunicorn) ---
147
  if __name__ == "__main__":
148
- port = int(os.environ.get("PORT", 7860))
149
- app.run(host="0.0.0.0", port=port, debug=False)
 
1
+ # app.py
2
+ import os, re, json, logging, threading
3
+ from datetime import datetime, timezone
4
+ from functools import wraps
5
+ from typing import Optional, Tuple, List
6
+ from flask import Flask, render_template, request, redirect, url_for, session, jsonify, flash
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
9
log = logging.getLogger("papua-app")

# CHANGED: templates are served from the 'templates' folder (alternatively,
# make sure the HTML files exist in 'frontend').
app = Flask(__name__, template_folder="templates", static_folder="static")

# Session cookies are marked Secure by default (HF is served over HTTPS).
# For local runs, set SESSION_COOKIE_SECURE to a non-truthy value via ENV.
SESSION_SECURE = os.getenv("SESSION_COOKIE_SECURE", "true").lower() in ("1", "true", "yes")

app.config["SECRET_KEY"] = os.getenv("SECRET_KEY", "dev-secret-change-me")
app.config["SESSION_COOKIE_SAMESITE"] = "Lax"
app.config["SESSION_COOKIE_SECURE"] = SESSION_SECURE
21
 
22
+ # ================= DB =================
23
+ from sqlalchemy import create_engine, Column, Integer, Text, DateTime, ForeignKey, func
24
+ from sqlalchemy.orm import declarative_base, sessionmaker, scoped_session, relationship
25
+
26
# Resolve the database URL: DATABASE_URL wins, DB_URL is the fallback, and a
# SQLite file under /tmp is used when neither variable is set.
DATABASE_URL = os.getenv("DATABASE_URL") or os.getenv("DB_URL")
if DATABASE_URL:
    # Normalize legacy schemes to the explicit psycopg2 driver form.
    if DATABASE_URL.startswith(("postgres://", "postgresql://")):
        DATABASE_URL = "postgresql+psycopg2://" + DATABASE_URL.split("://", 1)[1]
    # Append sslmode=require unless the URL already carries an sslmode.
    if DATABASE_URL.startswith("postgresql+psycopg2") and "sslmode=" not in DATABASE_URL:
        sep = "&" if "?" in DATABASE_URL else "?"
        DATABASE_URL = f"{DATABASE_URL}{sep}sslmode=require"
else:
    DATABASE_URL = "sqlite:////tmp/app.db"
    log.warning("[DB] DATABASE_URL tidak diset; pakai SQLite /tmp/app.db")

engine = create_engine(DATABASE_URL, pool_pre_ping=True)
SessionLocal = scoped_session(
    sessionmaker(bind=engine, autoflush=False, autocommit=False)
)
Base = declarative_base()
44
+
45
class User(Base):
    """ORM model for the ``users`` table: one row per registered account."""

    __tablename__ = "users"

    id = Column(Integer, primary_key=True)
    # Unique login identifier; required.
    email = Column(Text, unique=True, nullable=False)
    # Password hash as written by set_password(); required.
    pass_hash = Column(Text, nullable=False)
    # Filled in by the database (server-side default) on insert.
    created_at = Column(DateTime(timezone=True), server_default=func.now())
51
+
52
class Translation(Base):
    """ORM model for the ``translations`` table: one stored translation."""

    __tablename__ = "translations"

    id = Column(Integer, primary_key=True)
    # Owner of the row; references users.id.
    user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
    # Source text and the model output stored alongside it.
    src = Column(Text, nullable=False)
    mt = Column(Text, nullable=False)
    # Filled in by the database (server-side default) on insert.
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    user = relationship("User")
60
+
61
# Create any missing tables at import time. Failures are logged rather than
# re-raised, so the module still imports when the database is unreachable.
try:
    Base.metadata.create_all(engine)
    # Log the URL with the password masked: DATABASE_URL may embed
    # credentials, and those must never end up in the application logs.
    log.info("[DB] Ready: %s", engine.url.render_as_string(hide_password=True))
except Exception as e:
    log.exception("[DB] init error: %s", e)
66
+
67
+ # ================= Auth =================
68
+ from werkzeug.security import generate_password_hash, check_password_hash
69
def set_password(user: User, raw: str):
    """Hash ``raw`` with ``generate_password_hash`` and store it on ``user.pass_hash``."""
    hashed = generate_password_hash(raw)
    user.pass_hash = hashed
70
def verify_password(user: User, raw: str) -> bool:
    """Return whether ``raw`` matches the hash stored on ``user.pass_hash``.

    Any exception raised while checking is treated as a failed verification.
    """
    try:
        matches = check_password_hash(user.pass_hash, raw)
    except Exception:
        return False
    return matches
73
+
74
def login_required(fn):
    """Decorator: run ``fn`` only when the session carries a ``uid``.

    Requests without a ``uid`` in the session are redirected to the
    ``login_get`` endpoint instead of reaching the wrapped view.
    """
    @wraps(fn)
    def _guarded(*args, **kwargs):
        if session.get("uid"):
            return fn(*args, **kwargs)
        return redirect(url_for("login_get"))

    return _guarded
81
+
82
+ # ================= Prenorm =================
83
# Regex pattern -> replacement word. Each pattern is anchored on word
# boundaries so only whole tokens are rewritten.
PAPUA_MAP = {
    r"\bsa\b": "saya", r"\bko\b": "kamu", r"\btra\b": "tidak", r"\bndak\b": "tidak",
    r"\bmo\b": "mau", r"\bpu\b": "punya", r"\bsu\b": "sudah", r"\bkong\b": "kemudian",
}


def prenorm(text: str) -> str:
    """Normalize ``text`` before it is handed to the translation model.

    Steps, in order:
      1. strip the ends and collapse every whitespace run to one space;
      2. replace the ellipsis character with ``...`` and en/em dashes with ``-``;
      3. apply every ``PAPUA_MAP`` substitution, case-insensitively.

    Returns the normalized string.
    """
    t = re.sub(r"\s+", " ", text.strip())
    # Written as escapes so the literals cannot be damaged by an encoding
    # round-trip: U+2026 ellipsis, U+2013 en dash, U+2014 em dash.
    t = t.replace("\u2026", "...").replace("\u2013", "-").replace("\u2014", "-")
    for pat, repl in PAPUA_MAP.items():
        t = re.sub(pat, repl, t, flags=re.IGNORECASE)
    return t
92
+
93
+ # ================= Model (lazy) =================
94
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
95
+ from peft import PeftModel
96
+
97
# Model identifiers; both can be overridden through the environment.
BASE_MODEL_ID = os.getenv("BASE_MODEL_ID", "amosnbn/cendol-mt5-base-inst")
ADAPTER_ID = os.getenv("ADAPTER_ID", "amosnbn/papua-lora-ckpt-168")

# Only an explicit DEVICE=cuda selects the GPU; any other value means CPU.
DEVICE = "cpu" if os.getenv("DEVICE", "cpu") != "cuda" else "cuda"

# Populated by _load_model(); _MODEL_LOCK is held by get_model() while loading.
TOK = None
MODEL = None
_MODEL_LOCK = threading.Lock()
101
+
102
def _load_model():
    """Load the tokenizer and model and publish them in ``TOK`` / ``MODEL``.

    The base model comes from ``BASE_MODEL_ID``; when ``ADAPTER_ID`` is
    non-empty the adapter is attached on top of it. Everything is built in
    local variables and the globals are assigned only at the very end,
    because ``get_model`` reads ``MODEL`` without holding the lock: a model
    that is visible must already be in eval mode and on ``DEVICE``.
    """
    global TOK, MODEL
    log.info("[MODEL] loading base=%s adapter=%s", BASE_MODEL_ID, ADAPTER_ID)
    tok = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
    model = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL_ID)
    if ADAPTER_ID:
        # An empty ADAPTER_ID means "serve the bare base model".
        model = PeftModel.from_pretrained(model, ADAPTER_ID)
    model.eval()
    model.to(DEVICE)
    # Publish TOK first and MODEL last: MODEL is the flag get_model() checks.
    TOK = tok
    MODEL = model
    log.info("[MODEL] ready on %s", DEVICE)
110
+
111
def get_model():
    """Return ``(TOK, MODEL)``, loading them on first use.

    ``MODEL`` is checked once without the lock and again while holding
    ``_MODEL_LOCK`` before ``_load_model()`` is called.
    """
    if MODEL is not None:
        return TOK, MODEL
    with _MODEL_LOCK:
        if MODEL is None:
            _load_model()
    return TOK, MODEL
117
+
118
def translate_with_model(text: str, max_new_tokens: int = 48,
                         max_input_tokens: int = 512) -> str:
    """Translate ``text`` with the lazily loaded model and return the output.

    Args:
        text: Source sentence (callers pass it through ``prenorm`` first).
        max_new_tokens: Upper bound on the number of generated tokens.
        max_input_tokens: Inputs longer than this many tokens are truncated,
            so an oversized request cannot exhaust memory during generation.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    tok, m = get_model()
    inputs = tok(
        [text],
        return_tensors="pt",
        truncation=True,
        max_length=max_input_tokens,
    ).to(DEVICE)
    outputs = m.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        num_beams=4,
        length_penalty=0.9,
        no_repeat_ngram_size=3,
        early_stopping=True,
    )
    return tok.decode(outputs[0], skip_special_tokens=True)
126
+
127
+ # ================= Utilities =================
128
@app.before_request
def _log_req():
    """Log method and path of every request except the probe/favicon paths."""
    quiet_paths = ("/health", "/ping", "/favicon.ico")
    if request.path in quiet_paths:
        return
    log.info("[REQ] %s %s", request.method, request.path)
132
+
133
@app.errorhandler(Exception)
def _err(e):
    """Catch-all error handler.

    HTTP errors (404, 405, ...) are returned unchanged so they keep their
    own status code; a handler registered for ``Exception`` would otherwise
    turn every one of them into a 500. Anything else is logged with its
    traceback, so the failure is clearly visible in the HF logs, and
    answered with a plain 500.
    """
    # Imported here so the handler does not depend on a module-level import.
    from werkzeug.exceptions import HTTPException

    if isinstance(e, HTTPException):
        return e
    log.exception("Unhandled error")
    return "Internal Server Error", 500
138
+
139
+ # ================= Routes =================
140
# Each decorator needs its own line: joining two decorators with ';' is a
# SyntaxError. Both routes map to the same view function.
@app.get("/health")
@app.get("/ping")
def health():
    """Liveness probe: return ``{"ok": true}`` plus the current UTC time."""
    return jsonify({"ok": True, "time": datetime.now(timezone.utc).isoformat()})
142
+
143
@app.get("/login")
def login_get():
    """Render the login form."""
    return render_template("login.html")
145
+
146
@app.post("/login")
def login_post():
    """Check the submitted credentials and start a session on success.

    Missing fields or a wrong email/password flash an error and redirect
    back to the login form; a successful login stores ``uid`` and ``email``
    in the session and redirects to ``index``.
    """
    form = request.form
    email = (form.get("email") or "").strip().lower()
    pwd = form.get("password") or ""
    if not (email and pwd):
        flash("Isi email dan password", "error")
        return redirect(url_for("login_get"))

    with SessionLocal() as s:
        u = s.query(User).filter_by(email=email).first()
        authenticated = bool(u) and verify_password(u, pwd)
        if not authenticated:
            flash("Email atau password salah", "error")
            return redirect(url_for("login_get"))
        session["uid"], session["email"] = u.id, u.email
    return redirect(url_for("index"))
158
+
159
@app.get("/register")
def register_get():
    """Render the registration form."""
    return render_template("register.html")
161
+
162
@app.post("/register")
def register_post():
    """Create an account from the submitted form and log the user in.

    Missing fields or an already-registered email flash an error and
    redirect back to the registration form. On success ``uid`` and
    ``email`` are stored in the session and the user is sent to ``index``.
    """
    # Imported here so the view does not depend on a module-level import.
    from sqlalchemy.exc import IntegrityError

    email = (request.form.get("email") or "").strip().lower()
    pwd = request.form.get("password") or ""
    if not email or not pwd:
        flash("Isi email dan password", "error")
        return redirect(url_for("register_get"))

    with SessionLocal() as s:
        if s.query(User).filter_by(email=email).first():
            flash("Email sudah terdaftar", "error")
            return redirect(url_for("register_get"))
        u = User(email=email)
        set_password(u, pwd)
        s.add(u)
        try:
            s.commit()
        except IntegrityError:
            # Two concurrent registrations can both pass the lookup above;
            # the unique constraint on email rejects the second commit.
            s.rollback()
            flash("Email sudah terdaftar", "error")
            return redirect(url_for("register_get"))
        # Read the attributes while the DB session is still open: commit
        # expires them, and they cannot be reloaded once it is closed.
        uid, user_email = u.id, u.email

    session["uid"], session["email"] = uid, user_email
    return redirect(url_for("index"))
174
+
175
@app.get("/logout")
def logout():
    """Clear the session and redirect to the login form."""
    session.clear()
    return redirect(url_for("login_get"))
177
+
178
@app.get("/")
@login_required
def index():
    """Render the main page with the current user's 10 newest translations."""
    uid = session["uid"]
    with SessionLocal() as s:
        newest_first = (
            s.query(Translation)
            .filter(Translation.user_id == uid)
            .order_by(Translation.id.desc())
            .limit(10)
        )
        data = [
            {"src": it.src, "mt": it.mt, "created_at": it.created_at}
            for it in newest_first.all()
        ]
    # CHANGED: pass the variables the template uses.
    return render_template(
        "index.html",
        user=session.get("email"),
        data=data,
        device=DEVICE,
    )
192
+
193
@app.get("/about")
def about_page():
    """Render the static about page."""
    return render_template("about.html")
195
+
196
@app.post("/translate")
def api_translate():
    """JSON endpoint: translate ``text`` and store the result for the user.

    Request body: ``{"text": str, "max_new_tokens": int (optional)}``.

    Responses:
        200 ``{"mt": ...}`` on success;
        400 for empty text or a non-integer ``max_new_tokens``;
        401 when no user is logged in;
        500 ``{"error": "server error"}`` when translation or storage fails.
    """
    if not session.get("uid"):
        return jsonify({"error": "Unauthorized"}), 401

    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        # A missing, invalid or non-object JSON body counts as empty.
        payload = {}

    raw_text = payload.get("text")
    text = raw_text.strip() if isinstance(raw_text, str) else ""

    # Client-supplied value: bad input is a 400 rather than an unhandled 500.
    try:
        max_new = int(payload.get("max_new_tokens", 48))
    except (TypeError, ValueError):
        return jsonify({"error": "Invalid max_new_tokens"}), 400
    # Clamp so a single request cannot ask for an unbounded generation.
    max_new_cap = 256
    max_new = max(1, min(max_new, max_new_cap))

    if not text:
        return jsonify({"error": "Empty text"}), 400

    try:
        clean = prenorm(text)
        mt = translate_with_model(clean, max_new_tokens=max_new)
        with SessionLocal() as s:
            s.add(Translation(user_id=session["uid"], src=text, mt=mt))
            s.commit()
        return jsonify({"mt": mt})
    except Exception:
        # Boundary handler: the traceback goes to the log, while the client
        # only gets a generic message.
        log.exception("translate error")
        return jsonify({"error": "server error"}), 500
 
 
 
 
211
 
 
212
if __name__ == "__main__":
    # Direct execution: listen on all interfaces, port from PORT (default 7860).
    port = int(os.getenv("PORT", "7860"))
    app.run(host="0.0.0.0", port=port, debug=False)