amosnbn commited on
Commit
ff8a965
·
1 Parent(s): c5f0f42
Files changed (1) hide show
  1. app.py +134 -187
app.py CHANGED
@@ -1,202 +1,149 @@
1
- # app.py — PapuaTranslate (HuggingFace Spaces + Supabase + mT5-LoRA + prenorm)
2
-
3
- import os, re, logging, threading
4
- from datetime import datetime, timezone
5
- from functools import wraps
6
  from flask import (
7
  Flask, render_template, request, redirect, url_for,
8
- session, jsonify, flash
9
  )
10
- from werkzeug.security import generate_password_hash, check_password_hash
11
 
12
- logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
13
- log = logging.getLogger("papuatranslate")
14
-
15
- # ================= APP CONFIG =================
16
- app = Flask(__name__, template_folder="frontend", static_folder="static")
17
- app.config["SECRET_KEY"] = os.getenv("SECRET_KEY", "dev-secret-key")
18
- app.config["SESSION_COOKIE_SAMESITE"] = "Lax"
19
- app.config["SESSION_COOKIE_SECURE"] = False # penting di Spaces biar cookie disimpan
20
- app.config["PERMANENT_SESSION_LIFETIME"] = 3600 * 24
21
-
22
- # ================= DATABASE =================
23
- from sqlalchemy import create_engine, Column, Integer, Text, DateTime, ForeignKey, func
24
- from sqlalchemy.orm import declarative_base, sessionmaker, scoped_session, relationship
25
-
26
- DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///local.db")
27
- if DATABASE_URL.startswith("postgresql") and "sslmode" not in DATABASE_URL:
28
- DATABASE_URL += "?sslmode=require"
29
-
30
- engine = create_engine(DATABASE_URL, pool_pre_ping=True)
31
- SessionLocal = scoped_session(sessionmaker(bind=engine))
32
- Base = declarative_base()
33
-
34
- class User(Base):
35
- __tablename__ = "users"
36
- id = Column(Integer, primary_key=True)
37
- email = Column(Text, unique=True, nullable=False)
38
- pass_hash = Column(Text, nullable=True)
39
- password = Column(Text, nullable=True)
40
- created_at = Column(DateTime(timezone=True), server_default=func.now())
41
-
42
- class Translation(Base):
43
- __tablename__ = "translations"
44
- id = Column(Integer, primary_key=True)
45
- user_id = Column(Integer, ForeignKey("users.id"))
46
- src = Column(Text)
47
- mt = Column(Text)
48
- created_at = Column(DateTime(timezone=True), server_default=func.now())
49
-
50
- Base.metadata.create_all(engine)
51
- log.info("[DB] Ready: %s", DATABASE_URL)
52
-
53
- # ================= PRENORM =================
54
- PAPUA_MAP = {
55
- r"\bsa\b": "saya",
56
- r"\bko\b": "kamu",
57
- r"\btra\b": "tidak",
58
- r"\bndak\b": "tidak",
59
- r"\bmo\b": "mau",
60
- r"\bpu\b": "punya",
61
- r"\bsu\b": "sudah",
62
- r"\bkong\b": "kemudian",
63
- }
64
- def prenorm(t: str) -> str:
65
- t = re.sub(r"\s+", " ", t.strip())
66
- for pat, rep in PAPUA_MAP.items():
67
- t = re.sub(pat, rep, t, flags=re.IGNORECASE)
68
- return t
69
-
70
- # ================= AUTH UTILS =================
71
- def _set_hash(user: User, raw_pwd: str):
72
- h = generate_password_hash(raw_pwd)
73
- user.pass_hash = h
74
- user.password = h
75
-
76
- def _verify(user: User, raw_pwd: str) -> bool:
77
- return check_password_hash(user.pass_hash or user.password, raw_pwd)
78
-
79
- def login_required(f):
80
- @wraps(f)
81
- def decorated(*args, **kwargs):
82
- if not session.get("uid"):
83
- return redirect(url_for("login_get"))
84
- return f(*args, **kwargs)
85
- return decorated
86
-
87
- # ================= MODEL (lazy-load) =================
88
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
89
  from peft import PeftModel
90
 
91
- BASE_MODEL_ID = os.getenv("BASE_MODEL_ID", "amosnbn/cendol-mt5-base-inst")
92
- ADAPTER_ID = os.getenv("ADAPTER_ID", "amosnbn/papua-lora-ckpt-168")
93
- DEVICE = os.getenv("DEVICE", "cpu")
94
- _tok, _model, _lock = None, None, threading.Lock()
95
-
96
- def get_model():
97
- global _tok, _model
98
- if _model is None:
99
- with _lock:
100
- if _model is None:
101
- log.info("[MODEL] Loading base=%s adapter=%s", BASE_MODEL_ID, ADAPTER_ID)
102
- _tok = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
103
- base = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL_ID)
104
- _model = PeftModel.from_pretrained(base, ADAPTER_ID)
105
- _model.eval()
106
- if DEVICE == "cpu": _model.to("cpu")
107
- log.info("[MODEL] Loaded successfully")
108
- return _tok, _model
109
-
110
- def translate_mt(text: str, max_new=48) -> str:
111
- tok, model = get_model()
112
- inputs = tok([text], return_tensors="pt")
113
- output = model.generate(**inputs, max_new_tokens=max_new, num_beams=4)
114
- return tok.decode(output[0], skip_special_tokens=True)
115
-
116
- # ================= ROUTES =================
117
- @app.before_request
118
- def _req():
119
- log.info("[REQ] %s %s", request.method, request.path)
120
-
121
- @app.get("/")
122
- @login_required
123
- def index():
124
- with SessionLocal() as s:
125
- uid = session["uid"]
126
- hist = s.query(Translation).filter(Translation.user_id == uid).all()
127
- data = [{"src": h.src, "mt": h.mt} for h in hist]
128
- return render_template("index.html", user=session.get("email"), data=data)
129
-
130
- @app.get("/login")
131
- def login_get():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  return render_template("login.html")
133
 
134
- @app.post("/login")
135
- def login_post():
136
- email = (request.form.get("email") or "").strip().lower()
137
- pwd = request.form.get("password") or ""
138
- log.info("[AUTH] login attempt %s", email)
139
- if not email or not pwd:
140
- flash("Email atau password kosong", "error")
141
- return redirect(url_for("login_get"))
142
-
143
- with SessionLocal() as s:
144
- u = s.query(User).filter_by(email=email).first()
145
- if not u or not _verify(u, pwd):
146
- flash("Email/password salah", "error")
147
- return redirect(url_for("login_get"))
148
- session["uid"] = u.id
149
- session["email"] = u.email
150
- log.info("[AUTH] Login OK uid=%s", u.id)
151
- return redirect(url_for("index"))
152
-
153
- @app.get("/register")
154
- def register_get():
155
- return render_template("register.html")
156
-
157
- @app.post("/register")
158
- def register_post():
159
- email = (request.form.get("email") or "").strip().lower()
160
- pwd = request.form.get("password") or ""
161
- log.info("[AUTH] register %s", email)
162
- if not email or not pwd:
163
- flash("Lengkapi data", "error")
164
- return redirect(url_for("register_get"))
165
-
166
- with SessionLocal() as s:
167
- if s.query(User).filter_by(email=email).first():
168
- flash("Email sudah terdaftar", "error")
169
- return redirect(url_for("login_get"))
170
- u = User(email=email)
171
- _set_hash(u, pwd)
172
- s.add(u); s.commit()
173
- log.info("[AUTH] created id=%s", u.id)
174
- session["uid"], session["email"] = u.id, u.email
175
- return redirect(url_for("index"))
176
-
177
- @app.get("/logout")
178
  def logout():
179
  session.clear()
180
- return redirect(url_for("login_get"))
181
-
182
- @app.post("/translate")
183
- def translate_api():
184
- if not session.get("uid"):
185
- return jsonify({"error": "not logged in"}), 401
 
 
 
 
 
 
 
186
  data = request.get_json(silent=True) or {}
187
  text = (data.get("text") or "").strip()
188
  if not text:
189
- return jsonify({"error": "empty text"}), 400
190
- norm = prenorm(text)
191
- result = translate_mt(norm)
192
- with SessionLocal() as s:
193
- s.add(Translation(user_id=session["uid"], src=text, mt=result))
194
- s.commit()
195
- return jsonify({"mt": result})
196
-
197
- @app.get("/debug")
198
- def debug():
199
- return jsonify(dict(session=session, cookies=request.cookies))
200
-
 
201
  if __name__ == "__main__":
202
- app.run(host="0.0.0.0", port=7860, debug=True)
 
 
1
+ # app.py — PapuaTranslate (HF Spaces, Flask)
2
+ import os, logging
3
+ from functools import lru_cache
4
+ from typing import Optional
 
5
  from flask import (
6
  Flask, render_template, request, redirect, url_for,
7
+ session, flash, jsonify
8
  )
9
+ from werkzeug.security import check_password_hash, generate_password_hash
10
 
11
+ # --- SQLAlchemy (opsional: Supabase Postgres) ---
12
+ from sqlalchemy import create_engine, text
13
+ from sqlalchemy.exc import SQLAlchemyError
14
+
15
+ # --- Model (lazy load) ---
16
+ import torch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
18
  from peft import PeftModel
19
 
20
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
21
+ log = logging.getLogger("papua-translate")
22
+
23
+ # ================== ENV ==================
24
+ SECRET_KEY = os.getenv("SECRET_KEY", "dev-secret-please-set")
25
+ DATABASE_URL = os.getenv("DATABASE_URL", "") # e.g. postgresql+psycopg2://user:pass@host:6543/postgres?sslmode=require
26
+ BASE_MODEL_ID = os.getenv("BASE_MODEL_ID", "google/mt5-small")
27
+ ADAPTER_ID = os.getenv("ADAPTER_ID", "") # e.g. your LoRA on HF
28
+ DISABLE_AUTH = os.getenv("DISABLE_AUTH", "false").lower() in ("1", "true", "yes")
29
+
30
+ # ================== APP ==================
31
+ app = Flask(__name__, template_folder="templates", static_folder="static")
32
+ app.secret_key = SECRET_KEY
33
+
34
+ # Cookie settings aman untuk HF (HTTPS)
35
+ app.config.update(
36
+ SESSION_COOKIE_HTTPONLY=True,
37
+ SESSION_COOKIE_SAMESITE="Lax",
38
+ SESSION_COOKIE_SECURE=True, # HF pakai HTTPS
39
+ PERMANENT_SESSION_LIFETIME=60*60*8, # 8 jam
40
+ )
41
+
42
+ # ================== DB (opsional) ==================
43
+ engine = None
44
+ if DATABASE_URL:
45
+ try:
46
+ engine = create_engine(DATABASE_URL, pool_pre_ping=True, pool_recycle=1800)
47
+ with engine.connect() as conn:
48
+ conn.execute(text("SELECT 1"))
49
+ log.info("[DB] Connected OK")
50
+ except SQLAlchemyError as e:
51
+ log.error(f"[DB] Failed: {e}")
52
+
53
+ # ================== AUTH MOCK ==================
54
+ # Untuk demo: 1 user statis via ENV (atau bisa kamu simpan di DB)
55
+ ADMIN_USER = os.getenv("ADMIN_USER", "admin@example.com")
56
+ ADMIN_PASS_HASH = os.getenv("ADMIN_PASS_HASH", generate_password_hash(os.getenv("ADMIN_PASS", "admin123")))
57
+
58
+ def is_logged_in() -> bool:
59
+ return bool(session.get("uid"))
60
+
61
+ # ================== LAZY MODEL ==================
62
+ @lru_cache(maxsize=1)
63
+ def _load_model():
64
+ """
65
+ Load base mT5 + attach LoRA (jika ada). Diload sekali saat dipakai pertama kali.
66
+ """
67
+ log.info(f"[MODEL] Loading base: {BASE_MODEL_ID}")
68
+ tok = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
69
+ base = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL_ID)
70
+ if ADAPTER_ID:
71
+ log.info(f"[MODEL] Attaching LoRA: {ADAPTER_ID}")
72
+ base = PeftModel.from_pretrained(base, ADAPTER_ID)
73
+ device = "cuda" if torch.cuda.is_available() else "cpu"
74
+ base.to(device)
75
+ base.eval()
76
+ log.info(f"[MODEL] Ready on {device}")
77
+ return tok, base, device
78
+
79
+ def translate_text(input_text: str, max_new_tokens: int = 64) -> str:
80
+ tok, model, device = _load_model()
81
+ inputs = tok(input_text, return_tensors="pt", truncation=True).to(device)
82
+ with torch.no_grad():
83
+ out = model.generate(
84
+ **inputs,
85
+ max_new_tokens=max_new_tokens,
86
+ num_beams=4,
87
+ length_penalty=1.0
88
+ )
89
+ return tok.decode(out[0], skip_special_tokens=True)
90
+
91
+ # ================== ROUTES ==================
92
+ @app.route("/")
93
+ def home():
94
+ # Jika auth dimatikan, langsung ke dashboard
95
+ if DISABLE_AUTH or is_logged_in():
96
+ return redirect(url_for("dashboard"))
97
+ return redirect(url_for("login"))
98
+
99
+ @app.route("/login", methods=["GET", "POST"])
100
+ def login():
101
+ if DISABLE_AUTH:
102
+ session["uid"] = "debug-user"
103
+ return redirect(url_for("dashboard"))
104
+
105
+ if request.method == "POST":
106
+ email = request.form.get("email", "").strip().lower()
107
+ password = request.form.get("password", "")
108
+ if email == ADMIN_USER and check_password_hash(ADMIN_PASS_HASH, password):
109
+ session["uid"] = email
110
+ return redirect(url_for("dashboard"))
111
+ flash("Email atau password salah.", "error")
112
+ return render_template("login.html")
113
  return render_template("login.html")
114
 
115
+ @app.route("/logout")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  def logout():
117
  session.clear()
118
+ flash("Anda telah keluar.", "info")
119
+ return redirect(url_for("login"))
120
+
121
+ @app.route("/dashboard")
122
+ def dashboard():
123
+ if not (DISABLE_AUTH or is_logged_in()):
124
+ return redirect(url_for("login"))
125
+ return render_template("dashboard.html")
126
+
127
+ @app.route("/translate", methods=["POST"])
128
+ def api_translate():
129
+ if not (DISABLE_AUTH or is_logged_in()):
130
+ return jsonify({"ok": False, "error": "Unauthorized"}), 401
131
  data = request.get_json(silent=True) or {}
132
  text = (data.get("text") or "").strip()
133
  if not text:
134
+ return jsonify({"ok": False, "error": "Text kosong"}), 400
135
+ try:
136
+ result = translate_text(text)
137
+ return jsonify({"ok": True, "result": result})
138
+ except Exception as e:
139
+ log.exception("Translate error")
140
+ return jsonify({"ok": False, "error": str(e)}), 500
141
+
142
+ @app.route("/about")
143
+ def about():
144
+ return render_template("about.html")
145
+
146
+ # --- local run (Spaces akan pakai gunicorn) ---
147
  if __name__ == "__main__":
148
+ port = int(os.environ.get("PORT", 7860))
149
+ app.run(host="0.0.0.0", port=port, debug=False)