# app.py — VoiceAura Translation API (Hugging Face Space)
"""
VoiceAura Translation API
Models:
1. SLPG/English_to_Urdu_Unsupervised_MT (en β†’ ur)
2. SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration (pa-s β†’ pa-g)
3. SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration (pa-g β†’ pa-s)
"""
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import os, requests, argparse, torch, re
# βœ… PyTorch 2.6 fix
torch.serialization.add_safe_globals([argparse.Namespace])
_original_torch_load = torch.load
def patched_torch_load(*args, **kwargs):
kwargs["weights_only"] = False
return _original_torch_load(*args, **kwargs)
app = FastAPI()
# Wide-open CORS: the API is meant to be called from arbitrary web frontends.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ── Model configs ─────────────────────────────────────────
# One entry per supported direction, keyed "<from_lang>-<to_lang>".
#   files:      local filename -> Hugging Face download URL (checkpoint + fairseq dicts)
#   dir:        local directory the files are downloaded into
#   checkpoint: which .pt file from `files` to load with fairseq
#   detokenize: whether raw model output needs '▁' word-start markers stripped
#   instance:   lazily-populated cache for the loaded model (set by load_model)
MODELS_CONFIG = {
    "en-ur": {
        "files": {
            "checkpoint_8_96000.pt": "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/checkpoint_8_96000.pt",
            "dict.en.txt": "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/dict.en.txt",
            "dict.ur.txt": "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/dict.ur.txt",
        },
        "dir": "models/en_ur",
        "checkpoint": "checkpoint_8_96000.pt",
        "detokenize": False,
        "instance": None,
    },
    "pa-s-pa-g": {
        "files": {
            "checkpoint_5_78000.pt": "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/checkpoint_5_78000.pt",
            "dict.pa.txt": "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/dict.pa.txt",
            "dict.pk.txt": "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/dict.pk.txt",
        },
        "dir": "models/pa_s_pa_g",
        "checkpoint": "checkpoint_5_78000.pt",
        "detokenize": True,
        "instance": None,
    },
    "pa-g-pa-s": {
        "files": {
            "checkpoint_13_129000.pt": "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/checkpoint_13_129000.pt",
            "dict.pa.txt": "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/dict.pa.txt",
            "dict.pk.txt": "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/dict.pk.txt",
        },
        "dir": "models/pa_g_pa_s",
        "checkpoint": "checkpoint_13_129000.pt",
        "detokenize": True,
        "instance": None,
    },
}
# ── Helpers ───────────────────────────────────────────────
def download_file(url: str, path: str):
    """Download `url` to `path` (streamed), skipping if `path` already exists.

    The download goes to a temporary ".part" sibling first and is renamed
    into place only on success, so an interrupted download never leaves a
    partial file that the existence check would mistake for a complete one
    on the next run. On failure the partial file is removed and the
    exception re-raised.
    """
    if os.path.exists(path):
        print(f"[βœ“] Exists: {path}")
        return
    print(f"[↓] Downloading: {path} ...")
    os.makedirs(os.path.dirname(path), exist_ok=True)
    tmp_path = path + ".part"
    try:
        # timeout guards against a hung connection blocking startup forever
        with requests.get(url, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(tmp_path, path)  # atomic rename: path exists only when complete
    except BaseException:
        # Clean up the partial download so a retry starts fresh.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
    print(f"[βœ“] Done: {path}")
def detokenize(sentence: str) -> str:
    """Strip SentencePiece word-start markers ('▁') from model output.

    Mirrors the SLPG Streamlit app's post-processing: the spaces the model
    emits are already correct, so only the '▁' markers are deleted and the
    result is trimmed of surrounding whitespace.
    """
    cleaned = sentence.translate({ord('▁'): None})
    return cleaned.strip()
def load_model(pair: str):
    """Return the cached fairseq model for `pair`, loading it on first use.

    Downloads any missing checkpoint/dictionary files, then loads the model
    with torch.load temporarily patched to allow full (non-weights_only)
    unpickling, which fairseq checkpoints require.

    Raises KeyError if `pair` is not in MODELS_CONFIG.
    """
    cfg = MODELS_CONFIG[pair]
    if cfg["instance"] is not None:
        return cfg["instance"]
    for fname, url in cfg["files"].items():
        download_file(url, os.path.join(cfg["dir"], fname))
    from fairseq.models.transformer import TransformerModel
    # Patch torch.load only for the duration of checkpoint loading, and always
    # restore it — previously the patch leaked if from_pretrained raised.
    torch.load = patched_torch_load
    try:
        model = TransformerModel.from_pretrained(
            cfg["dir"],
            checkpoint_file=cfg["checkpoint"],
            data_name_or_path=cfg["dir"],
        )
    finally:
        torch.load = _original_torch_load
    model.eval()
    cfg["instance"] = model
    print(f"[βœ“] Model ready: {pair}")
    return model
# ── Startup ───────────────────────────────────────────────
@app.on_event("startup")
async def startup():
for pair in MODELS_CONFIG:
load_model(pair)
# ── API ───────────────────────────────────────────────────
class Req(BaseModel):
    """Translation request body for POST /translate."""
    text: str  # text to translate/transliterate
    from_lang: str = "en"  # source code; "<from_lang>-<to_lang>" must be a MODELS_CONFIG key
    to_lang: str = "ur"  # target code
@app.get("/")
def root():
loaded = {k: MODELS_CONFIG[k]["instance"] is not None for k in MODELS_CONFIG}
return {"status": "VoiceAura API βœ“", "models_loaded": loaded}
@app.post("/translate")
def translate(req: Req):
if not req.text.strip():
return {"success": False, "translation": ""}
pair = f"{req.from_lang}-{req.to_lang}"
if pair not in MODELS_CONFIG:
return {"success": False, "translation": f"⚠️ Pair '{pair}' not supported."}
try:
cfg = MODELS_CONFIG[pair]
model = load_model(pair)
raw = model.translate(req.text.strip())
print(f"[DEBUG] pair={pair} | input={req.text} | raw={repr(raw)}")
result = detokenize(raw) if cfg["detokenize"] else raw
print(f"[DEBUG] final={repr(result)}")
return {
"success": True,
"translation": result,
"pair": pair,
"raw": raw,
}
except Exception as e:
print(f"[ERROR] [{pair}]: {e}")
return {"success": False, "translation": str(e)}