# Hugging Face Space page header (scrape artifact): "Spaces: Sleeping"
"""
VoiceAura Translation API

Models:
1. SLPG/English_to_Urdu_Unsupervised_MT (en → ur)
2. SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration (pa-s → pa-g)
3. SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration (pa-g → pa-s)
"""
| from fastapi import FastAPI | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| import os, requests, argparse, torch, re | |
# PyTorch >= 2.6 fix: torch.load now defaults to weights_only=True, which
# rejects fairseq checkpoints (they pickle argparse.Namespace objects).
# add_safe_globals only exists on torch >= 2.4, so guard it for older builds.
if hasattr(torch.serialization, "add_safe_globals"):
    torch.serialization.add_safe_globals([argparse.Namespace])

_original_torch_load = torch.load


def patched_torch_load(*args, **kwargs):
    """torch.load wrapper that forces full unpickling (weights_only=False).

    SECURITY: weights_only=False executes arbitrary pickle code during load —
    only use this for checkpoints from trusted sources (here: the SLPG
    Hugging Face repositories).
    """
    kwargs["weights_only"] = False
    return _original_torch_load(*args, **kwargs)
# FastAPI application instance.
app = FastAPI()

# CORS: wide open so any browser front-end can call this API.
# NOTE(review): allow_origins=["*"] is acceptable for a public demo, but
# tighten it if this API ever handles credentials or private data.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ── Model configs ─────────────────────────────────────────
# One entry per supported language pair. Schema per entry:
#   files:      filename -> Hugging Face download URL (fairseq checkpoint + dicts)
#   dir:        local directory the files are downloaded into
#   checkpoint: checkpoint filename passed to fairseq from_pretrained()
#   detokenize: whether raw output needs the word-start markers stripped
#   instance:   lazily filled model cache (None until load_model() runs)
MODELS_CONFIG = {
    "en-ur": {
        "files": {
            "checkpoint_8_96000.pt": "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/checkpoint_8_96000.pt",
            "dict.en.txt": "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/dict.en.txt",
            "dict.ur.txt": "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/dict.ur.txt",
        },
        "dir": "models/en_ur",
        "checkpoint": "checkpoint_8_96000.pt",
        "detokenize": False,
        "instance": None,
    },
    # Punjabi Shahmukhi -> Gurmukhi transliteration.
    "pa-s-pa-g": {
        "files": {
            "checkpoint_5_78000.pt": "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/checkpoint_5_78000.pt",
            "dict.pa.txt": "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/dict.pa.txt",
            "dict.pk.txt": "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/dict.pk.txt",
        },
        "dir": "models/pa_s_pa_g",
        "checkpoint": "checkpoint_5_78000.pt",
        "detokenize": True,
        "instance": None,
    },
    # Punjabi Gurmukhi -> Shahmukhi transliteration.
    "pa-g-pa-s": {
        "files": {
            "checkpoint_13_129000.pt": "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/checkpoint_13_129000.pt",
            "dict.pa.txt": "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/dict.pa.txt",
            "dict.pk.txt": "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/dict.pk.txt",
        },
        "dir": "models/pa_g_pa_s",
        "checkpoint": "checkpoint_13_129000.pt",
        "detokenize": True,
        "instance": None,
    },
}
# ── Helpers ───────────────────────────────────────────────
def download_file(url: str, path: str):
    """Download *url* to *path*, skipping the download if *path* exists.

    Streams to a temporary ".part" file and atomically renames it on
    success, so an interrupted download cannot leave a truncated file
    that the existence check would later mistake for a complete one.
    """
    if os.path.exists(path):
        print(f"[β] Exists: {path}")
        return
    print(f"[β] Downloading: {path} ...")
    parent = os.path.dirname(path)
    if parent:  # makedirs("") raises when path has no directory component
        os.makedirs(parent, exist_ok=True)
    tmp_path = path + ".part"
    # timeout=(connect, read): without it a stalled connection hangs forever.
    with requests.get(url, stream=True, timeout=(10, 300)) as r:
        r.raise_for_status()
        with open(tmp_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    os.replace(tmp_path, path)  # atomic rename on both POSIX and Windows
    print(f"[β] Done: {path}")
def detokenize(sentence: str) -> str:
    """Strip SentencePiece word-start markers from raw model output.

    Mirrors the SLPG Streamlit app's logic: spaces between words are
    already correct in the model output, so detokenization is just
    removing the word-start marker (U+2581 '▁') and trimming whitespace.

    NOTE(review): the original file's marker literal was mojibake'd (every
    non-ASCII glyph in this file was garbled identically); U+2581 is the
    canonical SentencePiece word-boundary marker — confirm against the
    upstream SLPG app.
    """
    return sentence.replace('\u2581', '').strip()
def load_model(pair: str):
    """Return the (lazily loaded, cached) fairseq model for *pair*.

    On first use: downloads the checkpoint and dictionary files listed in
    MODELS_CONFIG[pair], loads a fairseq TransformerModel in eval mode,
    and memoizes it under MODELS_CONFIG[pair]["instance"].

    Raises KeyError for an unknown pair; network/loading errors propagate.
    """
    cfg = MODELS_CONFIG[pair]
    if cfg["instance"] is not None:
        return cfg["instance"]

    for fname, url in cfg["files"].items():
        download_file(url, os.path.join(cfg["dir"], fname))

    # fairseq checkpoints need full (unsafe) unpickling under PyTorch >= 2.6,
    # so torch.load is temporarily monkey-patched. try/finally guarantees the
    # original loader is restored even if from_pretrained() raises — the
    # previous version left torch.load patched on failure.
    torch.load = patched_torch_load
    try:
        # Imported here so the server can start without fairseq installed
        # until a model is actually requested.
        from fairseq.models.transformer import TransformerModel
        model = TransformerModel.from_pretrained(
            cfg["dir"],
            checkpoint_file=cfg["checkpoint"],
            data_name_or_path=cfg["dir"],
        )
    finally:
        torch.load = _original_torch_load

    model.eval()
    cfg["instance"] = model
    print(f"[β] Model ready: {pair}")
    return model
# ── Startup ───────────────────────────────────────────────
async def startup():
    """Eagerly download and load every configured model pair.

    NOTE(review): no @app.on_event("startup") / lifespan registration is
    visible in this file — confirm this coroutine is actually wired up,
    otherwise models load lazily on first request instead.
    """
    for key in MODELS_CONFIG:
        load_model(key)
# ── API ───────────────────────────────────────────────────
class Req(BaseModel):
    """Request body for the translation endpoint."""
    # Text to translate/transliterate.
    text: str
    # Language-pair codes; "{from_lang}-{to_lang}" must be a MODELS_CONFIG key.
    from_lang: str = "en"
    to_lang: str = "ur"
def root():
    """Health check: report service status and which model pairs are loaded.

    NOTE(review): no route decorator (e.g. @app.get("/")) is visible here —
    verify this handler is actually registered with the app.
    """
    status = {name: cfg["instance"] is not None for name, cfg in MODELS_CONFIG.items()}
    return {"status": "VoiceAura API β", "models_loaded": status}
def translate(req: Req):
    """Translate req.text with the model for the (from_lang, to_lang) pair.

    Returns a dict carrying a success flag, the final translation, the pair
    id, and the raw model output. Errors are reported in-band (success=False)
    rather than raised to the client.

    NOTE(review): no route decorator (e.g. @app.post("/translate")) is
    visible here — verify this handler is actually registered with the app.
    """
    text = req.text.strip()
    if not text:
        return {"success": False, "translation": ""}

    pair = f"{req.from_lang}-{req.to_lang}"
    if pair not in MODELS_CONFIG:
        return {"success": False, "translation": f"β οΈ Pair '{pair}' not supported."}

    try:
        cfg = MODELS_CONFIG[pair]
        model = load_model(pair)
        raw = model.translate(text)
        print(f"[DEBUG] pair={pair} | input={req.text} | raw={repr(raw)}")
        result = detokenize(raw) if cfg["detokenize"] else raw
        print(f"[DEBUG] final={repr(result)}")
        return {
            "success": True,
            "translation": result,
            "pair": pair,
            "raw": raw,
        }
    except Exception as e:  # top-level boundary: surface the error in-band
        print(f"[ERROR] [{pair}]: {e}")
        return {"success": False, "translation": str(e)}