# Hugging Face Space page header (scrape artifact): "Spaces: Sleeping"
"""
VoiceAura Translation API

Models:
1. SLPG/English_to_Urdu_Unsupervised_MT (en → ur)
2. SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration (pa-s → pa-g)
3. SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration (pa-g → pa-s)
"""
| from fastapi import FastAPI | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| import os, requests, argparse, torch, re | |
# PyTorch >= 2.6 fix: torch.load now defaults to weights_only=True, which
# rejects fairseq checkpoints (they pickle argparse.Namespace objects).
# add_safe_globals only exists on torch >= 2.4, so guard it for older builds.
if hasattr(torch.serialization, "add_safe_globals"):
    torch.serialization.add_safe_globals([argparse.Namespace])

_original_torch_load = torch.load


def patched_torch_load(*args, **kwargs):
    """torch.load wrapper that forces full unpickling (weights_only=False).

    SECURITY: weights_only=False executes arbitrary pickle code during load —
    only use this for checkpoints from trusted sources (here: the SLPG
    Hugging Face repositories).
    """
    kwargs["weights_only"] = False
    return _original_torch_load(*args, **kwargs)
# FastAPI application instance.
app = FastAPI()

# CORS: wide open so any browser front-end can call this API.
# NOTE(review): allow_origins=["*"] is acceptable for a public demo, but
# tighten it if this API ever handles credentials or private data.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ── Model configs ─────────────────────────────────────────
# One entry per supported language pair. Schema per entry:
#   files:      filename -> Hugging Face download URL (fairseq checkpoint + dicts)
#   dir:        local directory the files are downloaded into
#   checkpoint: checkpoint filename passed to fairseq from_pretrained()
#   detokenize: whether raw output needs the word-start markers stripped
#   instance:   lazily filled model cache (None until load_model() runs)
MODELS_CONFIG = {
    "en-ur": {
        "files": {
            "checkpoint_8_96000.pt": "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/checkpoint_8_96000.pt",
            "dict.en.txt": "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/dict.en.txt",
            "dict.ur.txt": "https://huggingface.co/SLPG/English_to_Urdu_Unsupervised_MT/resolve/main/dict.ur.txt",
        },
        "dir": "models/en_ur",
        "checkpoint": "checkpoint_8_96000.pt",
        "detokenize": False,
        "instance": None,
    },
    # Punjabi Shahmukhi -> Gurmukhi transliteration.
    "pa-s-pa-g": {
        "files": {
            "checkpoint_5_78000.pt": "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/checkpoint_5_78000.pt",
            "dict.pa.txt": "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/dict.pa.txt",
            "dict.pk.txt": "https://huggingface.co/SLPG/Punjabi_Shahmukhi_to_Gurmukhi_Transliteration/resolve/main/dict.pk.txt",
        },
        "dir": "models/pa_s_pa_g",
        "checkpoint": "checkpoint_5_78000.pt",
        "detokenize": True,
        "instance": None,
    },
    # Punjabi Gurmukhi -> Shahmukhi transliteration.
    "pa-g-pa-s": {
        "files": {
            "checkpoint_13_129000.pt": "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/checkpoint_13_129000.pt",
            "dict.pa.txt": "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/dict.pa.txt",
            "dict.pk.txt": "https://huggingface.co/SLPG/Punjabi_Gurmukhi_to_Shahmukhi_Transliteration/resolve/main/dict.pk.txt",
        },
        "dir": "models/pa_g_pa_s",
        "checkpoint": "checkpoint_13_129000.pt",
        "detokenize": True,
        "instance": None,
    },
}
# ── Helpers ───────────────────────────────────────────────
def download_file(url: str, path: str):
    """Download *url* to *path*, skipping the download if *path* exists.

    Streams to a temporary ".part" file and atomically renames it on
    success, so an interrupted download cannot leave a truncated file
    that the existence check would later mistake for a complete one.
    """
    if os.path.exists(path):
        print(f"[β] Exists: {path}")
        return
    print(f"[β] Downloading: {path} ...")
    parent = os.path.dirname(path)
    if parent:  # makedirs("") raises when path has no directory component
        os.makedirs(parent, exist_ok=True)
    tmp_path = path + ".part"
    # timeout=(connect, read): without it a stalled connection hangs forever.
    with requests.get(url, stream=True, timeout=(10, 300)) as r:
        r.raise_for_status()
        with open(tmp_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    os.replace(tmp_path, path)  # atomic rename on both POSIX and Windows
    print(f"[β] Done: {path}")
def detokenize(sentence: str) -> str:
    """Strip SentencePiece word-start markers from raw model output.

    Mirrors the SLPG Streamlit app's logic: spaces between words are
    already correct in the model output, so detokenization is just
    removing the word-start marker (U+2581 '▁') and trimming whitespace.

    NOTE(review): the original file's marker literal was mojibake'd (every
    non-ASCII glyph in this file was garbled identically); U+2581 is the
    canonical SentencePiece word-boundary marker — confirm against the
    upstream SLPG app.
    """
    return sentence.replace('\u2581', '').strip()
def load_model(pair: str):
    """Return the (lazily loaded, cached) fairseq model for *pair*.

    On first use: downloads the checkpoint and dictionary files listed in
    MODELS_CONFIG[pair], loads a fairseq TransformerModel in eval mode,
    and memoizes it under MODELS_CONFIG[pair]["instance"].

    Raises KeyError for an unknown pair; network/loading errors propagate.
    """
    cfg = MODELS_CONFIG[pair]
    if cfg["instance"] is not None:
        return cfg["instance"]

    for fname, url in cfg["files"].items():
        download_file(url, os.path.join(cfg["dir"], fname))

    # fairseq checkpoints need full (unsafe) unpickling under PyTorch >= 2.6,
    # so torch.load is temporarily monkey-patched. try/finally guarantees the
    # original loader is restored even if from_pretrained() raises — the
    # previous version left torch.load patched on failure.
    torch.load = patched_torch_load
    try:
        # Imported here so the server can start without fairseq installed
        # until a model is actually requested.
        from fairseq.models.transformer import TransformerModel
        model = TransformerModel.from_pretrained(
            cfg["dir"],
            checkpoint_file=cfg["checkpoint"],
            data_name_or_path=cfg["dir"],
        )
    finally:
        torch.load = _original_torch_load

    model.eval()
    cfg["instance"] = model
    print(f"[β] Model ready: {pair}")
    return model
# ── Startup ───────────────────────────────────────────────
async def startup():
    """Eagerly download and load every configured model pair.

    NOTE(review): no @app.on_event("startup") / lifespan registration is
    visible in this file — confirm this coroutine is actually wired up,
    otherwise models load lazily on first request instead.
    """
    for key in MODELS_CONFIG:
        load_model(key)
# ── API ───────────────────────────────────────────────────
class Req(BaseModel):
    """Request body for the translation endpoint."""
    # Text to translate/transliterate.
    text: str
    # Language-pair codes; "{from_lang}-{to_lang}" must be a MODELS_CONFIG key.
    from_lang: str = "en"
    to_lang: str = "ur"
def root():
    """Health check: report service status and which model pairs are loaded.

    NOTE(review): no route decorator (e.g. @app.get("/")) is visible here —
    verify this handler is actually registered with the app.
    """
    status = {name: cfg["instance"] is not None for name, cfg in MODELS_CONFIG.items()}
    return {"status": "VoiceAura API β", "models_loaded": status}
def translate(req: Req):
    """Translate req.text with the model for the (from_lang, to_lang) pair.

    Returns a dict carrying a success flag, the final translation, the pair
    id, and the raw model output. Errors are reported in-band (success=False)
    rather than raised to the client.

    NOTE(review): no route decorator (e.g. @app.post("/translate")) is
    visible here — verify this handler is actually registered with the app.
    """
    text = req.text.strip()
    if not text:
        return {"success": False, "translation": ""}

    pair = f"{req.from_lang}-{req.to_lang}"
    if pair not in MODELS_CONFIG:
        return {"success": False, "translation": f"β οΈ Pair '{pair}' not supported."}

    try:
        cfg = MODELS_CONFIG[pair]
        model = load_model(pair)
        raw = model.translate(text)
        print(f"[DEBUG] pair={pair} | input={req.text} | raw={repr(raw)}")
        result = detokenize(raw) if cfg["detokenize"] else raw
        print(f"[DEBUG] final={repr(result)}")
        return {
            "success": True,
            "translation": result,
            "pair": pair,
            "raw": raw,
        }
    except Exception as e:  # top-level boundary: surface the error in-band
        print(f"[ERROR] [{pair}]: {e}")
        return {"success": False, "translation": str(e)}