from fastapi import FastAPI, Request
from transformers import MarianMTModel, MarianTokenizer
import torch

app = FastAPI()

# Map target languages to Hugging Face model IDs
MODEL_MAP = {
    "bg": "Helsinki-NLP/opus-mt-tc-big-en-bg",
    "cs": "Helsinki-NLP/opus-mt-en-cs",
    "da": "Helsinki-NLP/opus-mt-en-da",
    "de": "Helsinki-NLP/opus-mt-en-de",
    "el": "Helsinki-NLP/opus-mt-tc-big-en-el",
    "es": "facebook/nllb-200-distilled-600M",
    "et": "Helsinki-NLP/opus-mt-tc-big-en-et",
    "fi": "Helsinki-NLP/opus-mt-tc-big-en-fi",
    "fr": "Helsinki-NLP/opus-mt-en-fr",
    "hr": "facebook/mbart-large-50-many-to-many-mmt",
    "hu": "Helsinki-NLP/opus-mt-tc-big-en-hu",
    "is": "facebook/nllb-200-distilled-600M",
    "it": "facebook/nllb-200-distilled-600M",
    "lt": "Helsinki-NLP/opus-mt-tc-big-en-lt",
    "lv": "facebook/mbart-large-50-many-to-many-mmt",
    "mk": "facebook/nllb-200-distilled-600M",
    "nb": "facebook/mbart-large-50-many-to-many-mmt", #place holder!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    "nl": "facebook/mbart-large-50-many-to-many-mmt",
    "no": "facebook/mbart-large-50-many-to-many-mmt", #place holder!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    "pl": "facebook/nllb-200-distilled-600M",
    "pt": "facebook/mbart-large-50-many-to-many-mmt",
    "ro": "facebook/mbart-large-50-many-to-many-mmt",
    "sk": "Helsinki-NLP/opus-mt-en-sk",
    "sl": "alirezamsh/small100",
    "sq": "alirezamsh/small100",
    "sv": "Helsinki-NLP/opus-mt-en-sv",
    "tr": "facebook/nllb-200-distilled-600M"
}


MODEL_CACHE = {}

# Load and cache a Marian (Helsinki-NLP OPUS-MT) tokenizer/model pair on CPU
def load_model(model_id):
    if model_id not in MODEL_CACHE:
        tokenizer = MarianTokenizer.from_pretrained(model_id)
        model = MarianMTModel.from_pretrained(model_id).to("cpu")
        MODEL_CACHE[model_id] = (tokenizer, model)
    return MODEL_CACHE[model_id]
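
# Optional: pre-warm the cache at startup so the first request does not pay the
# model-download cost. This is a sketch, not part of the original app; the language
# codes "de" and "fr" are arbitrary examples.
#
#   for lang in ("de", "fr"):
#       load_model(MODEL_MAP[lang])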

# POST /translate
@app.post("/translate")
async def translate(request: Request):
    data = await request.json()
    text = data.get("text")
    target_lang = data.get("target_lang")

    if not text or not target_lang:
        return {"error": "Missing 'text' or 'target_lang'"}

    model_id = MODEL_MAP.get(target_lang)
    if not model_id:
        return {"error": f"No model found for target language '{target_lang}'"}

    # Only Marian (Helsinki-NLP OPUS-MT) checkpoints can be loaded by load_model();
    # other architectures (NLLB, mBART, SMALL-100) are not wired up in this Space yet.
    if not model_id.startswith("Helsinki-NLP/"):
        return {"translation": f"[{target_lang}] uses model '{model_id}', which is not supported in this Space yet."}

    try:
        tokenizer, model = load_model(model_id)
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(model.device)
        outputs = model.generate(**inputs, num_beams=5, length_penalty=1.2, early_stopping=True)
        return {"translation": tokenizer.decode(outputs[0], skip_special_tokens=True)}
    except Exception as e:
        return {"error": f"Translation failed: {str(e)}"}

# GET /languages
@app.get("/languages")
def list_languages():
    return {"supported_languages": list(MODEL_MAP.keys())}

# GET /health
@app.get("/health")
def health():
    return {"status": "ok"}

# Uvicorn startup (required by Hugging Face Spaces; the app is served on port 7860)
import uvicorn
if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860)