rajeshradhakrishnan's picture
changed to AI4Bharat IndicTrans-English2Indic
656a72b
raw
history blame contribute delete
No virus
1.44 kB
import os
import requests
import json
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from mosestokenizer import *
from indicnlp.tokenize import sentence_tokenize
# Language codes routed to the Indic NLP sentence splitter.
INDIC = ["as", "bn", "gu", "hi", "kn", "ml", "mr", "or", "pa", "ta", "te"]


def split_sentences(paragraph, language):
    """Split *paragraph* into a list of sentences.

    Args:
        paragraph: Text to split.
        language: ``"en"`` (handled by ``MosesSentenceSplitter``) or one of
            the codes in ``INDIC`` (handled by
            ``sentence_tokenize.sentence_split``).

    Returns:
        A list of sentences. Empty when *paragraph* is empty or contains
        only whitespace.

    Raises:
        ValueError: If *language* is neither ``"en"`` nor listed in ``INDIC``.
    """
    # Previously an unknown language fell off the end of the function and
    # returned None, which only failed later in the caller.
    if language != "en" and language not in INDIC:
        raise ValueError(f"Unsupported language for sentence splitting: {language!r}")

    # Nothing to split; avoid handing blank text to the splitter backends.
    if not paragraph or not paragraph.strip():
        return []

    if language == "en":
        with MosesSentenceSplitter(language) as splitter:
            return splitter([paragraph])
    return sentence_tokenize.sentence_split(paragraph, lang=language)
# model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
# tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
app = FastAPI()

# Base URL of the remote translation service. The TRANSLATION_API_URL
# environment variable overrides it; the default is the address this file
# originally hard-coded.
uri = os.environ.get("TRANSLATION_API_URL", "http://216.48.181.177:5050")
@app.get("/infer_t5")
def t5(input):
    """Translate English text to Malayalam through the remote batch API.

    The text is split into sentences, posted to ``{uri}/batch_translate``
    with source language ``"en"`` and target language ``"ml"``, and the
    translated lines are joined back into a single string.

    Args:
        input: English text to translate. The name shadows the builtin but
            is kept because it is the public query-parameter name.

    Returns:
        ``{"output": <translated text>}``; the text is empty when *input*
        is blank or the service returns no lines.

    Raises:
        HTTPException: 502 when the translation service cannot be reached,
            answers with an error status, or returns an unexpected payload.
    """
    # Imported locally so this handler does not depend on the file-level
    # fastapi import list being changed.
    from fastapi import HTTPException

    # Blank input has nothing to translate; skip the splitter and network.
    if not input or not input.strip():
        return {"output": ""}

    sentence_batch = split_sentences(input, language="en")
    if not sentence_batch:
        return {"output": ""}

    try:
        response = requests.post(
            f"{uri}/batch_translate",
            json={
                "text_lines": sentence_batch,
                "source_language": "en",
                "target_language": "ml",
            },
            # Without a timeout a stalled backend would block this worker
            # indefinitely.
            timeout=60,
        )
        response.raise_for_status()
        translated_lines = response.json()["text_lines"]
    except (requests.RequestException, ValueError, KeyError) as err:
        raise HTTPException(
            status_code=502,
            detail="Translation service is unavailable or returned an invalid response",
        ) from err

    # Return every translated sentence, not just the first one, so that
    # multi-sentence input is not truncated.
    # NOTE(review): assumes "text_lines" is a list of strings — confirm
    # against the service contract.
    return {"output": " ".join(translated_lines)}
@app.get("/")
def index() -> FileResponse:
    """Serve the front-end landing page."""
    return FileResponse(path="/app/static/index.html", media_type="text/html")


# Mounted last on purpose: routes are matched in registration order and a
# mount at "/" matches every path, so anything registered after it would
# never be reached.
app.mount("/", StaticFiles(directory="static", html=True), name="static")