Spaces:

rajeshradhakrishnan
/

english-malayalam

Runtime error

rajeshradhakrishnan committed on May 25, 2023

Commit

656a72b

•

1 Parent(s): adc5884

changed to AI4Bharat IndicTrans-English2Indic

Files changed (3) hide show

main.py CHANGED Viewed

@@ -1,29 +1,45 @@
 import os
-from fastapi import FastAPI, Request
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse
-from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
-model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
-tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
-app = FastAPI()
 @app.get("/infer_t5")
 def t5(input):
-    model_inputs = tokenizer(input, return_tensors="pt")
-    # translate from English to Malayalam
-    generated_tokens = model.generate(
-        **model_inputs,
-        forced_bos_token_id=tokenizer.lang_code_to_id["ml_IN"]
     )
-    output = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
-    return {"output":output}
 app.mount("/", StaticFiles(directory="static", html=True), name="static")

 import os
+import requests
+import json
+from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse
+from mosestokenizer import *
+from indicnlp.tokenize import sentence_tokenize
+# Language codes that split_sentences() hands to indicnlp's
+# sentence_tokenize.sentence_split via its `lang=` argument.
+INDIC = ["as", "bn", "gu", "hi", "kn", "ml", "mr", "or", "pa", "ta", "te"]
+def split_sentences(paragraph, language):
+    if language == "en":
+        with MosesSentenceSplitter(language) as splitter:
+            return splitter([paragraph])
+    elif language in INDIC:
+        return sentence_tokenize.sentence_split(paragraph, lang=language)
+# Earlier local mBART setup, left commented out for reference; the
+# /infer_t5 handler below now posts to a remote translation service instead.
+# model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
+# tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
+app = FastAPI()
+# Base URL of the translation service; t5() appends "/batch_translate" to it.
+# NOTE(review): hard-coded plain-HTTP IP address - consider moving it to configuration.
+uri = "http://216.48.181.177:5050"
 @app.get("/infer_t5")
 def t5(input):
+    API_URL = f"{uri}/batch_translate"
+    sentence_batch = split_sentences(input, language="en")
+    response = requests.post(
+        API_URL,
+        json={
+    "text_lines": sentence_batch,
+    "source_language": "en",
+    "target_language": "ml"
+    },
     )
+    output = json.loads(response.text)
+    return {"output":output["text_lines"][0]}
+# Serve the front end from ./static at the site root (html=True). This is
+# registered after the /infer_t5 route; presumably so the catch-all mount
+# does not shadow the API route - verify against Starlette's route ordering.
 app.mount("/", StaticFiles(directory="static", html=True), name="static")

requirements.txt CHANGED Viewed

@@ -4,4 +4,6 @@ sentencepiece==0.1.*
 torch==1.11.*
 transformers==4.*
 protobuf== 3.19.0
-uvicorn[standard]==0.17.*

 torch==1.11.*
 transformers==4.*
 protobuf== 3.19.0
+uvicorn[standard]==0.17.*
+mosestokenizer
+indic-nlp-library

static/script.js CHANGED Viewed

@@ -69,8 +69,8 @@ async function getMessage(){
             const [prompterText, assistantText] = generatePrompterAssistantText(data[0].generated_text);
             // const en_text_ml = "English: "  + assistantText[0] + " Malayalam:";
             // console.log(en_text_ml)
-            console.log(prompterText)
-            console.log(assistantText)
             outPutElement.textContent = await translateText(assistantText[0]);
             const pElement = document.createElement('p')
             pElement.textContent = inputElement.value

             const [prompterText, assistantText] = generatePrompterAssistantText(data[0].generated_text);
             // const en_text_ml = "English: "  + assistantText[0] + " Malayalam:";
             // console.log(en_text_ml)
+            //console.log(prompterText)
+            //console.log(assistantText)
             outPutElement.textContent = await translateText(assistantText[0]);
             const pElement = document.createElement('p')
             pElement.textContent = inputElement.value