rajeshradhakrishnan committed on
Commit
656a72b
1 Parent(s): adc5884

changed to AI4Bharat IndicTrans-English2Indic

Browse files
Files changed (3) hide show
  1. main.py +30 -14
  2. requirements.txt +3 -1
  3. static/script.js +2 -2
main.py CHANGED
@@ -1,29 +1,45 @@
1
  import os
2
- from fastapi import FastAPI, Request
 
 
3
  from fastapi.staticfiles import StaticFiles
4
  from fastapi.responses import FileResponse
5
- from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
 
6
 
 
7
 
8
- model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
9
- tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
 
 
 
 
 
10
 
11
- app = FastAPI()
 
 
12
 
 
13
 
 
14
 
15
  @app.get("/infer_t5")
16
  def t5(input):
17
- model_inputs = tokenizer(input, return_tensors="pt")
18
-
19
- # translate from English to Malayalam
20
- generated_tokens = model.generate(
21
- **model_inputs,
22
- forced_bos_token_id=tokenizer.lang_code_to_id["ml_IN"]
 
 
 
23
  )
24
-
25
- output = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
26
- return {"output":output}
27
 
28
  app.mount("/", StaticFiles(directory="static", html=True), name="static")
29
 
 
1
  import os
2
+ import requests
3
+ import json
4
+ from fastapi import FastAPI
5
  from fastapi.staticfiles import StaticFiles
6
  from fastapi.responses import FileResponse
7
+ from mosestokenizer import *
8
+ from indicnlp.tokenize import sentence_tokenize
9
 
10
+ INDIC = ["as", "bn", "gu", "hi", "kn", "ml", "mr", "or", "pa", "ta", "te"]
11
 
12
+ def split_sentences(paragraph, language):
13
+ if language == "en":
14
+ with MosesSentenceSplitter(language) as splitter:
15
+ return splitter([paragraph])
16
+ elif language in INDIC:
17
+ return sentence_tokenize.sentence_split(paragraph, lang=language)
18
+
19
 
20
+
21
+ # model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
22
+ # tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
23
 
24
+ app = FastAPI()
25
 
26
+ uri = "http://216.48.181.177:5050"
27
 
28
  @app.get("/infer_t5")
29
  def t5(input):
30
+ API_URL = f"{uri}/batch_translate"
31
+ sentence_batch = split_sentences(input, language="en")
32
+ response = requests.post(
33
+ API_URL,
34
+ json={
35
+ "text_lines": sentence_batch,
36
+ "source_language": "en",
37
+ "target_language": "ml"
38
+ },
39
  )
40
+
41
+ output = json.loads(response.text)
42
+ return {"output":output["text_lines"][0]}
43
 
44
  app.mount("/", StaticFiles(directory="static", html=True), name="static")
45
 
requirements.txt CHANGED
@@ -4,4 +4,6 @@ sentencepiece==0.1.*
4
  torch==1.11.*
5
  transformers==4.*
6
  protobuf== 3.19.0
7
- uvicorn[standard]==0.17.*
 
 
 
4
  torch==1.11.*
5
  transformers==4.*
6
  protobuf== 3.19.0
7
+ uvicorn[standard]==0.17.*
8
+ mosestokenizer
9
+ indic-nlp-library
static/script.js CHANGED
@@ -69,8 +69,8 @@ async function getMessage(){
69
  const [prompterText, assistantText] = generatePrompterAssistantText(data[0].generated_text);
70
  // const en_text_ml = "English: " + assistantText[0] + " Malayalam:";
71
  // console.log(en_text_ml)
72
- console.log(prompterText)
73
- console.log(assistantText)
74
  outPutElement.textContent = await translateText(assistantText[0]);
75
  const pElement = document.createElement('p')
76
  pElement.textContent = inputElement.value
 
69
  const [prompterText, assistantText] = generatePrompterAssistantText(data[0].generated_text);
70
  // const en_text_ml = "English: " + assistantText[0] + " Malayalam:";
71
  // console.log(en_text_ml)
72
+ //console.log(prompterText)
73
+ //console.log(assistantText)
74
  outPutElement.textContent = await translateText(assistantText[0]);
75
  const pElement = document.createElement('p')
76
  pElement.textContent = inputElement.value