aziizpra committed on
Commit 43b704a · 1 Parent(s): 1f9f845

Upload 6 files

Files changed (6)
  1. Dockerfile +22 -0
  2. MachineTranslation.py +28 -0
  3. app.py +58 -0
  4. download_model.py +7 -0
  5. requirements.txt +7 -0
  6. utils.py +1 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
+ # Base image
+ FROM python:3.9
+
+ # Set working directory
+ WORKDIR /app
+
+ # Copy the application files
+ COPY app.py .
+ COPY MachineTranslation.py .
+ COPY requirements.txt .
+ COPY utils.py .
+ COPY download_model.py .
+
+ # Install dependencies
+ RUN pip3 install -r requirements.txt
+ RUN python3 download_model.py
+
+ # Expose the port
+ EXPOSE 8000
+
+ # Run the application
+ CMD ["python3", "app.py"]
MachineTranslation.py ADDED
@@ -0,0 +1,28 @@
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+ import time
+
+
+ class MachineTranslation:
+     def __init__(self, name_model="facebook/nllb-200-distilled-600M"):
+         self.name_model = name_model
+
+         self.start_time_tokenizer = time.perf_counter()
+         self.tokenizer = AutoTokenizer.from_pretrained(
+             self.name_model)
+         self.end_time_tokenizer = time.perf_counter()
+         # load time tokenizer
+         self.time_tokenizer = self.end_time_tokenizer - self.start_time_tokenizer
+         print("load time tokenizer :", self.time_tokenizer)
+         self.start_time_nllb = time.perf_counter()
+         self.model = AutoModelForSeq2SeqLM.from_pretrained(
+             self.name_model)
+         self.end_time_nllb = time.perf_counter()
+         # load time nllb
+         self.time_nllb = self.end_time_nllb - self.start_time_nllb
+         print("load time nllb :", self.time_nllb)
+
+
+     def predict(self, text, from_lang, to_lang):
+         classifier = pipeline("translation", model=self.model, tokenizer=self.tokenizer,
+                               src_lang=from_lang, tgt_lang=to_lang, max_length=len(text))
+         return classifier(text)[0]["translation_text"]
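
For reference, a minimal usage sketch of this class outside the API, assuming the NLLB checkpoint is available locally (the example sentence and language pair are illustrative, using the FLORES-200 codes listed in utils.py):

from MachineTranslation import MachineTranslation

# Constructing the class loads the tokenizer and model and prints their load times.
mt = MachineTranslation("facebook/nllb-200-distilled-600M")

# Translate Indonesian (ind_Latn) to Banjar (bjn_Latn).
print(mt.predict("Selamat pagi", from_lang="ind_Latn", to_lang="bjn_Latn"))

Note that predict builds a fresh translation pipeline on every call and caps max_length at the character length of the input, which may truncate translations of very short texts.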
app.py ADDED
@@ -0,0 +1,58 @@
+ import uvicorn
+ from pydantic import BaseModel
+ from fastapi import FastAPI, status
+ from starlette.responses import JSONResponse
+ from MachineTranslation import MachineTranslation
+ from utils import PROVIDED_LANGUAGES
+ from fastapi.middleware.cors import CORSMiddleware
+ import time
+
+ class RequestBody(BaseModel):
+     text: str
+     from_lang: str
+     to_lang: str
+
+ app = FastAPI(docs_url=None, redoc_url=None)
+ machine_translation = MachineTranslation("facebook/nllb-200-distilled-600M")
+
+ origins = ['*']
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=origins,
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ @app.post("/predict")
+ async def predict(data_request: RequestBody):
+     if not data_request.text:
+         return JSONResponse({
+             "errors": "Please fill text!"
+         }, status_code=status.HTTP_400_BAD_REQUEST)
+     if len(data_request.text) > 5000:
+         return JSONResponse({
+             "errors": "The Number of Characters Exceeds The Limit"
+         }, status_code=status.HTTP_400_BAD_REQUEST)
+     if data_request.from_lang not in PROVIDED_LANGUAGES or data_request.to_lang not in PROVIDED_LANGUAGES:
+         return JSONResponse({
+             "errors": "Language not found!"
+         }, status_code=status.HTTP_400_BAD_REQUEST)
+     try:
+         # prediction time
+         time_before = time.perf_counter()
+         result = machine_translation.predict(data_request.text, data_request.from_lang, data_request.to_lang)
+         time_after = time.perf_counter()
+         return JSONResponse({
+             "result": result,
+             "inference_time": time_after - time_before
+         }, status_code=status.HTTP_200_OK)
+     except Exception:
+         return JSONResponse({
+             "errors": "Please contact your administrator"
+         }, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
+
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=8000)  # bind to all interfaces so the port exposed by the Dockerfile is reachable
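
Once the container is running with port 8000 published, the endpoint can be exercised with a small client sketch like the one below (the host, port, and example payload are assumptions; requests is not part of the service's requirements.txt):

import requests

response = requests.post(
    "http://localhost:8000/predict",
    json={"text": "Selamat pagi", "from_lang": "ind_Latn", "to_lang": "bjn_Latn"},
)

# On success the body has the shape {"result": "...", "inference_time": <seconds>}.
print(response.status_code, response.json())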
download_model.py ADDED
@@ -0,0 +1,7 @@
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+ tokenizer = AutoTokenizer.from_pretrained(
+     "facebook/nllb-200-distilled-600M")
+
+ model = AutoModelForSeq2SeqLM.from_pretrained(
+     "facebook/nllb-200-distilled-600M")
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ pydantic==1.8.2
+ starlette==0.14.2
+ transformers==4.30.0
+ uvicorn[standard]
+ fastapi
+ torch>=1.13.1
+ torchvision>=0.14.1
utils.py ADDED
@@ -0,0 +1 @@
+ PROVIDED_LANGUAGES = ["bjn_Latn", "ind_Latn"]