Spaces:
Runtime error
Runtime error
Upload 6 files
Browse files
- Dockerfile +22 -0
- MachineTranslation.py +28 -0
- app.py +58 -0
- download_model.py +7 -0
- requirements.txt +7 -0
- utils.py +1 -0
Dockerfile
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Base image
FROM python:3.9

# Set working directory
WORKDIR /app

# Install dependencies first, before copying the application code, so the
# (slow) pip layer is cached and only rebuilt when requirements.txt changes.
# --no-cache-dir keeps pip's download cache out of the image.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Pre-download the translation model at build time so the container does
# not have to fetch it on startup.
COPY download_model.py .
RUN python3 download_model.py

# Copy the application files
COPY app.py .
COPY MachineTranslation.py .
COPY utils.py .

# Expose the port
EXPOSE 8000

# Run the application
CMD ["python3", "app.py"]
MachineTranslation.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import time


class MachineTranslation:
    """Thin wrapper around a Hugging Face seq2seq translation model.

    Loads the tokenizer and model once at construction time (printing how
    long each load took) and exposes a single `predict` method.
    """

    def __init__(self, name_model="facebook/nllb-200-distilled-600M"):
        self.name_model = name_model

        # Measure tokenizer load time.
        self.start_time_tokenizer = time.perf_counter()
        self.tokenizer = AutoTokenizer.from_pretrained(self.name_model)
        self.end_time_tokenizer = time.perf_counter()
        self.time_tokenizer = self.end_time_tokenizer - self.start_time_tokenizer
        print("load time tokenizer :", self.time_tokenizer)

        # Measure model load time.
        self.start_time_nllb = time.perf_counter()
        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.name_model)
        self.end_time_nllb = time.perf_counter()
        self.time_nllb = self.end_time_nllb - self.start_time_nllb
        print("load time nllb :", self.time_nllb)

    def predict(self, text, from_lang, to_lang):
        """Translate `text` from `from_lang` to `to_lang`.

        A fresh translation pipeline is built per call with the requested
        language pair. NOTE(review): `max_length` is counted in tokens but
        is set here from the character length of the input, which usually
        leaves generous headroom — confirm this is intentional.
        """
        translator = pipeline(
            "translation",
            model=self.model,
            tokenizer=self.tokenizer,
            src_lang=from_lang,
            tgt_lang=to_lang,
            max_length=len(text),
        )
        outputs = translator(text)
        return outputs[0]["translation_text"]
app.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import logging
import time

import uvicorn
from fastapi import FastAPI, status
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from starlette.responses import JSONResponse

from MachineTranslation import MachineTranslation
from utils import PROVIDED_LANGUAGES
9 |
+
|
class RequestBody(BaseModel):
    """JSON payload accepted by the POST /predict endpoint."""

    # Text to translate; the handler rejects empty text and text longer
    # than 5000 characters.
    text: str
    # Source and target language codes; both must appear in
    # PROVIDED_LANGUAGES (utils.py), e.g. "ind_Latn".
    from_lang: str
    to_lang: str
# API docs (Swagger/ReDoc) are disabled for this deployment.
app = FastAPI(docs_url=None, redoc_url=None)
# Load the tokenizer and NLLB model once at import time so every request
# reuses the same in-memory model.
machine_translation = MachineTranslation("facebook/nllb-200-distilled-600M")

# Allow requests from any origin (browser clients hit this API directly).
origins = ['*']

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.post("/predict")
async def predict(data_request: RequestBody):
    """Translate `data_request.text` between the requested languages.

    Returns 200 with ``{"result": ..., "inference_time": seconds}`` on
    success; 400 with ``{"errors": ...}`` for invalid input; 500 with a
    generic message if translation itself fails.
    """
    if not data_request.text:
        return JSONResponse({
            "errors": "Please fill text!"
        }, status_code=status.HTTP_400_BAD_REQUEST)
    if len(data_request.text) > 5000:
        return JSONResponse({
            "errors": "The Number of Characters Exceeds The Limit"
        }, status_code=status.HTTP_400_BAD_REQUEST)
    if data_request.from_lang not in PROVIDED_LANGUAGES or data_request.to_lang not in PROVIDED_LANGUAGES:
        return JSONResponse({
            "errors": "Language not found!"
        }, status_code=status.HTTP_400_BAD_REQUEST)
    try:
        # prediction time
        time_before = time.perf_counter()
        result = machine_translation.predict(
            data_request.text, data_request.from_lang, data_request.to_lang)
        time_after = time.perf_counter()
        return JSONResponse({
            "result": result,
            "inference_time": time_after - time_before
        }, status_code=status.HTTP_200_OK)
    except Exception:
        # Log the full traceback server-side so failures are diagnosable;
        # the client still only receives a generic message.
        logging.exception("translation request failed")
        return JSONResponse({
            "errors": "Please contact your administrator"
        }, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
if __name__ == "__main__":
    # Bind to 0.0.0.0, not "localhost": inside the Docker container the
    # server must listen on all interfaces, otherwise the published port
    # 8000 (see Dockerfile EXPOSE) is unreachable from outside the
    # container and the Space fails at runtime.
    uvicorn.run(app, host="0.0.0.0", port=8000)
download_model.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Pre-fetch the NLLB-200 tokenizer and model into the local HF cache.

Run at Docker build time (see Dockerfile) so the container can start
without downloading the model first. The unused `pipeline` import was
removed; the model name is factored into one constant to keep the two
`from_pretrained` calls in sync.
"""
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

MODEL_NAME = "facebook/nllb-200-distilled-600M"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pydantic==1.8.2
|
2 |
+
starlette==0.14.2
|
3 |
+
transformers==4.30.0
|
4 |
+
uvicorn[standard]
|
5 |
+
# Pinned for compatibility with starlette==0.14.2 and pydantic 1.x above;
# an unpinned fastapi resolves to a release requiring newer starlette/pydantic
# and the install conflicts.
fastapi==0.65.2
|
6 |
+
torch>=1.13.1
|
7 |
+
torchvision>=0.14.1
|
utils.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
# Language codes the /predict endpoint accepts (NLLB-200 / FLORES-200 style
# codes, as used by the facebook/nllb-200-distilled-600M model):
#   "bjn_Latn" — Banjar (Latin script)
#   "ind_Latn" — Indonesian (Latin script)
PROVIDED_LANGUAGES = ["bjn_Latn", "ind_Latn",]