rajeshradhakrishnan committed on
Commit
f3b1912
1 Parent(s): 74a5927

English-Malayalam Translate v10

Browse files
Files changed (7) hide show
  1. Dockerfile +12 -4
  2. main.py +36 -0
  3. requirements.txt +6 -7
  4. static/index.html +31 -0
  5. static/script.js +84 -0
  6. static/style.css +105 -0
  7. translate.py +0 -56
Dockerfile CHANGED
@@ -5,13 +5,21 @@ FROM python:3.9
5
 
6
  WORKDIR /code
7
 
8
- ARG TRANSFORMERS_CACHE=/code/translate_cache
9
-
10
  COPY ./requirements.txt /code/requirements.txt
11
 
12
  RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
13
 
14
  COPY . .
15
 
16
- CMD gunicorn --bind 0.0.0.0:7860 translate:app
17
- #CMD ["uvicorn", "translate:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
5
 
6
WORKDIR /code

# Install dependencies first so this layer is reused when only the sources change.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# NOTE(review): the sources are copied again below into $HOME/app, which is
# where the server actually runs; this copy into /code looks redundant.
COPY . .

# Create and switch to an unprivileged user for running the server.
RUN useradd -m -u 1000 user

USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

COPY --chown=user . $HOME/app

# main.py is copied directly into the working directory ($HOME/app), so the
# import path is "main:app"; "app.main:app" would require an app/ package.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.staticfiles import StaticFiles
3
+ from fastapi.responses import FileResponse
4
+ from fastapi.templating import Jinja2Templates
5
+ from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
6
+
7
+
8
# Checkpoint shared by the translation model and its tokenizer.
_CHECKPOINT = "facebook/mbart-large-50-one-to-many-mmt"

# Loaded once at import time and reused by every request handler.
tokenizer = MBart50TokenizerFast.from_pretrained(_CHECKPOINT, src_lang="en_XX")
model = MBartForConditionalGeneration.from_pretrained(_CHECKPOINT)

templates = Jinja2Templates(directory="templates")
app = FastAPI()
13
+
14
@app.get("/infer_t5")
def t5(input):
    """Translate ``input`` from English to Malayalam.

    The text is tokenized with the module-level tokenizer, generation is
    forced to start with the ``ml_IN`` language token, and the decoded
    result is returned under the ``"output"`` key.
    """
    encoded = tokenizer(input, return_tensors="pt")

    # Forcing the first generated token selects Malayalam as the target language.
    malayalam_bos = tokenizer.lang_code_to_id["ml_IN"]
    token_ids = model.generate(**encoded, forced_bos_token_id=malayalam_bos)

    return {"output": tokenizer.batch_decode(token_ids, skip_special_tokens=True)}
26
+
27
import os

from fastapi import Request

# NOTE(review): routes are matched in registration order and a mount at "/"
# matches every path, so the index() handler declared below is shadowed by
# this mount. Register the handler before the mount if the templated page
# (rather than static/index.html) is meant to be served.
app.mount("/", StaticFiles(directory="static", html=True), name="static")


@app.get("/")
async def index(request: Request):
    """Render ``index.html`` with the API key read from the environment.

    Args:
        request: The incoming request; the template response requires it
            in the rendering context.

    Returns:
        The rendered ``index.html`` template response.
    """
    # SECURITY(review): whatever is placed in the template context can end up
    # in the page source; confirm this key is meant to be visible to browsers.
    apikey = {"APIKEY": os.environ.get("API_KEY")}
    return templates.TemplateResponse(
        "index.html", {"request": request, "apikey": apikey}
    )
requirements.txt CHANGED
@@ -1,7 +1,6 @@
1
- torch
2
- transformers
3
- sentencepiece
4
- flask
5
- flask_restful
6
- flask-cors
7
- gunicorn==20.1.0
 
1
fastapi==0.74.*
# Required by fastapi.templating.Jinja2Templates, which main.py instantiates.
jinja2==3.0.*
requests==2.27.*
sentencepiece==0.1.*
torch==1.11.*
transformers==4.*
uvicorn[standard]==0.17.*
 
static/index.html ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>JavaScript Open Assistant Clone</title>
    <link rel="stylesheet" href="style.css">
</head>
<body>
    <!-- Sidebar: "New Chat" button and the list of previous prompts. -->
    <section class="side-bar">
        <button>New Chat</button>
        <div class="history"></div>
        <nav>
            <p>Made by Rajesh</p>
        </nav>
    </section>
    <!-- Main pane: output paragraph above the prompt input and submit arrow. -->
    <section class="main">
        <h1>Rajesh - Open Assistant</h1>
        <p id="output"></p>
        <div class="bottom-section">
            <div class="input-container">
                <input>
                <div id="submit">➢</div>
            </div>
        </div>
        <p class="info">Open Assistant - This is the 4th iteration English
            supervised-fine-tuning (SFT) model of the Open-Assistant project.
        </p>
    </section>
    <script src="script.js"></script>
</body>
</html>
static/script.js ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Bearer token for the Hugging Face inference API.
// The placeholder is quoted so this file stays valid JavaScript whether or not
// a template engine substitutes it; "APIKEY" is the key the backend puts in
// the `apikey` template context.
// NOTE(review): index.html loads this file via a plain <script src>, so confirm
// it is actually rendered as a template before relying on the substitution.
// SECURITY(review): any key placed here is readable by every visitor.
const API_KEY = "{{ apikey.APIKEY }}";
2
+
3
/**
 * Send text to the backend `infer_t5` endpoint and return its translation.
 *
 * @param {string|string[]} text - Text to translate; it is stringified into
 *     the `input` query parameter.
 * @returns {Promise<*>} The `output` field of the endpoint's JSON response.
 * @throws {Error} When the endpoint answers with a non-2xx status.
 */
const translateText = async (text) => {
    // Encode the value so characters such as '&', '#', '+' or '?' in the text
    // cannot truncate or corrupt the query string.
    const inferResponse = await fetch(`infer_t5?input=${encodeURIComponent(text)}`);
    if (!inferResponse.ok) {
        throw new Error(`Translation request failed with status ${inferResponse.status}`);
    }
    const inferJson = await inferResponse.json();

    return inferJson.output;
};
9
+
10
+
11
/**
 * Split a generated transcript into prompter and assistant segments.
 *
 * The transcript is cut at every `<|endoftext|>` marker. A segment containing
 * `<|prompter|>` goes to the prompter list; otherwise a segment containing
 * `<|assistant|>` goes to the assistant list. The matching tag is stripped
 * from the stored text; segments with neither tag are dropped.
 *
 * @param {string} inputString - Raw generated text including the tags.
 * @returns {[string[], string[]]} `[prompterText, assistantText]`.
 */
function generatePrompterAssistantText(inputString) {
    const prompterText = [];
    const assistantText = [];

    for (const segment of inputString.split('<|endoftext|>')) {
        // The prompter tag takes precedence when both tags are present.
        if (segment.includes('<|prompter|>')) {
            prompterText.push(segment.replace(/<\|prompter\|>/g, ''));
            continue;
        }
        if (segment.includes('<|assistant|>')) {
            assistantText.push(segment.replace(/<\|assistant\|>/g, ''));
        }
    }

    return [prompterText, assistantText];
}
36
+
37
// Handles to the page elements the script works with, looked up once at load:
// the submit arrow, the output paragraph, the prompt input, the history list
// and the first <button> on the page.
const [submitButton, outPutElement, inputElement, historyElement, buttonElement] =
    ['#submit', '#output', 'input', '.history', 'button']
        .map((selector) => document.querySelector(selector));
42
+
43
+
44
/**
 * Log a value and place it in the page's input box.
 *
 * @param {string} value - Text to write into the input element.
 */
function changeInput(value) {
    console.log(value);
    // Look the input up at call time rather than relying on an outer reference.
    document.querySelector('input').value = value;
}
50
/**
 * Send the current prompt to the Open Assistant inference API, translate the
 * assistant's reply, show it in the output element and add the prompt to the
 * history list.
 *
 * Errors from either request are logged to the console, not rethrown.
 */
async function getMessage() {
    // Capture the prompt up front so the history entry matches what was sent,
    // even if the input box is edited while the requests are in flight.
    const prompt = inputElement.value;
    if (!prompt) {
        // Nothing to ask; skip the remote call entirely.
        return;
    }

    const options = {
        method: "POST",
        headers: {
            Authorization: `Bearer ${API_KEY}`,
            "Content-Type": "application/json"
        },
        body: JSON.stringify({
            inputs: "<|prompter|>" + prompt + "<|endoftext|><|assistant|>"
        })
    };

    try {
        const response = await fetch("https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", options);
        const data = await response.json();

        // Only a non-empty array carries a generation; anything else (for
        // example an error payload) is logged as-is for diagnosis.
        const generatedText = Array.isArray(data) && data[0]
            ? data[0].generated_text
            : undefined;
        if (!generatedText) {
            console.log(data);
            return;
        }
        console.log(generatedText);

        const [, assistantText] = generatePrompterAssistantText(generatedText);
        outPutElement.textContent = await translateText(assistantText);

        // Clicking a history entry puts its prompt back into the input box.
        const pElement = document.createElement('p');
        pElement.textContent = prompt;
        pElement.addEventListener('click', () => changeInput(pElement.textContent));
        historyElement.append(pElement);
    } catch (error) {
        console.log(error);
    }
}
78
+
79
// Clicking the submit arrow sends the current prompt.
submitButton.addEventListener('click', getMessage);

/** Empty the prompt input box. */
function clearInput() {
    inputElement.value = '';
}

// The first button on the page resets the input.
buttonElement.addEventListener('click', clearInput);
static/style.css ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @import url('https://fonts.googleapis.com/css2?family=Open+Sans:wght@400;500;600;700;800&display=swap');
2
+
3
+ * {
4
+ color: #fff;
5
+ font-family: 'Open Sans', sans-serif;
6
+ }
7
+ body {
8
+ margin: 0;
9
+ padding: 0;
10
+ background-color: #343541;
11
+ display: flex;
12
+ }
13
+ h1{
14
+ font-size: 33px;
15
+ font-weight: 600;
16
+ padding: 200px 0;
17
+ }
18
+ .side-bar{
19
+ background-color: #202123;
20
+ width: 244px;
21
+ height: 100vh;
22
+ display: flex;
23
+ flex-direction: column;
24
+ justify-content: space-between;
25
+ }
26
+
27
+ .main{
28
+ display: flex;
29
+ flex-direction: column;
30
+ align-items: center;
31
+ text-align: center;
32
+ justify-content: space-between;
33
+ height: 100vh;
34
+ width: 100%;
35
+ }
36
+
37
+ .bottom-section{
38
+ width: 100%;
39
+ display: flex;
40
+ flex-direction: column;
41
+ justify-content: center;
42
+ align-items: center;
43
+ }
44
+ .info{
45
+ color: rgba(255, 255, 255, 0.5);
46
+ font-size: 11px;
47
+ padding: 10px;
48
+ }
49
+
50
input{
    border: none;
    background-color: rgba(255, 255, 255, 0.5);
    width: 100%;
    font-size: 20px;
    padding: 12px 15px;
    border-radius: 5px;
    /* Every non-zero offset needs a unit: a bare "-12" makes the whole
       box-shadow declaration invalid, so the browser would drop it. */
    box-shadow: rgba(0, 0, 0, 0.5) 0 54px 55px,
        rgba(0, 0, 0, 0.5) 0 -12px 30px,
        rgba(0, 0, 0, 0.5) 0 4px 6px,
        rgba(0, 0, 0, 0.5) 0 12px 3px,
        rgba(0, 0, 0, 0.5) 0 -3px 5px;
}
64
+ input:focus{
65
+ outline: none;
66
+ }
67
+
68
+ .input-container{
69
+ position: relative;
70
+ width: 100%;
71
+ max-width: 650px;
72
+ }
73
+
74
+ .input-container #submit{
75
+ position: absolute;
76
+ right: 0;
77
+ bottom: 15px;
78
+ cursor: pointer;
79
+ }
80
+
81
+ button{
82
+ border: solid 0.5px rgba(255, 255, 255, 0.5);
83
+ background-color: transparent;
84
+ border-radius: 5px;
85
+ padding: 10px;
86
+ margin: 10px;
87
+ }
88
+
89
+ nav{
90
+ border-top: solid 0.5px rgba(255, 255, 255, 0.5);
91
+ padding: 10px;
92
+ margin: 10px;
93
+ }
94
+
95
+ .history{
96
+ padding: 10px;
97
+ margin: 10px;
98
+ display: flex;
99
+ flex-direction: column;
100
+ height: 100%;
101
+ }
102
+
103
+ .history p{
104
+ cursor: pointer;
105
+ }
translate.py DELETED
@@ -1,56 +0,0 @@
1
- import torch
2
- import os
3
- from transformers.pipelines import pipeline
4
- from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
5
- from flask import Flask, request
6
- from flask_restful import Api, Resource
7
- from flask_cors import CORS
8
-
9
- app = Flask(__name__)
10
- CORS(app)
11
- cors = CORS(app, resource={
12
- r"/*": {
13
- "origins": "*"
14
- }
15
- })
16
- api = Api(app)
17
-
18
- app.config['CORS_HEADERS'] = 'Content-Type'
19
-
20
- class Classifier():
21
-
22
- def __init__(self, data_en):
23
- self.model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
24
- self.tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
25
- self.model_inputs = self.tokenizer(data_en, return_tensors="pt")
26
-
27
- # translate from English to Malayalam
28
- self.generated_tokens = self.model.generate(
29
- **self.model_inputs,
30
- forced_bos_token_id=self.tokenizer.lang_code_to_id["ml_IN"]
31
- )
32
- self. translate = self.tokenizer.batch_decode(self.generated_tokens, skip_special_tokens=True)
33
- self.data_en = data_en
34
-
35
- def get_translator(self):
36
- output = self.translate(self.data_en)
37
- return {'output': output}
38
-
39
-
40
- class Translate(Resource):
41
- def post(self):
42
- try:
43
- # Decode json object from the request
44
- json_object = request.get_json()
45
- data_en = json_object["text"]
46
- obj = Classifier(data_en)
47
- except Exception as e:
48
- return {"Message": "Error in creating Translator object" + str(e)}
49
- status = obj.get_translator()
50
- return status
51
-
52
-
53
- api.add_resource(Translate, '/api/translate')
54
-
55
- if __name__ == '__main__':
56
- app.run(host='0.0.0.0', port=7860)