Spaces:
Sleeping
Sleeping
Upload 11 files
Browse files- Dockerfile.txt +17 -0
- app.py +34 -0
- models/Flashcardd.py +10 -0
- models/__init__.py +0 -0
- requirements.txt +9 -0
- translations/__init__.py +0 -0
- translations/__pycache__/__init__.cpython-39.pyc +0 -0
- translations/__pycache__/model_name_mapping.cpython-39.pyc +0 -0
- translations/__pycache__/translate.cpython-39.pyc +0 -0
- translations/model_name_mapping.py +47 -0
- translations/translate.py +45 -0
Dockerfile.txt
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Dockerfile
FROM python:3.10-slim

# Hugging Face Docker Spaces run the container as UID 1000. Create that user
# up front and give it a writable HOME so that files written at runtime
# (for example downloaded model weights under ~/.cache) do not hit
# permission errors.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR /app

# Install dependencies first so this layer is cached when only app code changes.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the app files, owned by the runtime user.
COPY --chown=user . .

# Expose the port FastAPI will run on
EXPOSE 7860

# Run the application
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from fastapi import FastAPI, Body, File, UploadFile, Request
|
3 |
+
from fastapi.responses import HTMLResponse, RedirectResponse
|
4 |
+
from fastapi.staticfiles import StaticFiles
|
5 |
+
from fastapi.templating import Jinja2Templates
|
6 |
+
|
7 |
+
from models.Flashcardd import Flashcard
|
8 |
+
from translations.translate import load_model_and_tokenizer, translate
|
9 |
+
|
10 |
+
|
11 |
+
app = FastAPI()
|
12 |
+
|
13 |
+
|
14 |
+
@app.get("/")
async def home():
    """Serve the root route with a fixed greeting payload."""
    greeting = {"message": "hola-mondo"}
    return greeting
|
17 |
+
|
18 |
+
|
19 |
+
@app.post("/flashcards/create_flashcard")
async def create_flashcard(new_flashcard=Body()) -> Flashcard:
    """Translate a word and return it wrapped in a ``Flashcard``.

    The request body is expected to be a JSON object with three keys:
    ``"from"`` (source language code), ``"to"`` (target language code) and
    ``"word"`` (the text to translate).

    Returns:
        A ``Flashcard`` whose ``name`` is the submitted word, whose
        ``translation`` is the model output and whose ``sample_sentence``
        is the placeholder ``"notes.."``.

    Raises:
        HTTPException: status 422 when the body is not a JSON object, when a
            required key is missing, or when the word is rejected by the
            ``Flashcard`` model's validation.
    """
    # Imported locally so the handler does not rely on names missing from the
    # module-level import list.
    from fastapi import HTTPException
    from pydantic import ValidationError

    print(f"new flashcard: {new_flashcard}")

    if not isinstance(new_flashcard, dict):
        raise HTTPException(
            status_code=422, detail="Request body must be a JSON object."
        )

    missing = [key for key in ("from", "to", "word") if key not in new_flashcard]
    if missing:
        raise HTTPException(
            status_code=422,
            detail=f"Missing required field(s): {', '.join(missing)}",
        )

    word = new_flashcard["word"]

    # Validate the word before the expensive model load so invalid input is
    # rejected with a client error instead of surfacing later as a 500.
    try:
        flashcard = Flashcard(name=word, sample_sentence="notes..")
    except ValidationError as err:
        raise HTTPException(status_code=422, detail=str(err)) from err

    # NOTE(review): loading the model and generating are synchronous calls
    # made from an async handler, so they block the event loop while running.
    model, tokenizer = load_model_and_tokenizer(
        from_lang=new_flashcard["from"], to_lang=new_flashcard["to"]
    )
    flashcard.translation = translate(word, model, tokenizer)
    return flashcard
|
models/Flashcardd.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel, constr
|
2 |
+
from typing import Union, Optional
|
3 |
+
|
4 |
+
|
5 |
+
class Flashcard(BaseModel):
    """Pydantic model describing one flashcard.

    ``name`` is required and must be between 5 and 100 characters long;
    ``translation`` and ``sample_sentence`` are optional and default to the
    empty string.
    """

    # Required text of the card, length-constrained to 5..100 characters.
    name: constr(min_length=5, max_length=100)  # type: ignore
    # Translated text; empty string when not provided.
    translation: Union[str, None] = ""
    # Free-form example or notes; empty string when not provided.
    sample_sentence: Union[str, None] = ""
|
9 |
+
|
10 |
+
|
models/__init__.py
ADDED
File without changes
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
fastapi==0.112.0
|
3 |
+
sentencepiece==0.2.0
|
4 |
+
numpy==1.23.1
|
5 |
+
sacremoses==0.1.1
|
6 |
+
tokenizers==0.19.1
|
7 |
+
transformers==4.43.3
|
8 |
+
huggingface-hub==0.24.5
|
9 |
+
torch
|
translations/__init__.py
ADDED
File without changes
|
translations/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (163 Bytes). View file
|
|
translations/__pycache__/model_name_mapping.cpython-39.pyc
ADDED
Binary file (1.84 kB). View file
|
|
translations/__pycache__/translate.cpython-39.pyc
ADDED
Binary file (1.59 kB). View file
|
|
translations/model_name_mapping.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass, field
|
2 |
+
from typing import Dict, Literal, Tuple
|
3 |
+
from pydantic import BaseModel, ValidationError
|
4 |
+
from enum import Enum
|
5 |
+
|
6 |
+
|
7 |
+
class ModelNameMapping(BaseModel):
    """Lookup table from a ``(from_lang, to_lang)`` tuple to a model name."""

    # The original default was ``dataclasses.field(default_factory=dict)``,
    # a helper that is only interpreted by ``@dataclass`` and not by pydantic
    # models. A plain ``{}`` is safe here because pydantic copies mutable
    # defaults for each instance instead of sharing them.
    data: Dict[Tuple, str] = {}

    def __getitem__(self, key: Tuple) -> str:
        """Return the model name stored for ``key``.

        Raises:
            KeyError: if ``key`` is not present in ``data``.
        """
        return self.data[key]

    def get(self, key: Tuple, default: str = "") -> str:
        """Return the model name stored for ``key``, or ``default`` if absent.

        ``default`` is the empty string unless the caller overrides it.
        """
        return self.data.get(key, default)
|
15 |
+
|
16 |
+
|
17 |
+
class LanguagesEnum(Enum):
    """Two-letter language codes used to build translation language pairs.

    Each member's value is the same string as its name.
    """

    es = "es"  # presumably Spanish
    en = "en"  # presumably English
    ca = "ca"  # presumably Catalan
    pt = "pt"  # presumably Portuguese
|
22 |
+
|
23 |
+
|
24 |
+
def _pair(source: LanguagesEnum, target: LanguagesEnum) -> Tuple:
    """Build the ``(from_code, to_code)`` string tuple for two languages."""
    return (source.value, target.value)


# Language pairs starting from "es".
es_to_en: Tuple = _pair(LanguagesEnum.es, LanguagesEnum.en)
es_to_ca: Tuple = _pair(LanguagesEnum.es, LanguagesEnum.ca)
# The "po" suffix in these names refers to the "pt" code.
# NOTE: es_to_po has no entry in ``models`` below.
es_to_po: Tuple = _pair(LanguagesEnum.es, LanguagesEnum.pt)

# Language pairs starting from "en".
en_to_es: Tuple = _pair(LanguagesEnum.en, LanguagesEnum.es)
en_to_po: Tuple = _pair(LanguagesEnum.en, LanguagesEnum.pt)

# Language pairs starting from "ca".
ca_to_es: Tuple = _pair(LanguagesEnum.ca, LanguagesEnum.es)

# Language pairs starting from "pt".
po_to_en: Tuple = _pair(LanguagesEnum.pt, LanguagesEnum.en)
po_to_es: Tuple = _pair(LanguagesEnum.pt, LanguagesEnum.es)

# Model identifier registered for each supported language pair.
models = {
    es_to_en: "Helsinki-NLP/opus-mt-es-en",
    es_to_ca: "Helsinki-NLP/opus-mt-es-ca",
    en_to_es: "Helsinki-NLP/opus-mt-en-es",
    en_to_po: "Helsinki-NLP/opus-mt-tc-big-en-pt",
    ca_to_es: "Helsinki-NLP/opus-mt-ca-es",
    po_to_en: "Helsinki-NLP/opus-mt-pt-en",
    po_to_es: "Helsinki-NLP/opus-mt-pt-es",
}

# Shared lookup object wrapping the table above.
MODEL_NAME_MAPPING = ModelNameMapping(data=models)
|
translations/translate.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import MarianMTModel, MarianTokenizer
|
2 |
+
from translations.model_name_mapping import MODEL_NAME_MAPPING
|
3 |
+
|
4 |
+
|
5 |
+
# Already-loaded ``(model, tokenizer)`` pairs keyed by ``(from_lang, to_lang)``.
# Loading a pretrained model is expensive, so each pair is loaded at most
# once per process; the key space is limited to the pairs in the mapping.
_LOADED_MODELS = {}


def load_model_and_tokenizer(from_lang: str, to_lang: str):
    """Return the Marian model and tokenizer for a language pair.

    Args:
        from_lang: Source language code, e.g. ``"en"``.
        to_lang: Target language code, e.g. ``"es"``.

    Returns:
        A ``(model, tokenizer)`` tuple. Repeated calls with the same pair
        return the same cached objects.

    Raises:
        ValueError: if no model name is configured for the requested pair.
    """
    print(f"load_model_and_tokenizer from: {from_lang}, to: {to_lang}")

    language_pair = (from_lang, to_lang)
    cached = _LOADED_MODELS.get(language_pair)
    if cached is not None:
        return cached

    model_name = MODEL_NAME_MAPPING.get(language_pair)
    print(f"model_name: {model_name}")
    if not model_name:
        # The mapping returns "" for unknown pairs; fail with a clear message
        # instead of handing an empty name to ``from_pretrained``.
        raise ValueError(
            f"No translation model configured for {from_lang!r} -> {to_lang!r}"
        )

    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    _LOADED_MODELS[language_pair] = (model, tokenizer)
    return model, tokenizer
|
13 |
+
|
14 |
+
|
15 |
+
def translate(text, model, tokenizer):
    """Translate ``text`` and return the first decoded output.

    The tokenizer is called on ``text`` (requesting "pt" tensors with
    padding), its result is unpacked as keyword arguments into
    ``model.generate``, and every generated sequence is decoded with special
    tokens skipped. Only the first decoded string is returned.
    """
    encoded = tokenizer(text, return_tensors="pt", padding=True)
    generated = model.generate(**encoded)

    decoded = []
    for sequence in generated:
        decoded.append(tokenizer.decode(sequence, skip_special_tokens=True))
    return decoded[0]
|
21 |
+
|
22 |
+
|
23 |
+
|
24 |
+
def test_translations(from_lang, to_lang, text_to_translate):
    """Load the model for a language pair, translate the text, print the result."""
    loaded = load_model_and_tokenizer(to_lang=to_lang, from_lang=from_lang)
    model, tokenizer = loaded
    translation = translate(text_to_translate, model, tokenizer)
    message = f"Translated text from: {from_lang}, to: {to_lang}, translation: {translation}"
    print(message)
|
28 |
+
|
29 |
+
|
30 |
+
if __name__ == "__main__":
    # Manual smoke run: each case is (from_lang, to_lang, text) and is
    # executed in the order listed.
    english_text = "hello friends, who's hungry?"
    catalan_text = "un cafè sense sucre i amb llet, si us plau"

    smoke_cases = (
        ("en", "es", english_text),
        ("en", "pt", english_text),
        ("ca", "es", catalan_text),
    )
    for source_lang, target_lang, sample_text in smoke_cases:
        test_translations(
            from_lang=source_lang,
            to_lang=target_lang,
            text_to_translate=sample_text,
        )
|