File size: 1,356 Bytes
5f0a349
 
4bd7b39
 
5f0a349
89dd83c
 
5f0a349
 
 
 
 
 
 
 
 
 
 
89dd83c
 
 
5f0a349
 
 
 
 
 
 
 
 
89dd83c
 
 
 
 
 
 
 
 
 
d8f90a3
 
568df48
 
 
 
 
89dd83c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import re
from functools import lru_cache

from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TextClassificationPipeline

from model.model import predict_pipeline
from model.model import __version__ as model_version

app = FastAPI()


# Request body accepted by the POST endpoints in this module: one field
# carrying the raw text to analyse.
class TextIn(BaseModel):
    text: str  # raw input text, passed to the model by the handlers


# Response body of POST /predict.
class PredictionOut(BaseModel):
    language: str  # value produced by predict_pipeline for the submitted text

# Response body of POST /TopicClassification.
class TopicClassificationOut(BaseModel):
    result: str  # outcome of the topic classifier; declared as a plain string


# Health-check endpoint: reports that the service is up and which model
# version it was started with. Documented with comments rather than a
# docstring because FastAPI publishes handler docstrings in the OpenAPI
# description.
@app.get("/")
def home():
    status = {"health_check": "OK"}
    status["model_version"] = model_version
    return status


# Language-prediction endpoint: forwards the submitted text to
# predict_pipeline and wraps its return value in the PredictionOut shape.
# Documented with comments rather than a docstring because FastAPI publishes
# handler docstrings in the OpenAPI description.
@app.post("/predict", response_model=PredictionOut)
def predict(payload: TextIn):
    return {"language": predict_pipeline(payload.text)}

_TOPIC_MODEL_NAME = 'lincoln/flaubert-mlsum-topic-classification'

# Characters blanked out before classification: common punctuation, newlines,
# digits and square brackets. The brackets are escaped explicitly; the earlier
# pattern "[[]]" only matched the literal two-character sequence "[]".
_TOPIC_NOISE_RE = re.compile(r'[!@#$(),\n"%^*?:;~`0-9\[\]]')


@lru_cache(maxsize=1)
def _topic_pipeline():
    """Build the topic-classification pipeline once and reuse it afterwards.

    Loading the tokenizer and model weights is expensive, so it must not
    happen on every request. Loading is deferred to the first call so that
    importing this module stays cheap.
    """
    tokenizer = AutoTokenizer.from_pretrained(_TOPIC_MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(_TOPIC_MODEL_NAME)
    return TextClassificationPipeline(model=model, tokenizer=tokenizer)


def _clean_topic_text(raw):
    """Replace noise characters with spaces and lowercase the text."""
    return _TOPIC_NOISE_RE.sub(" ", raw).lower()


@app.post("/TopicClassification", response_model=TopicClassificationOut)
def TopicClassification(payload: TextIn):
    """Classify the topic of the submitted text.

    The text is stripped of punctuation, digits and brackets, lowercased, and
    passed to the topic-classification model (input longer than the model
    limit is truncated). The response carries the top predicted label.
    """
    text = _clean_topic_text(payload.text)
    predictions = _topic_pipeline()(text, truncation=True)
    # For a single input string the pipeline yields a list holding one
    # {"label", "score"} dict. The response model declares `result` as a
    # string, so return the label rather than the raw list (which would fail
    # response validation).
    # NOTE(review): confirm the label alone is what API clients expect.
    return {"result": predictions[0]["label"]}