File size: 2,795 Bytes
5f0a349
 
4bd7b39
 
5f0a349
89dd83c
 
5f0a349
fa9e886
 
5f0a349
 
 
 
 
 
 
 
 
 
89dd83c
 
 
5f0a349
 
 
 
 
 
 
 
 
89dd83c
 
 
 
 
 
 
 
 
 
d8f90a3
3d86fd5
 
 
568df48
 
 
 
89dd83c
17fc033
23ba778
652d27e
53f286f
17fc033
b8e2295
e365617
97c72c7
23ba778
 
ea241b1
7fb79d9
fce53c3
 
 
 
97c72c7
 
17fc033
652d27e
 
 
 
 
fa9e886
 
652d27e
fa9e886
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import functools
import re

import requests
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TextClassificationPipeline

from model.model import predict_pipeline
from model.model import __version__ as model_version

app = FastAPI()


class TextIn(BaseModel):
    """Request payload: the raw text to analyze."""
    text: str


class PredictionOut(BaseModel):
    """Response payload for /predict: the detected language label."""
    language: str

class TopicClassificationOut(BaseModel):
    """Response payload for /TopicClassification: the predicted topic as a string."""
    result: str


@app.get("/")
def home():
    """Health-check endpoint: reports service status and the loaded model version."""
    payload = {"health_check": "OK", "model_version": model_version}
    return payload


@app.post("/predict", response_model=PredictionOut)
def predict(payload: TextIn):
    """Detect the language of the submitted text via the bundled model pipeline."""
    detected = predict_pipeline(payload.text)
    return {"language": detected}

@functools.lru_cache(maxsize=1)
def _get_topic_pipeline(model_name: str) -> TextClassificationPipeline:
    """Load (once) and cache the tokenizer + model pipeline for topic classification."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return TextClassificationPipeline(model=model, tokenizer=tokenizer)


@app.post("/TopicClassification", response_model=TopicClassificationOut)
def TopicClassification(payload: TextIn):
    """Classify the topic of the submitted (French) text.

    Uses the 'lincoln/flaubert-mlsum-topic-classification' model and returns
    the top predicted topic label as a string.
    """
    model_name = 'lincoln/flaubert-mlsum-topic-classification'

    # Cached: the original reloaded tokenizer + model on every request.
    nlp = _get_topic_pipeline(model_name)

    # Bug fix: the original classified a hard-coded debug sentence
    # ("Le Bayern Munich prend la grenadine.") and ignored payload.text.
    # Also fixed the regex: r"[[]]" matched only the literal sequence "[]",
    # while the apparent intent was to strip individual square brackets.
    text = re.sub(r"[\[\]]", " ", payload.text)
    text = text.lower()

    result = nlp(text, truncation=True)
    # The pipeline returns [{'label': ..., 'score': ...}]; the response model
    # declares `result: str`, so extract the top label (the original returned
    # the raw list, which fails response validation).
    return {"result": result[0]["label"]}

# https://hinacortus-api-knowyouraudience.hf.space/whichsocial/myspace
# https://hinacortus-api-knowyouraudience.hf.space/whichsocial/www.reddit.com
@app.get("/whichsocial/{request}")
def whichsocial(request):
    """Guess which social network a URL-like string refers to.

    Splits the string on '/' and '.' and looks for a known network name
    among the fragments. Returns the request, a crude "is it a URL" flag,
    the matched network (or "not found"), and a placeholder user profile.
    """
    socialnetwork = "not found"
    # Crude URL heuristic; 'http' also covers 'https' (the original tested
    # both redundantly).
    if 'http' in request or 'www' in request or '.com' in request or '.fr' in request:
        website = "ok"
        # Bug fixes vs. the original list:
        #  - removed the '' entry: it matched the empty fragments produced by
        #    '//' or a trailing '/', clobbering a previously found network;
        #  - corrected the 'picterest' typo to 'pinterest'.
        # A set gives O(1) membership tests instead of an inner loop.
        known_networks = {
            'facebook', 'youtube', 'myspace', 'linkedin', 'twitter',
            'instagram', 'github', 'reddit', 'pinterest', 'discord',
        }
        for segment in request.split('/'):
            for part in segment.split('.'):
                if part in known_networks:
                    socialnetwork = part
    else:
        website = "it's not a website link !"
    userprofile = 'me'
    return {"request": request, "website": website, "social_network": socialnetwork, "user_profile": userprofile}

# https://hinacortus-api-knowyouraudience.hf.space/content/https://www.20minutes.fr/auto/4039741-20230604-vente-encheres-citroen-2cv-bois-unique-monde-adjugee-210-000-euros
@app.get("/content/{request}")
def analyzecontent(request):
    """Fetch the given URL and return the word count of its body.

    Bug fixes: the original called `requests.get(request.text)` — but
    `request` is a plain str path parameter with no `.text` attribute
    (AttributeError) — and then called `.split()` on the Response object
    instead of its body text.
    """
    response = requests.get(request)
    # str.split() with no argument splits on any whitespace run, so
    # consecutive spaces/newlines don't inflate the count.
    len_content = len(response.text.split())
    return {"number_of_word": len_content}