"""FastAPI service exposing language detection, topic classification and
small URL-analysis helpers."""

import logging
import re
from functools import lru_cache

import requests
from fastapi import FastAPI
from fastapi import HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TextClassificationPipeline

from model.model import predict_pipeline
from model.model import __version__ as model_version

logger = logging.getLogger(__name__)

app = FastAPI()

# Hugging Face model used by the /TopicClassification endpoint.
_TOPIC_MODEL_NAME = 'lincoln/flaubert-mlsum-topic-classification'

# Square brackets are replaced by spaces before classification.
_BRACKETS_RE = re.compile(r"[\[\]]")

# Substrings whose presence makes /whichsocial treat the input as a link.
# ('https' is covered by 'http'.)
_URL_MARKERS = ('http', 'www', '.com', '.fr')

# Names looked for among the dot/slash separated parts of a link.
_SOCIAL_NETWORKS = (
    'facebook',
    'youtube',
    'myspace',
    'linkedin',
    'twitter',
    'instagram',
    'github',
    'reddit',
    'pinterest',
    'discord',
)

# Upper bound (seconds) on the outbound request made by /content.
_FETCH_TIMEOUT_SECONDS = 10


class TextIn(BaseModel):
    # Request body shared by /predict and /TopicClassification.
    text: str


class PredictionOut(BaseModel):
    # Response body of /predict.
    language: str


class TopicClassificationOut(BaseModel):
    # Response body of /TopicClassification.
    result: str


@lru_cache(maxsize=1)
def _get_topic_pipeline():
    """Build the topic-classification pipeline once and reuse it.

    Loading the tokenizer and the model is expensive, so the result is
    cached for the lifetime of the process instead of being rebuilt on
    every request.
    """
    tokenizer = AutoTokenizer.from_pretrained(_TOPIC_MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(_TOPIC_MODEL_NAME)
    return TextClassificationPipeline(model=model, tokenizer=tokenizer)


@app.get("/")
def home():
    """Health check that also reports the imported model version."""
    return {"health_check": "OK", "model_version": model_version}


@app.post("/predict", response_model=PredictionOut)
def predict(payload: TextIn):
    """Run ``predict_pipeline`` on the submitted text.

    The value returned by ``predict_pipeline`` is sent back under the
    ``language`` key.
    """
    language = predict_pipeline(payload.text)
    return {"language": language}


@app.post("/TopicClassification", response_model=TopicClassificationOut)
def TopicClassification(payload: TextIn):
    """Classify the topic of the submitted text.

    The text has square brackets replaced by spaces and is lower-cased
    before being passed to the pipeline (with truncation enabled).

    Returns:
        ``{"result": <label>}`` where ``<label>`` is the ``label`` field of
        the first prediction produced by the pipeline.
    """
    classifier = _get_topic_pipeline()

    # NOTE: a broader punctuation/digit strip was tried here previously and
    # left disabled; only brackets are removed.
    text = _BRACKETS_RE.sub(" ", payload.text).lower()

    predictions = classifier(text, truncation=True)
    # The response model declares ``result`` as a string, so return the label
    # rather than the raw prediction structure.
    top = predictions[0] if isinstance(predictions, list) else predictions
    return {"result": top["label"]}


# https://hinacortus-api-knowyouraudience.hf.space/whichsocial/myspace
# https://hinacortus-api-knowyouraudience.hf.space/whichsocial/www.reddit.com
@app.get("/whichsocial/{request:path}")
def whichsocial(request: str):
    """Guess which social network a link points to.

    The input counts as a link when it contains one of ``_URL_MARKERS``.
    It is then split on ``/`` and ``.`` and each part is compared with the
    known network names; when several parts match, the last one wins.

    Returns:
        A dict with the original ``request``, a ``website`` status message,
        the detected ``social_network`` (``"not found"`` when none matched)
        and a ``user_profile`` value.
    """
    socialnetwork = "not found"
    if any(marker in request for marker in _URL_MARKERS):
        website = "ok"
        for segment in request.split('/'):
            for part in segment.split('.'):
                if part in _SOCIAL_NETWORKS:
                    logger.debug("found part %s", part)
                    socialnetwork = part
    else:
        website = "it's not a website link !"

    # Placeholder: the profile is not extracted from the link.
    userprofile = 'me'
    return {
        "request": request,
        "website": website,
        "social_network": socialnetwork,
        "user_profile": userprofile,
    }


# https://hinacortus-api-knowyouraudience.hf.space/content/https://www.20minutes.fr/auto/4039741-20230604-vente-encheres-citroen-2cv-bois-unique-monde-adjugee-210-000-euros
@app.get("/content/{request:path}")
def analyzecontent(request: str):
    """Fetch the given URL and count the whitespace-separated words in it.

    Args:
        request: URL to download, taken from the remainder of the path.
            NOTE(review): a query string on the target URL is presumably
            not part of this path value - confirm if that matters.

    Returns:
        ``{"number_of_word": <count>}`` for the downloaded body text.

    Raises:
        HTTPException: 400 when the value is not a usable URL, 502 when the
            download fails or the remote server answers with an error status.
    """
    # SECURITY: the URL comes straight from the caller and is fetched by this
    # server. Restrict the allowed hosts before exposing this publicly.
    try:
        response = requests.get(request, timeout=_FETCH_TIMEOUT_SECONDS)
        response.raise_for_status()
    except (
        requests.exceptions.MissingSchema,
        requests.exceptions.InvalidSchema,
        requests.exceptions.InvalidURL,
    ) as err:
        raise HTTPException(status_code=400, detail=f"Invalid URL: {err}") from err
    except requests.exceptions.RequestException as err:
        raise HTTPException(status_code=502, detail=f"Could not fetch URL: {err}") from err

    # split() without arguments ignores runs of whitespace, so an empty body
    # yields 0 words.
    len_content = len(response.text.split())
    return {"number_of_word": len_content}