# Built-in
import re

# ASGI server (used by the commented-out entry point at the bottom of the file)
import uvicorn

# Dependencies for FastAPI
from fastapi import FastAPI
from fastapi.responses import RedirectResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# Hugging Face Transformers for the EmoRoBERTa model
from transformers import RobertaTokenizerFast, TFRobertaForSequenceClassification, pipeline

# Request body schema for the prediction endpoint
class Paragraph(BaseModel):
    input: str

# Load the EmoRoBERTa model and tokenizer once, and reuse them in the pipeline
tokenizer = RobertaTokenizerFast.from_pretrained("arpanghoshal/EmoRoBERTa")
model = TFRobertaForSequenceClassification.from_pretrained("arpanghoshal/EmoRoBERTa")
emotion = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, return_all_scores=True)
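# Rough sketch of the pipeline's output shape (labels and scores below are
# illustrative, not actual model output): with return_all_scores=True, each
# input string yields one inner list containing a score for every emotion label.
#   emotion("I am so happy today!")
#   -> [[{'label': 'joy', 'score': 0.97}, {'label': 'admiration', 'score': 0.01}, ...]]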

# Create the FastAPI app
app = FastAPI()

# Setup CORS policy
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

def remove_unknown_symbols(text):
    # Keep only alphanumerics, whitespace, and common punctuation
    pattern = re.compile(r'[^A-Za-z0-9\s.,?!\'"-]')
    # Replace everything else with an empty string
    cleaned_text = pattern.sub('', text)
    # Truncate to at most 1020 characters
    return cleaned_text[:1020]
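
# Quick illustration of what the cleaner does (hypothetical inputs, shown for
# reference rather than asserted as tests):
#   remove_unknown_symbols("So cool 🎉!!")      -> "So cool !!"
#   remove_unknown_symbols("très bien, merci")  -> "trs bien, merci"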

# APIs
@app.get("/")
async def docs():
    return RedirectResponse(url="/docs")

@app.post("/emoroberta")
async def predict_emotions_emoroberta(paragraph : Paragraph):
    # Split the huge chunk of text into a list of strings
    text_list = [text.strip() for text in re.split(r'[.!?;\n]', paragraph.input) if text.strip()]
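    # Illustrative example of the split (not asserted as a test):
    #   "Great day! Loved it. Thanks" -> ["Great day", "Loved it", "Thanks"]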

    # Create a list to store predictions per text
    predictions_per_text = []
    for text in text_list:
        cleaned_text = remove_unknown_symbols(text)
        emotions = emotion(cleaned_text)[0]
        predictions_per_text.append(emotions)

    # Create a dictionary to aggregate scores for each label
    total = {}

    # Iterate over each segment's predictions and sum the scores per label
    for prediction in predictions_per_text:
        for emotion_dict in prediction:
            label = emotion_dict['label']
            score = emotion_dict['score']
            total[label] = total.get(label, 0) + score

    # Convert the dictionary to a list of dictionaries
    result = [{"label": label, "score": score} for label, score in total.items()]

    # Sort the result in descending order based on score
    sorted_result = sorted(result, key=lambda x: x['score'], reverse=True)

    return {"predictions": sorted_result}

# if __name__ == "__main__":
#   uvicorn.run("api:app", host="0.0.0.0", port=8000, reload=True)
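
# Example request against a locally running server (assumes the app is served on
# port 8000 and the `requests` package is installed; the response shown is
# illustrative, since scores are summed across segments):
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/emoroberta",
#       json={"input": "I love this project. The results were disappointing, though."},
#   )
#   print(resp.json())
#   # {'predictions': [{'label': 'love', 'score': ...}, {'label': 'disappointment', 'score': ...}, ...]}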