aminaj committed
Commit
c62808a
1 Parent(s): 1501229

Upload app.py

Files changed (1)
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
+ from contextlib import asynccontextmanager
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel, ValidationError
+ from fastapi.encoders import jsonable_encoder
+
+ # TEXT PREPROCESSING
+ # --------------------------------------------------------------------
+ import re
+ import string
+ import nltk
+ nltk.download('punkt')
+ nltk.download('punkt_tab')  # required by newer NLTK releases for word_tokenize
+ nltk.download('wordnet')
+ nltk.download('omw-1.4')
+ from nltk.stem import WordNetLemmatizer
+
+ # Function to remove URLs from text
+ def remove_urls(text):
+     return re.sub(r'http[s]?://\S+', '', text)
+
+ # Function to remove punctuation from text
+ def remove_punctuation(text):
+     # re.escape keeps characters such as ] ^ - \ from breaking the character class
+     return re.sub(r'[' + re.escape(string.punctuation) + r']', '', str(text))
+
+ # Function to convert the text into lower case
+ def lower_case(text):
+     return text.lower()
+
+ # Function to lemmatize text
+ def lemmatize(text):
+     wordnet_lemmatizer = WordNetLemmatizer()
+     tokens = nltk.word_tokenize(text)
+     # Lemmatize each token and rejoin into a single string
+     return ' '.join(wordnet_lemmatizer.lemmatize(w) for w in tokens)
+
+ def preprocess_text(text):
+     # Preprocess the input text
+     text = remove_urls(text)
+     text = remove_punctuation(text)
+     text = lower_case(text)
+     text = lemmatize(text)
+     return text
+
+ # Load the model via the FastAPI lifespan event so that it is loaded once at startup for efficiency
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     # Load the model from the HuggingFace transformers library
+     from transformers import pipeline
+     global sentiment_task
+     sentiment_task = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")
+     yield
+     # Clean up the model and release the resources
+     del sentiment_task
+
+ # Initialize the FastAPI app
+ app = FastAPI(lifespan=lifespan)
+
+ # Define the input data model
+ class TextInput(BaseModel):
+     text: str
+
+ # Define the welcome endpoint
+ @app.get('/')
+ async def welcome():
+     return "Welcome to our Text Classification API"
+
+ # Maximum allowed input length (characters)
+ MAX_TEXT_LENGTH = 1000
+
+ # Define the sentiment analysis endpoint (the input text travels in the request body, not the path)
+ @app.post('/analyze')
+ async def classify_text(text_input: TextInput):
+     try:
+         # Re-validate the payload explicitly (FastAPI has already parsed it into TextInput)
+         text_data = TextInput(**jsonable_encoder(text_input))
+
+         # Validate input text length
+         if len(text_data.text) > MAX_TEXT_LENGTH:
+             raise HTTPException(status_code=400, detail="Text length exceeds maximum allowed length")
+         elif len(text_data.text) == 0:
+             raise HTTPException(status_code=400, detail="Text cannot be empty")
+     except ValidationError as e:
+         # Handle validation error
+         raise HTTPException(status_code=422, detail=str(e))
+
+     try:
+         # Perform text classification
+         return sentiment_task(preprocess_text(text_data.text))
+     except ValueError as ve:
+         # Handle value error
+         raise HTTPException(status_code=400, detail=str(ve))
+     except Exception as e:
+         # Handle other server errors
+         raise HTTPException(status_code=500, detail=str(e))
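
For reference, a minimal sketch of how a client could exercise the /analyze endpoint. This is not part of the commit: it assumes the app is served locally with "uvicorn app:app --port 8000" and that the requests package is installed; the host, port, and sample text are illustrative.

import requests  # assumption: requests is installed in the client environment

resp = requests.post(
    "http://localhost:8000/analyze",  # hypothetical local URL; adjust to your deployment
    json={"text": "I love this library, it works great!"},  # JSON body matching the TextInput model
)
print(resp.status_code)
print(resp.json())  # pipeline output, e.g. [{"label": "positive", "score": ...}]

The response shape follows the transformers sentiment-analysis pipeline, which returns a list of label/score dictionaries; oversized or empty text should come back as a 400 per the validation above.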