HussainM899 committed on
Commit
7261c03
·
verified ·
1 Parent(s): 62ddcd7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Text_Classification_Model_Deployment.ipynb
3
+ Automatically generated by Colaboratory.
4
+ Original file is located at
5
+ https://colab.research.google.com/drive/16FpeDQ0i5k_mttZZgxLDHVOMEd-6qGRU
6
+ # **Text Classification Model Deployment using FastAPI and Gradio**
7
+ """
8
+
9
+ """- ### Importing Libraries"""
10
+
11
+ # Basic imports for data manipulation and visualization
12
+ import numpy as np
13
+ import pandas as pd
14
+ import matplotlib.pyplot as plt
15
+ import seaborn as sns
16
+
17
+ # scikit-learn imports for model loading and possibly preprocessing
18
+ from sklearn.model_selection import train_test_split
19
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
20
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
21
+
22
+ # Joblib or Pickle for loading your trained model
23
+ import joblib
24
+ import pickle
25
+
26
+ import os
27
# Startup diagnostics: report where the process is running and what
# entries that directory contains.
working_dir = os.getcwd()
print("Current Working Directory: ", working_dir)
directory_entries = os.listdir('.')
print("Files in Current Directory: ", directory_entries)
30
+
31
+ import nltk
32
# Fetch the NLTK resources requested at startup, in the same order as before.
for resource_name in ('punkt', 'wordnet', 'omw-1.4', 'stopwords'):
    nltk.download(resource_name)
36
+
37
+ # Import the necessary libraries for preprocessing and deployment
38
+ import re
39
+ import joblib
40
+ import nltk
41
+ from nltk.corpus import stopwords
42
+ from nltk.stem import WordNetLemmatizer
43
+ from sklearn.feature_extraction.text import TfidfVectorizer
44
+ from fastapi import FastAPI
45
+
46
+ # Define the custom function for text cleaning
47
def clean_text(text):
    """Normalize raw text into a space-joined string of lemmatized tokens.

    The text is stripped of ``<...>`` spans, reduced to letters (a-z, A-Z)
    and whitespace, lowercased, split on whitespace, filtered against the
    NLTK English stopword list, and lemmatized with ``WordNetLemmatizer``.

    Args:
        text: Raw input string.

    Returns:
        The surviving tokens joined by single spaces; an empty string when
        no token survives.
    """
    # Remove HTML-like tags: anything between '<' and the next '>' on a line.
    text = re.sub(r'<.*?>', '', text)
    # Remove every character that is not a letter or whitespace, then
    # lowercase. The original passed `re.I|re.A` as the fourth positional
    # argument of re.sub, which is `count`, not `flags`: no flags were applied
    # and at most 258 characters were removed, so longer inputs kept digits
    # and punctuation. The character class already lists both cases, so no
    # flags are needed; dropping the stray argument lifts the cap and leaves
    # results for inputs below the old cap unchanged.
    text = re.sub(r'[^a-zA-Z\s]', '', text).lower()
    # Tokenization: plain whitespace split.
    tokens = text.split()
    # Remove stopwords and lemmatize the remaining tokens.
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words('english'))
    return ' '.join(
        lemmatizer.lemmatize(word) for word in tokens if word not in stop_words
    )
59
+
60
+ # Load your trained model
61
# Classifier used by predict_class. The file name, including the " (1)"
# suffix, must match the artifact on disk exactly.
# NOTE(review): joblib.load unpickles its input, so only trusted files
# should ever be loaded here.
model = joblib.load('text_classification_LR_model (1).joblib')

# TF-IDF vectorizer used by preprocess to turn cleaned text into features.
tfidf_vectorizer = joblib.load('tfidf_vectorizer.joblib')
65
+
66
+ # Preprocessing function for input text
67
def preprocess(input_text):
    """Clean a raw string and turn it into TF-IDF features.

    Args:
        input_text: Raw text to be classified.

    Returns:
        Whatever ``tfidf_vectorizer.transform`` produces for a one-element
        list containing the cleaned text.
    """
    cleaned = clean_text(input_text)
    # The vectorizer is handed a single-item batch.
    return tfidf_vectorizer.transform([cleaned])
74
+
75
+ # Predict the class for the input text
76
def predict_class(input_text):
    """Return the category name the model predicts for ``input_text``.

    Args:
        input_text: Raw text to classify.

    Returns:
        One of 'World', 'Sports', 'Business' or 'Sci/Tech', chosen by using
        the first element of ``model.predict`` as an index into that list.
    """
    label_names = ['World', 'Sports', 'Business', 'Sci/Tech']
    features = preprocess(input_text)
    predicted = model.predict(features)
    # Only one sample was submitted, so take the first prediction.
    return label_names[predicted[0]]
82
+
83
# FastAPI application object; the route decorators below register on it.
app = FastAPI()
85
+
86
@app.get('/')
async def welcome():
    """Handle GET '/' by returning a fixed greeting string."""
    greeting = "Welcome to the Text Classification API"
    return greeting
89
+
90
@app.post('/classify_text')
async def classify_text(input_text: str):
    """Handle POST '/classify_text' by classifying the supplied text.

    Args:
        input_text: Text to classify, declared as a plain ``str`` parameter.

    Returns:
        A dict with the single key "classification" holding the category
        name returned by ``predict_class``.
    """
    return {"classification": predict_class(input_text)}
94
+
95
+ import gradio as gr
96
+
97
+ # Create Gradio interface
98
# Text-in / text-out Gradio UI that calls predict_class directly.
iface = gr.Interface(
    fn=predict_class,
    inputs="text",
    outputs="text",
    title="Text Classification API",
    description="Enter text to classify it into categories: World, Sports, Business, Sci/Tech.",
)
# Launch runs at module level, so the UI starts whenever this file executes.
iface.launch()
104
+
105
+