# HussainM899's picture
# Create app.py
# 7261c03 verified
# -*- coding: utf-8 -*-
"""Text_Classification_Model_Deployment.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/16FpeDQ0i5k_mttZZgxLDHVOMEd-6qGRU
# **Text Classification Model Deployment using FastAPI and Gradio**
"""
"""- ### Importing Libraries"""
# Basic imports for data manipulation and visualization
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# scikit-learn imports for model loading and possibly preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Joblib or Pickle for loading your trained model
import joblib
import pickle
import os
# Startup diagnostics: report the working directory and its contents.
# The model artifacts further down are loaded by relative file name, so
# these two lines show where those names will be resolved.
print("Current Working Directory: ", os.getcwd())
print("Files in Current Directory: ", os.listdir('.'))

import nltk

# Fetch the NLTK data packages at startup, in this fixed order.
# clean_text() relies on the stopword list and the WordNet lemmatizer.
for _nltk_resource in ('punkt', 'wordnet', 'omw-1.4', 'stopwords'):
    nltk.download(_nltk_resource)
# Import the necessary libraries for preprocessing and deployment
import re
import joblib
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from fastapi import FastAPI
# Text-cleaning patterns, compiled once at import time and reused per call.
_HTML_TAG_RE = re.compile(r'<.*?>')
_NON_ALPHA_RE = re.compile(r'[^a-zA-Z\s]')


def clean_text(text):
    """Normalise raw text into a space-separated string of lemmatized tokens.

    Steps, in order:
      1. strip anything that looks like an HTML tag (``<...>``);
      2. drop every character that is not an ASCII letter or whitespace,
         then lowercase;
      3. split on whitespace;
      4. discard English stopwords and lemmatize what remains.

    Args:
        text: The raw input string.

    Returns:
        The cleaned tokens joined by single spaces ('' if nothing survives).
    """
    text = _HTML_TAG_RE.sub('', text)

    # No replacement limit here: every non-letter must be removed. Passing
    # regex flags positionally to re.sub() lands them in the ``count`` slot
    # (re.I | re.A == 258), which silently stops cleaning after 258
    # substitutions on long inputs.
    text = _NON_ALPHA_RE.sub('', text).lower()

    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words('english'))
    return ' '.join(
        lemmatizer.lemmatize(word)
        for word in text.split()
        if word not in stop_words
    )
# File names of the serialized artifacts. They are relative, so they are
# resolved against the current working directory at import time.
_MODEL_PATH = 'text_classification_LR_model (1).joblib'
_VECTORIZER_PATH = 'tfidf_vectorizer.joblib'

# Trained classifier used by predict_class().
# NOTE(review): joblib.load unpickles the file; only load trusted artifacts.
model = joblib.load(_MODEL_PATH)

# TF-IDF vectorizer used by preprocess() to turn cleaned text into features.
tfidf_vectorizer = joblib.load(_VECTORIZER_PATH)
def preprocess(input_text):
    """Convert one raw text string into the features passed to the model.

    The text is first normalised with ``clean_text``. It is then wrapped in
    a one-element list, because the vectorizer is given a batch of
    documents rather than a bare string.

    Args:
        input_text: The raw text to vectorize.

    Returns:
        The result of ``tfidf_vectorizer.transform`` for that
        single-document batch.
    """
    cleaned = clean_text(input_text)
    return tfidf_vectorizer.transform([cleaned])
def predict_class(input_text):
    """Classify a raw text string and return the name of its category.

    Args:
        input_text: The raw text to classify.

    Returns:
        One of 'World', 'Sports', 'Business' or 'Sci/Tech'.
    """
    features = preprocess(input_text)
    # The model is called on a single-document batch; take its only result.
    label_index = model.predict(features)[0]
    # Assumes the model emits integer labels 0-3 in exactly this order.
    # TODO confirm against the training code.
    classes = ['World', 'Sports', 'Business', 'Sci/Tech']
    return classes[label_index]
# FastAPI application object; the @app.get / @app.post decorators below
# register the HTTP route handlers on it.
app = FastAPI()
# Root endpoint (GET /): replies with a fixed greeting string.
@app.get('/')
async def welcome():
    greeting = "Welcome to the Text Classification API"
    return greeting
# Classification endpoint (POST /classify_text): runs predict_class() on
# the supplied text and wraps the category name in a JSON object.
# NOTE(review): predict_class is a plain synchronous function, so it runs
# inline inside this coroutine. Confirm that is acceptable under load.
@app.post('/classify_text')
async def classify_text(input_text: str):
    return {"classification": predict_class(input_text)}
import gradio as gr

# Gradio front end: one text input, one text output, backed by
# predict_class().
iface = gr.Interface(
    fn=predict_class,
    inputs="text",
    outputs="text",
    title="Text Classification API",
    description="Enter text to classify it into categories: World, Sports, Business, Sci/Tech.",
)

# NOTE(review): this call sits at module level, so it runs whenever the
# module is executed or imported, including an import done only to obtain
# the FastAPI `app`.
iface.launch()