major_project / app.py
sasank-229's picture
Upload 2 files
11bce6b verified
raw
history blame
4.78 kB
from flask import Flask, request, render_template,url_for, current_app, abort
from tqdm import tqdm
import numpy as np
# import nbformat
# from nbconvert import PythonExporter
# import os
import torch
from transformers import AutoModel,AutoTokenizer
import pickle
from xgboost import XGBClassifier
app = Flask(__name__)
# Load the model during the application startup
# @before_first_request
def load_model():
    """Load the pickled XGBoost classifier into the Flask app context.

    Reads ``static/ipynbFiles/classifier2.pkl`` and stores the unpickled
    object on ``current_app.clf`` so request handlers can reuse it.
    Aborts with HTTP 500 if the file is missing or unreadable.
    """
    try:
        # NOTE(review): pickle.load is fine for an app-bundled artifact, but
        # must never be pointed at user-supplied data (arbitrary code execution).
        with open('static/ipynbFiles/classifier2.pkl', 'rb') as file:
            current_app.clf = pickle.load(file)
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        abort(500)  # Internal Server Error


# BUGFIX: app.before_first_request was deprecated in Flask 2.2 and REMOVED in
# Flask 2.3, so registering it crashes on any current Flask.  Loading eagerly
# inside an application context has the same effect and works on all versions.
with app.app_context():
    load_model()
def model_extract(input_string):
    """Convert one raw text string into a padded token-id tensor.

    Tokenizes *input_string* with the ai4bharat/indic-bert tokenizer, wraps it
    in ``[CLS]``/``[SEP]`` markers, truncates/pads to exactly 256 token ids and
    returns a ``torch.tensor`` of shape (1, 256) suitable for the classifier.

    NOTE(review): the tokenizer is downloaded/loaded on every call, which is
    very slow; consider hoisting it to module level or caching it.
    """
    param = {'maxLen': 256}
    # BUGFIX: the original also ran AutoModel.from_pretrained(...) here, but the
    # resulting model was never used -- only the tokenizer output is returned.
    # Dropping it avoids a large, pointless model download per request.
    tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")

    def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre',
                      truncating='pre', value=0.0):
        """Truncate then pad each sequence so every row has *maxlen* entries.

        BUGFIX: truncation now happens BEFORE padding.  The original padded
        first via ``np.pad(seq, (maxlen - len(seq), 0), ...)``, which raises
        ValueError (negative pad width) for any sequence longer than *maxlen*
        -- reachable here because the tokenizer truncates at 512 tokens while
        maxlen is 256.
        """
        if padding not in ('pre', 'post'):
            raise ValueError("Padding should be 'pre' or 'post'.")
        if truncating not in ('pre', 'post'):
            raise ValueError("Truncating should be 'pre' or 'post'.")
        padded_sequences = []
        for seq in sequences:
            seq = np.asarray(seq)
            if len(seq) > maxlen:
                # Keep the tail ('pre') or the head ('post') of the sequence.
                seq = seq[-maxlen:] if truncating == 'pre' else seq[:maxlen]
            fill = maxlen - len(seq)
            if padding == 'pre':
                seq = np.pad(seq, (fill, 0), 'constant', constant_values=value)
            else:
                seq = np.pad(seq, (0, fill), 'constant', constant_values=value)
            padded_sequences.append(seq)
        return np.array(padded_sequences, dtype=dtype)

    def create_attention_masks(input_ids):
        """Return 1.0 where a real (nonzero) token id sits, 0.0 at padding."""
        return np.array([[float(i > 0) for i in seq] for seq in input_ids])

    def getFeaturesandLabel(single_string, label):
        """Tokenize one string; return (ids, attention-mask, label) tensors."""
        sentences = ["[CLS] " + single_string + " [SEP]"]
        # Tokenizer-level hard cap of 512 tokens (BERT-family limit).
        tokenizer_texts = [tokenizer.tokenize(t)[:512] for t in sentences]
        input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenizer_texts]
        input_ids = pad_sequences(sequences=input_ids, maxlen=param['maxLen'],
                                  dtype='long', padding='post', truncating='post')
        attention_masks_data = create_attention_masks(input_ids)
        X_data = torch.tensor(input_ids)
        attention_masks_data = torch.tensor(attention_masks_data)
        y_data = torch.tensor(label)
        return X_data, attention_masks_data, y_data

    # Label is a dummy [0]: only the token-id tensor is used by the caller.
    X_data, _attention_masks, _y_data = getFeaturesandLabel(input_string, [0])
    return X_data
# def model_heart():
# # Path to the notebook file
# notebook_path = os.path.join('static', 'ipynbFiles', 'trail.ipynb')
# # Read the notebook content
# with open(notebook_path, 'r', encoding='utf-8') as notebook_file:
# notebook_content = nbformat.read(notebook_file, as_version=4)
# # Create a PythonExporter
# python_exporter = PythonExporter()
# # Convert the notebook to a Python script
# python_script, _ = python_exporter.from_notebook_node(notebook_content)
# print(python_script)
# # Execute the Python script
# exec(python_script)
# model_heart()
# Now you can use the variables and functions defined in the notebook in your app.py
from tempCodeRunnerFile import match
@app.route('/')
def index():
    """Serve the landing page with the text-input form."""
    return render_template('index.html')
@app.route('/predict', methods=['POST', 'GET'])
def predict():
    """Classify submitted text as cyberbullying or not and re-render the page.

    Reads the ``text`` form field; if it contains any character from the
    ``match`` blocklist the text is labelled cyberbullying outright, otherwise
    it is tokenized via ``model_extract`` and scored by the pickled XGBoost
    classifier (label 0 == cyberbullying).
    """
    # BUGFIX: the route accepts GET, where request.form['text'] would raise a
    # 400 -- use .get with a default so a bare GET renders gracefully.
    input_string = request.form.get('text', '')
    print('text: ', input_string)
    # NOTE(review): reloading the pickle on every request is slow; consider
    # caching it at startup (cf. load_model).
    with open('static/ipynbFiles/classifier_10epochs_updated.pkl', 'rb') as file:
        clf = pickle.load(file)
    if any(c in input_string for c in match):
        prediction = [0]
    else:
        ans = model_extract(input_string)
        print('torch.tensor variable: ', ans)
        prediction = clf.predict(ans)
    print('prediction=', prediction)
    # BUGFIX: index into the result instead of `prediction == [0]` -- clf.predict
    # returns a numpy array, and comparing it to a list yields an array whose
    # truthiness is only accidentally valid for single-element results.
    label = 'Cyberbullying Text' if int(prediction[0]) == 0 else 'Non-Cyberbullying Text'
    return render_template('index.html', pred=label,
                           question='వాక్యం - ' + input_string)
if __name__ == "__main__":
    # Development server only -- debug=True exposes the Werkzeug debugger and
    # must not be used in production deployments.
    app.run(debug=True,port=8001)
#for creating a pickle file:
# with open('classifier.pkl','wb') as file:
# pickle.dump(xgb, file)