major_project / app.py
sasank-229's picture
Upload 2 files
11bce6b verified
raw
history blame
4.78 kB
from flask import Flask, request, render_template,url_for, current_app, abort
from tqdm import tqdm
import numpy as np
# import nbformat
# from nbconvert import PythonExporter
# import os
import torch
from transformers import AutoModel,AutoTokenizer
import pickle
from xgboost import XGBClassifier
app = Flask(__name__)
# Load the model during the application startup
# @before_first_request
def load_model():
    """Load the pickled XGBoost classifier into the Flask app context.

    Reads ``static/ipynbFiles/classifier2.pkl`` and stores the unpickled
    object on ``current_app.clf`` so request handlers can reuse it.
    Aborts with HTTP 500 if the file is missing or unreadable.
    """
    try:
        # NOTE(review): pickle.load is fine for an app-bundled artifact, but
        # must never be pointed at user-supplied data (arbitrary code execution).
        with open('static/ipynbFiles/classifier2.pkl', 'rb') as file:
            current_app.clf = pickle.load(file)
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        abort(500)  # Internal Server Error


# BUGFIX: app.before_first_request was deprecated in Flask 2.2 and REMOVED in
# Flask 2.3, so registering it crashes on any current Flask.  Loading eagerly
# inside an application context has the same effect and works on all versions.
with app.app_context():
    load_model()
def model_extract(input_string):
    """Convert one raw text string into a padded token-id tensor.

    Tokenizes *input_string* with the ai4bharat/indic-bert tokenizer, wraps it
    in ``[CLS]``/``[SEP]`` markers, truncates/pads to exactly 256 token ids and
    returns a ``torch.tensor`` of shape (1, 256) suitable for the classifier.

    NOTE(review): the tokenizer is downloaded/loaded on every call, which is
    very slow; consider hoisting it to module level or caching it.
    """
    param = {'maxLen': 256}
    # BUGFIX: the original also ran AutoModel.from_pretrained(...) here, but the
    # resulting model was never used -- only the tokenizer output is returned.
    # Dropping it avoids a large, pointless model download per request.
    tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")

    def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre',
                      truncating='pre', value=0.0):
        """Truncate then pad each sequence so every row has *maxlen* entries.

        BUGFIX: truncation now happens BEFORE padding.  The original padded
        first via ``np.pad(seq, (maxlen - len(seq), 0), ...)``, which raises
        ValueError (negative pad width) for any sequence longer than *maxlen*
        -- reachable here because the tokenizer truncates at 512 tokens while
        maxlen is 256.
        """
        if padding not in ('pre', 'post'):
            raise ValueError("Padding should be 'pre' or 'post'.")
        if truncating not in ('pre', 'post'):
            raise ValueError("Truncating should be 'pre' or 'post'.")
        padded_sequences = []
        for seq in sequences:
            seq = np.asarray(seq)
            if len(seq) > maxlen:
                # Keep the tail ('pre') or the head ('post') of the sequence.
                seq = seq[-maxlen:] if truncating == 'pre' else seq[:maxlen]
            fill = maxlen - len(seq)
            if padding == 'pre':
                seq = np.pad(seq, (fill, 0), 'constant', constant_values=value)
            else:
                seq = np.pad(seq, (0, fill), 'constant', constant_values=value)
            padded_sequences.append(seq)
        return np.array(padded_sequences, dtype=dtype)

    def create_attention_masks(input_ids):
        """Return 1.0 where a real (nonzero) token id sits, 0.0 at padding."""
        return np.array([[float(i > 0) for i in seq] for seq in input_ids])

    def getFeaturesandLabel(single_string, label):
        """Tokenize one string; return (ids, attention-mask, label) tensors."""
        sentences = ["[CLS] " + single_string + " [SEP]"]
        # Tokenizer-level hard cap of 512 tokens (BERT-family limit).
        tokenizer_texts = [tokenizer.tokenize(t)[:512] for t in sentences]
        input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenizer_texts]
        input_ids = pad_sequences(sequences=input_ids, maxlen=param['maxLen'],
                                  dtype='long', padding='post', truncating='post')
        attention_masks_data = create_attention_masks(input_ids)
        X_data = torch.tensor(input_ids)
        attention_masks_data = torch.tensor(attention_masks_data)
        y_data = torch.tensor(label)
        return X_data, attention_masks_data, y_data

    # Label is a dummy [0]: only the token-id tensor is used by the caller.
    X_data, _attention_masks, _y_data = getFeaturesandLabel(input_string, [0])
    return X_data
# def model_heart():
# # Path to the notebook file
# notebook_path = os.path.join('static', 'ipynbFiles', 'trail.ipynb')
# # Read the notebook content
# with open(notebook_path, 'r', encoding='utf-8') as notebook_file:
# notebook_content = nbformat.read(notebook_file, as_version=4)
# # Create a PythonExporter
# python_exporter = PythonExporter()
# # Convert the notebook to a Python script
# python_script, _ = python_exporter.from_notebook_node(notebook_content)
# print(python_script)
# # Execute the Python script
# exec(python_script)
# model_heart()
# Now you can use the variables and functions defined in the notebook in your app.py
from tempCodeRunnerFile import match
@app.route('/')
def index():
    """Serve the landing page with the text-input form."""
    return render_template('index.html')
@app.route('/predict', methods=['POST', 'GET'])
def predict():
    """Classify submitted text as cyberbullying or not and re-render the page.

    Reads the ``text`` form field; if it contains any character from the
    ``match`` blocklist the text is labelled cyberbullying outright, otherwise
    it is tokenized via ``model_extract`` and scored by the pickled XGBoost
    classifier (label 0 == cyberbullying).
    """
    # BUGFIX: the route accepts GET, where request.form['text'] would raise a
    # 400 -- use .get with a default so a bare GET renders gracefully.
    input_string = request.form.get('text', '')
    print('text: ', input_string)
    # NOTE(review): reloading the pickle on every request is slow; consider
    # caching it at startup (cf. load_model).
    with open('static/ipynbFiles/classifier_10epochs_updated.pkl', 'rb') as file:
        clf = pickle.load(file)
    if any(c in input_string for c in match):
        prediction = [0]
    else:
        ans = model_extract(input_string)
        print('torch.tensor variable: ', ans)
        prediction = clf.predict(ans)
    print('prediction=', prediction)
    # BUGFIX: index into the result instead of `prediction == [0]` -- clf.predict
    # returns a numpy array, and comparing it to a list yields an array whose
    # truthiness is only accidentally valid for single-element results.
    label = 'Cyberbullying Text' if int(prediction[0]) == 0 else 'Non-Cyberbullying Text'
    return render_template('index.html', pred=label,
                           question='వాక్యం - ' + input_string)
if __name__ == "__main__":
    # Development server only -- debug=True exposes the Werkzeug debugger and
    # must not be used in production deployments.
    app.run(debug=True,port=8001)
#for creating a pickle file:
# with open('classifier.pkl','wb') as file:
# pickle.dump(xgb, file)