Spaces:
Runtime error
Runtime error
sasank-229
committed on
Upload 2 files
Browse files- app.py +130 -0
- tempCodeRunnerFile.py +1 -0
app.py
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, request, render_template,url_for, current_app, abort
|
2 |
+
from tqdm import tqdm
|
3 |
+
import numpy as np
|
4 |
+
# import nbformat
|
5 |
+
# from nbconvert import PythonExporter
|
6 |
+
# import os
|
7 |
+
import torch
|
8 |
+
from transformers import AutoModel,AutoTokenizer
|
9 |
+
import pickle
|
10 |
+
from xgboost import XGBClassifier
|
11 |
+
|
12 |
+
# Flask application object; the route handlers in this file are registered on it.
app = Flask(__name__)
|
13 |
+
|
14 |
+
# Load the model during the application startup
|
15 |
+
# @before_first_request
|
16 |
+
def load_model():
    """Unpickle the classifier and attach it to the running app as ``clf``.

    Must be called while a Flask application context is active, because the
    result is stored through ``current_app``. Any failure while opening or
    unpickling the file is printed and converted into an HTTP 500 via
    ``abort``.

    NOTE(review): nothing visible in this file reads ``current_app.clf``;
    ``predict`` loads a different pickle. Confirm whether this hook is still
    needed.
    """
    try:
        # NOTE: pickle can execute arbitrary code while loading; this file
        # must only ever come from a trusted source.
        with open('static/ipynbFiles/classifier2.pkl', 'rb') as file:
            current_app.clf = pickle.load(file)
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        abort(500)  # Internal Server Error


# ``Flask.before_first_request`` was removed in Flask 2.3, so calling it fails
# at import time on current releases. A ``before_request`` hook guarded by a
# flag gives the same "run once, inside the first request" behaviour on every
# Flask version.
_model_loaded = False


@app.before_request
def _load_model_once():
    """Run ``load_model`` on the first request only."""
    global _model_loaded
    if not _model_loaded:
        load_model()
        # Set only after a successful load, so a failed attempt is retried on
        # the next request instead of being silently skipped.
        _model_loaded = True
|
24 |
+
|
25 |
+
# Hugging Face checkpoint whose tokenizer converts text into token ids.
_TOKENIZER_NAME = "ai4bharat/indic-bert"
# Fixed length of the id vector returned by ``model_extract``.
_MAX_LEN = 256
# Tokenizer instance; filled in by the first ``_get_tokenizer`` call.
_tokenizer = None


def _get_tokenizer():
    """Return the tokenizer, loading it on first use instead of on every call."""
    global _tokenizer
    if _tokenizer is None:
        _tokenizer = AutoTokenizer.from_pretrained(_TOKENIZER_NAME)
    return _tokenizer


def _pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.0):
    """Truncate and pad every sequence to exactly ``maxlen`` items.

    Args:
        sequences: iterable of integer sequences.
        maxlen: target length; ``None`` means the length of the longest
            sequence.
        dtype: dtype of the returned array.
        padding: ``'pre'`` or ``'post'`` - which side receives ``value``.
        truncating: ``'pre'`` or ``'post'`` - which side is cut when a
            sequence is longer than ``maxlen``.
        value: filler used for padding.

    Returns:
        ``numpy.ndarray`` with one row per input sequence.

    Raises:
        ValueError: if ``padding`` or ``truncating`` is not ``'pre'``/``'post'``.
    """
    if padding not in ('pre', 'post'):
        raise ValueError("Padding should be 'pre' or 'post'.")
    if truncating not in ('pre', 'post'):
        raise ValueError("Truncating should be 'pre' or 'post'.")

    rows = [list(seq) for seq in sequences]
    if maxlen is None:
        maxlen = max((len(row) for row in rows), default=0)

    padded_rows = []
    for row in rows:
        # Truncate BEFORE padding: np.pad rejects negative pad widths, so an
        # over-long sequence must be shortened first.
        if len(row) > maxlen:
            if truncating == 'pre':
                row = row[len(row) - maxlen:]
            else:
                row = row[:maxlen]
        missing = maxlen - len(row)
        pad_width = (missing, 0) if padding == 'pre' else (0, missing)
        padded_rows.append(np.pad(row, pad_width, 'constant', constant_values=value))

    return np.array(padded_rows, dtype=dtype)


def model_extract(input_string):
    """Convert one text string into a fixed-length tensor of token ids.

    The text is wrapped in ``[CLS]``/``[SEP]`` markers, tokenized, mapped to
    vocabulary ids, then cut or zero-padded at the end to ``_MAX_LEN`` ids.

    Args:
        input_string: the text to encode.

    Returns:
        ``torch.Tensor`` holding a single row of ``_MAX_LEN`` token ids.
    """
    tokenizer = _get_tokenizer()

    sentence = "[CLS] " + input_string + " [SEP]"
    tokens = tokenizer.tokenize(sentence)
    token_ids = tokenizer.convert_tokens_to_ids(tokens)

    # 'post' truncation keeps the first _MAX_LEN ids; shorter inputs get
    # trailing zeros.
    input_ids = _pad_sequences(
        sequences=[token_ids],
        maxlen=_MAX_LEN,
        dtype='long',
        padding='post',
        truncating='post',
    )
    return torch.tensor(input_ids)
|
82 |
+
|
83 |
+
|
84 |
+
# def model_heart():
|
85 |
+
# # Path to the notebook file
|
86 |
+
# notebook_path = os.path.join('static', 'ipynbFiles', 'trail.ipynb')
|
87 |
+
# # Read the notebook content
|
88 |
+
# with open(notebook_path, 'r', encoding='utf-8') as notebook_file:
|
89 |
+
# notebook_content = nbformat.read(notebook_file, as_version=4)
|
90 |
+
# # Create a PythonExporter
|
91 |
+
# python_exporter = PythonExporter()
|
92 |
+
# # Convert the notebook to a Python script
|
93 |
+
# python_script, _ = python_exporter.from_notebook_node(notebook_content)
|
94 |
+
# print(python_script)
|
95 |
+
# # Execute the Python script
|
96 |
+
# exec(python_script)
|
97 |
+
|
98 |
+
# model_heart()
|
99 |
+
# Now you can use the variables and functions defined in the notebook in your app.py
|
100 |
+
from tempCodeRunnerFile import match
|
101 |
+
@app.route('/')
def index():
    """Render the ``index.html`` template for the site root."""
    page = render_template('index.html')
    return page
|
104 |
+
|
105 |
+
# Classifier used by ``predict``; unpickled by the first request that needs it.
_classifier = None


def _get_classifier():
    """Return the pickled classifier, reading it from disk on first use only."""
    global _classifier
    if _classifier is None:
        # NOTE: pickle can execute arbitrary code while loading; this file
        # must only ever come from a trusted source.
        with open('static/ipynbFiles/classifier_10epochs_updated.pkl', 'rb') as file:
            _classifier = pickle.load(file)
    return _classifier


@app.route('/predict', methods=['POST', 'GET'])
def predict():
    """Classify the submitted text and render the verdict on ``index.html``.

    Reads the ``text`` form field. If the text contains any entry of
    ``match`` it is labelled as cyberbullying without calling the classifier;
    otherwise the text is encoded with ``model_extract`` and passed to the
    classifier, where a predicted label of 0 means cyberbullying.

    Returns:
        The rendered ``index.html`` page. When no ``text`` field was
        submitted (for example a plain GET), the page is rendered without a
        prediction.
    """
    input_string = request.form.get('text')
    if input_string is None:
        # The route also accepts GET, which carries no form data; show the
        # empty page instead of failing with a 400.
        return render_template('index.html')

    print('text: ', input_string)

    if any(c in input_string for c in match):
        prediction = [0]
    else:
        ans = model_extract(input_string)
        print('torch.tensor variable: ', ans)
        prediction = _get_classifier().predict(ans)

    print('prediction=', prediction)
    # Compare the single predicted label explicitly rather than relying on
    # the truthiness of an array-to-list comparison.
    if int(prediction[0]) == 0:
        verdict = 'Cyberbullying Text'
    else:
        verdict = 'Non-Cyberbullying Text'
    return render_template('index.html', pred=verdict, question='వాక్యం - ' + input_string)
|
124 |
+
|
125 |
+
if __name__ == "__main__":
    # Start Flask's built-in server on port 8001 when run as a script.
    # NOTE(review): debug=True turns on the interactive debugger and the
    # reloader - confirm this entry point is never used for a public deployment.
    app.run(port=8001, debug=True)
|
127 |
+
|
128 |
+
#for creating a pickle file:
|
129 |
+
# with open('classifier.pkl','wb') as file:
|
130 |
+
# pickle.dump(xgb, file)
|
tempCodeRunnerFile.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Telugu words and phrases that app.py's predict() looks for as substrings of
# the submitted text; any hit is labelled as cyberbullying without running the
# classifier.
match=["సచ్చినోడ","పప్పు నాయుడు","నీచుడు","యెడవా","పనికిరాణి వాడు","దున్నపోతు","పిచ్చి","దరిద్రుడు","దొంగ","దోచేసాడు","సైకో","లపాకి","కొజ్జ","ముండ","ఎదవ","అడుక్కుతిను","దద్దమ్మ","సిగ్గులేదా","ఎర్రిపుకు","సన్నాసి","పోరంబోకు"]
|