sasank-229 committed on
Commit
11bce6b
1 Parent(s): b44c0fd

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +130 -0
  2. tempCodeRunnerFile.py +1 -0
app.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, render_template,url_for, current_app, abort
2
+ from tqdm import tqdm
3
+ import numpy as np
4
+ # import nbformat
5
+ # from nbconvert import PythonExporter
6
+ # import os
7
+ import torch
8
+ from transformers import AutoModel,AutoTokenizer
9
+ import pickle
10
+ from xgboost import XGBClassifier
11
+
12
+ app = Flask(__name__)
13
+
14
+ # Load the model during the application startup
15
+ # @before_first_request
16
def load_model():
    """Load the pickled classifier and attach it to the application as ``clf``.

    Reads ``static/ipynbFiles/classifier2.pkl`` and stores the unpickled
    object on ``current_app.clf``, so it must run inside an application
    context.

    Raises:
        An HTTP 500 error (via ``abort(500)``) when the file cannot be opened
        or unpickled.
    """
    try:
        # NOTE: pickle.load runs arbitrary code embedded in the file, so this
        # artifact must be a trusted file shipped with the application.
        with open('static/ipynbFiles/classifier2.pkl', 'rb') as file:
            current_app.clf = pickle.load(file)
    except Exception as e:
        # logger.exception keeps the traceback, which a bare print would drop.
        app.logger.exception("Error loading model: %s", e)
        abort(500)  # Internal Server Error


# ``Flask.before_first_request`` no longer exists in Flask 2.3+; calling it
# there raises AttributeError at import time. Keep the lazy hook where it is
# available, otherwise load the model once at startup inside an app context.
if hasattr(app, 'before_first_request'):
    app.before_first_request(load_model)
else:
    with app.app_context():
        load_model()
24
+
25
def model_extract(input_string):
    """Encode one sentence as a fixed-length tensor of IndicBERT token ids.

    The sentence is wrapped in ``[CLS]`` / ``[SEP]`` markers, tokenized with
    the ``ai4bharat/indic-bert`` tokenizer, converted to ids, and then
    truncated or zero-padded on the right to ``maxLen`` (256) positions.

    Args:
        input_string: Raw text to encode.

    Returns:
        A ``torch`` tensor of shape ``(1, 256)`` containing the token ids.
    """
    param = {'maxLen': 256}
    # Only the tokenizer is needed to produce ids; no encoder weights are
    # consulted anywhere in this function, so none are loaded.
    tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")

    def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.0):
        """Truncate and pad every sequence to exactly ``maxlen`` items.

        ``padding`` / ``truncating`` select which side ('pre' or 'post') is
        filled with ``value`` or cut off. When ``maxlen`` is None the length
        of the longest sequence is used.
        """
        if padding not in ('pre', 'post'):
            raise ValueError("Padding should be 'pre' or 'post'.")
        if truncating not in ('pre', 'post'):
            raise ValueError("Truncating should be 'pre' or 'post'.")

        sequences = [np.asarray(seq) for seq in sequences]
        if maxlen is None:
            maxlen = max((len(seq) for seq in sequences), default=0)

        padded_sequences = []
        for seq in sequences:
            overflow = len(seq) - maxlen
            if overflow > 0:
                # Truncate BEFORE padding: np.pad rejects negative widths, so
                # an over-long sequence must be cut down first.
                seq = seq[overflow:] if truncating == 'pre' else seq[:maxlen]
            missing = maxlen - len(seq)
            widths = (missing, 0) if padding == 'pre' else (0, missing)
            padded_sequences.append(
                np.pad(seq, widths, 'constant', constant_values=value)
            )

        return np.array(padded_sequences, dtype=dtype)

    def create_attention_masks(input_ids):
        """Return a float mask: 1.0 where the id is > 0, 0.0 elsewhere."""
        return (np.asarray(input_ids) > 0).astype(float)

    def getFeaturesandLabel(single_string, label):
        """Build (ids, attention mask, label) tensors for one sentence."""
        # Wrap the single string in a list so the batch helpers apply.
        sentences = ["[CLS] " + single_string + " [SEP]"]

        # Tokenize (capped at 512 tokens) and map tokens to vocabulary ids.
        tokenizer_texts = [tokenizer.tokenize(t)[:512] for t in sentences]
        input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenizer_texts]

        # Bring every row to maxLen and derive the matching attention mask.
        input_ids = pad_sequences(
            sequences=input_ids,
            maxlen=param['maxLen'],
            dtype='long',
            padding='post',
            truncating='post',
        )
        attention_masks_data = create_attention_masks(input_ids)

        # Convert to torch tensors.
        X_data = torch.tensor(input_ids)
        attention_masks_data = torch.tensor(attention_masks_data)
        y_data = torch.tensor(label)

        return X_data, attention_masks_data, y_data

    # The label is a placeholder; only the id tensor is handed back.
    X_data, _, _ = getFeaturesandLabel(input_string, [0])
    return X_data
82
+
83
+
84
+ # def model_heart():
85
+ # # Path to the notebook file
86
+ # notebook_path = os.path.join('static', 'ipynbFiles', 'trail.ipynb')
87
+ # # Read the notebook content
88
+ # with open(notebook_path, 'r', encoding='utf-8') as notebook_file:
89
+ # notebook_content = nbformat.read(notebook_file, as_version=4)
90
+ # # Create a PythonExporter
91
+ # python_exporter = PythonExporter()
92
+ # # Convert the notebook to a Python script
93
+ # python_script, _ = python_exporter.from_notebook_node(notebook_content)
94
+ # print(python_script)
95
+ # # Execute the Python script
96
+ # exec(python_script)
97
+
98
+ # model_heart()
99
+ # Now you can use the variables and functions defined in the notebook in your app.py
100
+ from tempCodeRunnerFile import match
101
@app.route('/')
def index():
    """Render ``index.html`` for the site root."""
    landing_page = render_template('index.html')
    return landing_page
104
+
105
@app.route('/predict', methods=['POST', 'GET'])
def predict():
    """Classify the submitted text and render the verdict on ``index.html``.

    The text is read from the ``text`` form field. If it contains any phrase
    from the ``match`` list it is labelled class 0 directly; otherwise it is
    encoded with ``model_extract`` and passed to the pickled classifier.
    Class 0 is shown as 'Cyberbullying Text', any other class as
    'Non-Cyberbullying Text'.

    A request that carries no ``text`` field (for example a plain GET) gets
    the bare page back instead of a 400 error.
    """
    input_string = request.form.get('text')
    if input_string is None:
        return render_template('index.html')
    print('text: ', input_string)

    if any(c in input_string for c in match):
        prediction = [0]
    else:
        # Load the classifier only when the keyword shortcut did not decide.
        # NOTE: pickle.load runs arbitrary code embedded in the file, so this
        # artifact must be a trusted file shipped with the application.
        with open('static/ipynbFiles/classifier_10epochs_updated.pkl', 'rb') as file:
            clf = pickle.load(file)
        ans = model_extract(input_string)
        print('torch.tensor variable: ', ans)
        prediction = clf.predict(ans)

    print('prediction=', prediction)
    # Compare the single predicted label explicitly rather than relying on
    # the truthiness of an array-vs-list comparison.
    if prediction[0] == 0:
        verdict = 'Cyberbullying Text'
    else:
        verdict = 'Non-Cyberbullying Text'
    return render_template('index.html', pred=verdict, question='వాక్యం - ' + input_string)
124
+
125
if __name__ == "__main__":
    # Script entry point: start Flask's built-in server on port 8001 with
    # debug mode enabled.
    # NOTE(review): debug mode is meant for local development — confirm it is
    # switched off wherever this is deployed.
    app.run(port=8001, debug=True)
127
+
128
+ #for creating a pickle file:
129
+ # with open('classifier.pkl','wb') as file:
130
+ # pickle.dump(xgb, file)
tempCodeRunnerFile.py ADDED
@@ -0,0 +1 @@
 
 
1
+ match=["సచ్చినోడ","పప్పు నాయుడు","నీచుడు","యెడవా","పనికిరాణి వాడు","దున్నపోతు","పిచ్చి","దరిద్రుడు","దొంగ","దోచేసాడు","సైకో","లపాకి","కొజ్జ","ముండ","ఎదవ","అడుక్కుతిను","దద్దమ్మ","సిగ్గులేదా","ఎర్రిపుకు","సన్నాసి","పోరంబోకు"]