pankaj goyal committed on
Commit
9129286
1 Parent(s): 25bfc0c

modified result.html

Browse files
Files changed (5) hide show
  1. Dockerfile +1 -0
  2. main.py +158 -63
  3. requirements.txt +2 -1
  4. templates/index.html +1 -1
  5. templates/result.html +17 -0
Dockerfile CHANGED
@@ -10,6 +10,7 @@ ENV HF_HOME=/code/cache/huggingface
10
  RUN mkdir -p /code/cache/huggingface && chmod -R 777 /code/cache/huggingface
11
 
12
  COPY . /app
 
13
  EXPOSE 7860
14
  # Increase Gunicorn timeout to prevent worker timeout during long initializations
15
  CMD ["gunicorn", "-b", "0.0.0.0:7862", "main:app", "--timeout", "120",]
 
10
  RUN mkdir -p /code/cache/huggingface && chmod -R 777 /code/cache/huggingface
11
 
12
  COPY . /app
13
+
14
  EXPOSE 7860
15
  # Increase Gunicorn timeout to prevent worker timeout during long initializations
16
  CMD ["gunicorn", "-b", "0.0.0.0:7862", "main:app", "--timeout", "120",]
main.py CHANGED
@@ -1,28 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # from flask import Flask, request, jsonify, render_template
2
  # from transformers import RobertaTokenizer, RobertaForSequenceClassification
3
  # from bs4 import BeautifulSoup
4
  # from langdetect import detect
5
- # import torch
6
  # import json
 
7
  # import os
8
 
9
  # app = Flask(__name__)
10
  # device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
 
 
 
 
 
 
 
11
 
 
 
 
12
  # # Load model and tokenizer
13
  # MODEL_PATH = "pankaj100567/Intent-classification"
14
- # tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH)
15
- # model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH)
16
- # model.eval().to(device)
17
-
18
- # # Load label mappings from a JSON file
19
- # solution_file_path = os.path.join('surprise.solution')
20
- # with open(solution_file_path, 'r') as solutions_file:
21
- # labels = [json.loads(line)['intent'] for line in solutions_file]
22
-
23
- # label2id = {label: i for i, label in enumerate(set(labels))}
24
- # id2label = {i: label for label, i in label2id.items()}
25
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  # @app.route('/')
27
  # def index():
28
  # return render_template('index.html')
@@ -40,22 +130,56 @@
40
  # encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
41
  # input_ids = encodings['input_ids'].to(device)
42
  # attention_mask = encodings['attention_mask'].to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- # with torch.no_grad():
45
- # outputs = model(input_ids=input_ids, attention_mask=attention_mask)
 
 
 
46
  # logits = outputs.logits
 
 
47
  # probabilities = torch.softmax(logits, dim=1)
48
- # predicted_class_index = probabilities.argmax().item()
49
 
50
- # predicted_intent = id2label[predicted_class_index]
51
- # return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
52
 
53
- # except Exception as e:
54
- # return jsonify({"error": str(e)})
 
55
 
56
- # if __name__ == '__main__':
57
- # app.run(debug=True)
 
 
 
 
 
 
 
58
 
 
 
59
  from flask import Flask, request, jsonify, render_template
60
  from transformers import RobertaTokenizer, RobertaForSequenceClassification
61
  from bs4 import BeautifulSoup
@@ -80,8 +204,8 @@ if not os.path.exists(cache_dir):
80
  # os.makedirs(cache_dir)
81
  # Load model and tokenizer
82
  MODEL_PATH = "pankaj100567/Intent-classification"
83
- tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, cache_dir=cache_dir)
84
- model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, cache_dir=cache_dir, num_labels=150)
85
  # model.eval().to(device)
86
 
87
 
@@ -125,62 +249,33 @@ def classify():
125
  cleaned_sentence = soup.get_text().strip()
126
 
127
  if detect(cleaned_sentence) != 'en':
128
- return jsonify({"error": "Please enter the sentence in English."})
129
 
130
  encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
131
  input_ids = encodings['input_ids'].to(device)
132
  attention_mask = encodings['attention_mask'].to(device)
133
- # Create a TensorDataset
134
- test_dataset = TensorDataset(input_ids, attention_mask,)
135
-
136
- # Define batch size
137
- batch_size = 32
138
 
139
- # Create a DataLoader
140
- test_dataloader = DataLoader(test_dataset, batch_size=batch_size)
141
- # Set the model in evaluation mode
142
  model.eval()
143
-
144
- # Iterate through the batches in the DataLoader
145
  for batch in test_dataloader:
146
- # Unpack the batch
147
  input_ids, attention_mask = batch
148
-
149
- # Move tensors to the device (e.g., GPU if available)
150
  input_ids = input_ids.to(device)
151
  attention_mask = attention_mask.to(device)
152
 
153
-
154
- # Forward pass to get logits
155
  with torch.no_grad():
156
  outputs = model(input_ids=input_ids, attention_mask=attention_mask)
157
-
158
- # Extract the logits tensor from the outputs
159
- logits = outputs.logits
160
-
161
- # Apply softmax to get class probabilities
162
- probabilities = torch.softmax(logits, dim=1)
163
-
164
- # Get the predicted class (index with the highest probability)
165
- predicted_class = torch.argmax(probabilities, dim=1)
166
-
167
-
168
- # Append the predicted class to the list of predictions
169
- # predictions.extend(predicted_class.tolist())
170
-
171
- # with torch.no_grad():
172
- # outputs = model(input_ids=input_ids, attention_mask=attention_mask)
173
- # logits = outputs.logits
174
- # probabilities = torch.softmax(logits, dim=1)
175
- # predicted_class_index = probabilities.argmax().item()
176
 
177
  predicted_intent = id2label[predicted_class]
178
- print(predicted_class, predicted_intent)
179
- return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
180
 
181
  except Exception as e:
182
- return jsonify({"error": str(e)})
 
183
 
184
  # if __name__ == '__main__':
185
  # app.run(debug=True)
186
-
 
1
+ # # from flask import Flask, request, jsonify, render_template
2
+ # # from transformers import RobertaTokenizer, RobertaForSequenceClassification
3
+ # # from bs4 import BeautifulSoup
4
+ # # from langdetect import detect
5
+ # # import torch
6
+ # # import json
7
+ # # import os
8
+
9
+ # # app = Flask(__name__)
10
+ # # device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
11
+
12
+ # # # Load model and tokenizer
13
+ # # MODEL_PATH = "pankaj100567/Intent-classification"
14
+ # # tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH)
15
+ # # model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH)
16
+ # # model.eval().to(device)
17
+
18
+ # # # Load label mappings from a JSON file
19
+ # # solution_file_path = os.path.join('surprise.solution')
20
+ # # with open(solution_file_path, 'r') as solutions_file:
21
+ # # labels = [json.loads(line)['intent'] for line in solutions_file]
22
+
23
+ # # label2id = {label: i for i, label in enumerate(set(labels))}
24
+ # # id2label = {i: label for label, i in label2id.items()}
25
+
26
+ # # @app.route('/')
27
+ # # def index():
28
+ # # return render_template('index.html')
29
+
30
+ # # @app.route('/classify', methods=['POST'])
31
+ # # def classify():
32
+ # # try:
33
+ # # sentence = request.form['sentence']
34
+ # # soup = BeautifulSoup(sentence, "html.parser")
35
+ # # cleaned_sentence = soup.get_text().strip()
36
+
37
+ # # if detect(cleaned_sentence) != 'en':
38
+ # # return jsonify({"error": "Please enter the sentence in English."})
39
+
40
+ # # encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
41
+ # # input_ids = encodings['input_ids'].to(device)
42
+ # # attention_mask = encodings['attention_mask'].to(device)
43
+
44
+ # # with torch.no_grad():
45
+ # # outputs = model(input_ids=input_ids, attention_mask=attention_mask)
46
+ # # logits = outputs.logits
47
+ # # probabilities = torch.softmax(logits, dim=1)
48
+ # # predicted_class_index = probabilities.argmax().item()
49
+
50
+ # # predicted_intent = id2label[predicted_class_index]
51
+ # # return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
52
+
53
+ # # except Exception as e:
54
+ # # return jsonify({"error": str(e)})
55
+
56
+ # # if __name__ == '__main__':
57
+ # # app.run(debug=True)
58
+
59
  # from flask import Flask, request, jsonify, render_template
60
  # from transformers import RobertaTokenizer, RobertaForSequenceClassification
61
  # from bs4 import BeautifulSoup
62
  # from langdetect import detect
63
+ # from torch.utils.data import DataLoader, TensorDataset
64
  # import json
65
+ # import torch
66
  # import os
67
 
68
  # app = Flask(__name__)
69
  # device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
70
+ # cache_dir = "/code/cache/huggingface"
71
+ # if not os.path.exists(cache_dir):
72
+ # try:
73
+ # os.makedirs(cache_dir)
74
+ # os.chmod(cache_dir, 0o777) # Set directory permissions to read, write, and execute by all users
75
+ # except Exception as e:
76
+ # print(f"Failed to create or set permissions for directory {cache_dir}: {e}")
77
 
78
+ # # cache_dir = "/code/cache/huggingface"
79
+ # # if not os.path.exists(cache_dir):
80
+ # # os.makedirs(cache_dir)
81
  # # Load model and tokenizer
82
  # MODEL_PATH = "pankaj100567/Intent-classification"
83
+ # tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, cache_dir=cache_dir)
84
+ # model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, cache_dir=cache_dir, num_labels=150)
85
+ # # model.eval().to(device)
86
+
87
+
88
+ # # Load label mappings
89
+ # solution_file_path=os.path.join('surprise.solution')
90
+ # # test_data_path=os.path.join(data_path,'massive_test.data')
91
+ # # loading surprise.solution file for getting id2label and label2id mapping
92
+ # with open(solution_file_path,'r') as solutions_file:
93
+ # solutions=[json.loads(line) for line in solutions_file] # reading json data from data_path and parse it into a test_data list
94
+
95
+ # labels_list=[]
96
+ # for label in solutions:
97
+ # labels_list.append(label['intent'])
98
+ # unique_labels_list=[]
99
+ # for x in labels_list:
100
+ # if x not in unique_labels_list:
101
+ # unique_labels_list.append(x)
102
+ # # unique_labels_list, len(unique_labels_list)
103
+
104
+ # label2id={}
105
+ # id2label={}
106
+ # for i, label in enumerate(unique_labels_list):
107
+ # label2id[label]=i
108
+ # id2label[i]=label
109
+ # # # Load label mappings from a JSON file
110
+ # # solution_file_path = os.path.join('surprise.solution')
111
+ # # with open(solution_file_path, 'r') as solutions_file:
112
+ # # labels = [json.loads(line)['intent'] for line in solutions_file]
113
+
114
+ # # label2id = {label: i for i, label in enumerate(set(labels))}
115
+ # # id2label = {i: label for label, i in label2id.items()}
116
  # @app.route('/')
117
  # def index():
118
  # return render_template('index.html')
 
130
  # encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
131
  # input_ids = encodings['input_ids'].to(device)
132
  # attention_mask = encodings['attention_mask'].to(device)
133
+ # # Create a TensorDataset
134
+ # test_dataset = TensorDataset(input_ids, attention_mask,)
135
+
136
+ # # Define batch size
137
+ # batch_size = 32
138
+
139
+ # # Create a DataLoader
140
+ # test_dataloader = DataLoader(test_dataset, batch_size=batch_size)
141
+ # # Set the model in evaluation mode
142
+ # model.eval()
143
+
144
+ # # Iterate through the batches in the DataLoader
145
+ # for batch in test_dataloader:
146
+ # # Unpack the batch
147
+ # input_ids, attention_mask = batch
148
+
149
+ # # Move tensors to the device (e.g., GPU if available)
150
+ # input_ids = input_ids.to(device)
151
+ # attention_mask = attention_mask.to(device)
152
+
153
 
154
+ # # Forward pass to get logits
155
+ # with torch.no_grad():
156
+ # outputs = model(input_ids=input_ids, attention_mask=attention_mask)
157
+
158
+ # # Extract the logits tensor from the outputs
159
  # logits = outputs.logits
160
+
161
+ # # Apply softmax to get class probabilities
162
  # probabilities = torch.softmax(logits, dim=1)
 
163
 
164
+ # # Get the predicted class (index with the highest probability)
165
+ # predicted_class = torch.argmax(probabilities, dim=1)
166
 
167
+
168
+ # # Append the predicted class to the list of predictions
169
+ # # predictions.extend(predicted_class.tolist())
170
 
171
+ # # with torch.no_grad():
172
+ # # outputs = model(input_ids=input_ids, attention_mask=attention_mask)
173
+ # # logits = outputs.logits
174
+ # # probabilities = torch.softmax(logits, dim=1)
175
+ # # predicted_class_index = probabilities.argmax().item()
176
+
177
+ # predicted_intent = id2label[predicted_class]
178
+ # print(predicted_class, predicted_intent)
179
+ # return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
180
 
181
+ # except Exception as e:
182
+ # return jsonify({"error": str(e)})
183
  from flask import Flask, request, jsonify, render_template
184
  from transformers import RobertaTokenizer, RobertaForSequenceClassification
185
  from bs4 import BeautifulSoup
 
204
  # os.makedirs(cache_dir)
205
  # Load model and tokenizer
206
  MODEL_PATH = "pankaj100567/Intent-classification"
207
+ tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, cache_dir= cache_dir)
208
+ model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, cache_dir= cache_dir, num_labels=150)
209
  # model.eval().to(device)
210
 
211
 
 
249
  cleaned_sentence = soup.get_text().strip()
250
 
251
  if detect(cleaned_sentence) != 'en':
252
+ return render_template('result.html', error="Please enter the sentence in English.")
253
 
254
  encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
255
  input_ids = encodings['input_ids'].to(device)
256
  attention_mask = encodings['attention_mask'].to(device)
 
 
 
 
 
257
 
258
+ test_dataset = TensorDataset(input_ids, attention_mask)
259
+ test_dataloader = DataLoader(test_dataset, batch_size=1) # Assume a batch size of 1 for individual predictions
 
260
  model.eval()
261
+
 
262
  for batch in test_dataloader:
 
263
  input_ids, attention_mask = batch
 
 
264
  input_ids = input_ids.to(device)
265
  attention_mask = attention_mask.to(device)
266
 
 
 
267
  with torch.no_grad():
268
  outputs = model(input_ids=input_ids, attention_mask=attention_mask)
269
+ logits = outputs.logits
270
+ probabilities = torch.softmax(logits, dim=1)
271
+ predicted_class = torch.argmax(probabilities, dim=1).item()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
 
273
  predicted_intent = id2label[predicted_class]
274
+ return render_template('result.html', intent=predicted_intent, sentence=cleaned_sentence)
 
275
 
276
  except Exception as e:
277
+ return render_template('result.html', error=str(e))
278
+
279
 
280
  # if __name__ == '__main__':
281
  # app.run(debug=True)
 
requirements.txt CHANGED
@@ -18,6 +18,7 @@ tokenizers
18
  scikit-learn
19
  gradio
20
  nltk
 
21
  langdetect
22
  beautifulsoup4
23
- gunicorn
 
18
  scikit-learn
19
  gradio
20
  nltk
21
+ gunicorn
22
  langdetect
23
  beautifulsoup4
24
+
templates/index.html CHANGED
@@ -12,7 +12,7 @@
12
  <select id="intent-list">
13
  <option value="">-- Select an Intent --</option>
14
  </select>
15
-
16
  <h2>Test Your Sentence</h2>
17
  <form action="/classify" method="post">
18
  <textarea name="sentence" placeholder="Enter your sentence here..." required></textarea>
 
12
  <select id="intent-list">
13
  <option value="">-- Select an Intent --</option>
14
  </select>
15
+ <h3>This website may sometimes give wrong information regarding intent, because it is under maintenance.</h3>
16
  <h2>Test Your Sentence</h2>
17
  <form action="/classify" method="post">
18
  <textarea name="sentence" placeholder="Enter your sentence here..." required></textarea>
templates/result.html ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- templates/result.html -->
2
+ <!DOCTYPE html>
3
+ <html>
4
+ <head>
5
+ <title>Classification Result</title>
6
+ </head>
7
+ <body>
8
+ <h1>Classification Result</h1>
9
+ {% if error %}
10
+ <p>Error: {{ error }}</p>
11
+ {% else %}
12
+ <p>Sentence: "{{ sentence }}"</p>
13
+ <p>Intent: {{ intent }}</p>
14
+ {% endif %}
15
+ <a href="{{ url_for('index') }}">Try another sentence</a>
16
+ </body>
17
+ </html>