Spaces:
Runtime error
Runtime error
pankaj goyal
committed on
Commit
•
9129286
1
Parent(s):
25bfc0c
modified result.html
Browse files- Dockerfile +1 -0
- main.py +158 -63
- requirements.txt +2 -1
- templates/index.html +1 -1
- templates/result.html +17 -0
Dockerfile
CHANGED
@@ -10,6 +10,7 @@ ENV HF_HOME=/code/cache/huggingface
|
|
10 |
RUN mkdir -p /code/cache/huggingface && chmod -R 777 /code/cache/huggingface
|
11 |
|
12 |
COPY . /app
|
|
|
13 |
EXPOSE 7860
|
14 |
# Increase Gunicorn timeout to prevent worker timeout during long initializations
|
15 |
CMD ["gunicorn", "-b", "0.0.0.0:7862", "main:app", "--timeout", "120",]
|
|
|
10 |
RUN mkdir -p /code/cache/huggingface && chmod -R 777 /code/cache/huggingface
|
11 |
|
12 |
COPY . /app
|
13 |
+
|
14 |
EXPOSE 7860
|
15 |
# Increase Gunicorn timeout to prevent worker timeout during long initializations
|
16 |
CMD ["gunicorn", "-b", "0.0.0.0:7862", "main:app", "--timeout", "120",]
|
main.py
CHANGED
@@ -1,28 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# from flask import Flask, request, jsonify, render_template
|
2 |
# from transformers import RobertaTokenizer, RobertaForSequenceClassification
|
3 |
# from bs4 import BeautifulSoup
|
4 |
# from langdetect import detect
|
5 |
-
# import
|
6 |
# import json
|
|
|
7 |
# import os
|
8 |
|
9 |
# app = Flask(__name__)
|
10 |
# device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
|
|
|
|
|
|
12 |
# # Load model and tokenizer
|
13 |
# MODEL_PATH = "pankaj100567/Intent-classification"
|
14 |
-
# tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH)
|
15 |
-
# model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH)
|
16 |
-
# model.eval().to(device)
|
17 |
-
|
18 |
-
|
19 |
-
#
|
20 |
-
#
|
21 |
-
#
|
22 |
-
|
23 |
-
#
|
24 |
-
#
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
# @app.route('/')
|
27 |
# def index():
|
28 |
# return render_template('index.html')
|
@@ -40,22 +130,56 @@
|
|
40 |
# encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
41 |
# input_ids = encodings['input_ids'].to(device)
|
42 |
# attention_mask = encodings['attention_mask'].to(device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
-
#
|
45 |
-
#
|
|
|
|
|
|
|
46 |
# logits = outputs.logits
|
|
|
|
|
47 |
# probabilities = torch.softmax(logits, dim=1)
|
48 |
-
# predicted_class_index = probabilities.argmax().item()
|
49 |
|
50 |
-
#
|
51 |
-
#
|
52 |
|
53 |
-
|
54 |
-
#
|
|
|
55 |
|
56 |
-
#
|
57 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
|
|
|
|
59 |
from flask import Flask, request, jsonify, render_template
|
60 |
from transformers import RobertaTokenizer, RobertaForSequenceClassification
|
61 |
from bs4 import BeautifulSoup
|
@@ -80,8 +204,8 @@ if not os.path.exists(cache_dir):
|
|
80 |
# os.makedirs(cache_dir)
|
81 |
# Load model and tokenizer
|
82 |
MODEL_PATH = "pankaj100567/Intent-classification"
|
83 |
-
tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, cache_dir=cache_dir)
|
84 |
-
model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, cache_dir=cache_dir, num_labels=150)
|
85 |
# model.eval().to(device)
|
86 |
|
87 |
|
@@ -125,62 +249,33 @@ def classify():
|
|
125 |
cleaned_sentence = soup.get_text().strip()
|
126 |
|
127 |
if detect(cleaned_sentence) != 'en':
|
128 |
-
return
|
129 |
|
130 |
encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
131 |
input_ids = encodings['input_ids'].to(device)
|
132 |
attention_mask = encodings['attention_mask'].to(device)
|
133 |
-
# Create a TensorDataset
|
134 |
-
test_dataset = TensorDataset(input_ids, attention_mask,)
|
135 |
-
|
136 |
-
# Define batch size
|
137 |
-
batch_size = 32
|
138 |
|
139 |
-
|
140 |
-
test_dataloader = DataLoader(test_dataset, batch_size=
|
141 |
-
# Set the model in evaluation mode
|
142 |
model.eval()
|
143 |
-
|
144 |
-
# Iterate through the batches in the DataLoader
|
145 |
for batch in test_dataloader:
|
146 |
-
# Unpack the batch
|
147 |
input_ids, attention_mask = batch
|
148 |
-
|
149 |
-
# Move tensors to the device (e.g., GPU if available)
|
150 |
input_ids = input_ids.to(device)
|
151 |
attention_mask = attention_mask.to(device)
|
152 |
|
153 |
-
|
154 |
-
# Forward pass to get logits
|
155 |
with torch.no_grad():
|
156 |
outputs = model(input_ids=input_ids, attention_mask=attention_mask)
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
# Apply softmax to get class probabilities
|
162 |
-
probabilities = torch.softmax(logits, dim=1)
|
163 |
-
|
164 |
-
# Get the predicted class (index with the highest probability)
|
165 |
-
predicted_class = torch.argmax(probabilities, dim=1)
|
166 |
-
|
167 |
-
|
168 |
-
# Append the predicted class to the list of predictions
|
169 |
-
# predictions.extend(predicted_class.tolist())
|
170 |
-
|
171 |
-
# with torch.no_grad():
|
172 |
-
# outputs = model(input_ids=input_ids, attention_mask=attention_mask)
|
173 |
-
# logits = outputs.logits
|
174 |
-
# probabilities = torch.softmax(logits, dim=1)
|
175 |
-
# predicted_class_index = probabilities.argmax().item()
|
176 |
|
177 |
predicted_intent = id2label[predicted_class]
|
178 |
-
|
179 |
-
return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
|
180 |
|
181 |
except Exception as e:
|
182 |
-
return
|
|
|
183 |
|
184 |
# if __name__ == '__main__':
|
185 |
# app.run(debug=True)
|
186 |
-
|
|
|
1 |
+
# # from flask import Flask, request, jsonify, render_template
|
2 |
+
# # from transformers import RobertaTokenizer, RobertaForSequenceClassification
|
3 |
+
# # from bs4 import BeautifulSoup
|
4 |
+
# # from langdetect import detect
|
5 |
+
# # import torch
|
6 |
+
# # import json
|
7 |
+
# # import os
|
8 |
+
|
9 |
+
# # app = Flask(__name__)
|
10 |
+
# # device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
|
11 |
+
|
12 |
+
# # # Load model and tokenizer
|
13 |
+
# # MODEL_PATH = "pankaj100567/Intent-classification"
|
14 |
+
# # tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH)
|
15 |
+
# # model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH)
|
16 |
+
# # model.eval().to(device)
|
17 |
+
|
18 |
+
# # # Load label mappings from a JSON file
|
19 |
+
# # solution_file_path = os.path.join('surprise.solution')
|
20 |
+
# # with open(solution_file_path, 'r') as solutions_file:
|
21 |
+
# # labels = [json.loads(line)['intent'] for line in solutions_file]
|
22 |
+
|
23 |
+
# # label2id = {label: i for i, label in enumerate(set(labels))}
|
24 |
+
# # id2label = {i: label for label, i in label2id.items()}
|
25 |
+
|
26 |
+
# # @app.route('/')
|
27 |
+
# # def index():
|
28 |
+
# # return render_template('index.html')
|
29 |
+
|
30 |
+
# # @app.route('/classify', methods=['POST'])
|
31 |
+
# # def classify():
|
32 |
+
# # try:
|
33 |
+
# # sentence = request.form['sentence']
|
34 |
+
# # soup = BeautifulSoup(sentence, "html.parser")
|
35 |
+
# # cleaned_sentence = soup.get_text().strip()
|
36 |
+
|
37 |
+
# # if detect(cleaned_sentence) != 'en':
|
38 |
+
# # return jsonify({"error": "Please enter the sentence in English."})
|
39 |
+
|
40 |
+
# # encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
41 |
+
# # input_ids = encodings['input_ids'].to(device)
|
42 |
+
# # attention_mask = encodings['attention_mask'].to(device)
|
43 |
+
|
44 |
+
# # with torch.no_grad():
|
45 |
+
# # outputs = model(input_ids=input_ids, attention_mask=attention_mask)
|
46 |
+
# # logits = outputs.logits
|
47 |
+
# # probabilities = torch.softmax(logits, dim=1)
|
48 |
+
# # predicted_class_index = probabilities.argmax().item()
|
49 |
+
|
50 |
+
# # predicted_intent = id2label[predicted_class_index]
|
51 |
+
# # return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
|
52 |
+
|
53 |
+
# # except Exception as e:
|
54 |
+
# # return jsonify({"error": str(e)})
|
55 |
+
|
56 |
+
# # if __name__ == '__main__':
|
57 |
+
# # app.run(debug=True)
|
58 |
+
|
59 |
# from flask import Flask, request, jsonify, render_template
|
60 |
# from transformers import RobertaTokenizer, RobertaForSequenceClassification
|
61 |
# from bs4 import BeautifulSoup
|
62 |
# from langdetect import detect
|
63 |
+
# from torch.utils.data import DataLoader, TensorDataset
|
64 |
# import json
|
65 |
+
# import torch
|
66 |
# import os
|
67 |
|
68 |
# app = Flask(__name__)
|
69 |
# device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
|
70 |
+
# cache_dir = "/code/cache/huggingface"
|
71 |
+
# if not os.path.exists(cache_dir):
|
72 |
+
# try:
|
73 |
+
# os.makedirs(cache_dir)
|
74 |
+
# os.chmod(cache_dir, 0o777) # Set directory permissions to read, write, and execute by all users
|
75 |
+
# except Exception as e:
|
76 |
+
# print(f"Failed to create or set permissions for directory {cache_dir}: {e}")
|
77 |
|
78 |
+
# # cache_dir = "/code/cache/huggingface"
|
79 |
+
# # if not os.path.exists(cache_dir):
|
80 |
+
# # os.makedirs(cache_dir)
|
81 |
# # Load model and tokenizer
|
82 |
# MODEL_PATH = "pankaj100567/Intent-classification"
|
83 |
+
# tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, cache_dir=cache_dir)
|
84 |
+
# model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, cache_dir=cache_dir, num_labels=150)
|
85 |
+
# # model.eval().to(device)
|
86 |
+
|
87 |
+
|
88 |
+
# # Load label mappings
|
89 |
+
# solution_file_path=os.path.join('surprise.solution')
|
90 |
+
# # test_data_path=os.path.join(data_path,'massive_test.data')
|
91 |
+
# # loading surprise.solution file for getting id2label and label2id mapping
|
92 |
+
# with open(solution_file_path,'r') as solutions_file:
|
93 |
+
# solutions=[json.loads(line) for line in solutions_file] # reading json data from data_path and parse it into a test_data list
|
94 |
+
|
95 |
+
# labels_list=[]
|
96 |
+
# for label in solutions:
|
97 |
+
# labels_list.append(label['intent'])
|
98 |
+
# unique_labels_list=[]
|
99 |
+
# for x in labels_list:
|
100 |
+
# if x not in unique_labels_list:
|
101 |
+
# unique_labels_list.append(x)
|
102 |
+
# # unique_labels_list, len(unique_labels_list)
|
103 |
+
|
104 |
+
# label2id={}
|
105 |
+
# id2label={}
|
106 |
+
# for i, label in enumerate(unique_labels_list):
|
107 |
+
# label2id[label]=i
|
108 |
+
# id2label[i]=label
|
109 |
+
# # # Load label mappings from a JSON file
|
110 |
+
# # solution_file_path = os.path.join('surprise.solution')
|
111 |
+
# # with open(solution_file_path, 'r') as solutions_file:
|
112 |
+
# # labels = [json.loads(line)['intent'] for line in solutions_file]
|
113 |
+
|
114 |
+
# # label2id = {label: i for i, label in enumerate(set(labels))}
|
115 |
+
# # id2label = {i: label for label, i in label2id.items()}
|
116 |
# @app.route('/')
|
117 |
# def index():
|
118 |
# return render_template('index.html')
|
|
|
130 |
# encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
131 |
# input_ids = encodings['input_ids'].to(device)
|
132 |
# attention_mask = encodings['attention_mask'].to(device)
|
133 |
+
# # Create a TensorDataset
|
134 |
+
# test_dataset = TensorDataset(input_ids, attention_mask,)
|
135 |
+
|
136 |
+
# # Define batch size
|
137 |
+
# batch_size = 32
|
138 |
+
|
139 |
+
# # Create a DataLoader
|
140 |
+
# test_dataloader = DataLoader(test_dataset, batch_size=batch_size)
|
141 |
+
# # Set the model in evaluation mode
|
142 |
+
# model.eval()
|
143 |
+
|
144 |
+
# # Iterate through the batches in the DataLoader
|
145 |
+
# for batch in test_dataloader:
|
146 |
+
# # Unpack the batch
|
147 |
+
# input_ids, attention_mask = batch
|
148 |
+
|
149 |
+
# # Move tensors to the device (e.g., GPU if available)
|
150 |
+
# input_ids = input_ids.to(device)
|
151 |
+
# attention_mask = attention_mask.to(device)
|
152 |
+
|
153 |
|
154 |
+
# # Forward pass to get logits
|
155 |
+
# with torch.no_grad():
|
156 |
+
# outputs = model(input_ids=input_ids, attention_mask=attention_mask)
|
157 |
+
|
158 |
+
# # Extract the logits tensor from the outputs
|
159 |
# logits = outputs.logits
|
160 |
+
|
161 |
+
# # Apply softmax to get class probabilities
|
162 |
# probabilities = torch.softmax(logits, dim=1)
|
|
|
163 |
|
164 |
+
# # Get the predicted class (index with the highest probability)
|
165 |
+
# predicted_class = torch.argmax(probabilities, dim=1)
|
166 |
|
167 |
+
|
168 |
+
# # Append the predicted class to the list of predictions
|
169 |
+
# # predictions.extend(predicted_class.tolist())
|
170 |
|
171 |
+
# # with torch.no_grad():
|
172 |
+
# # outputs = model(input_ids=input_ids, attention_mask=attention_mask)
|
173 |
+
# # logits = outputs.logits
|
174 |
+
# # probabilities = torch.softmax(logits, dim=1)
|
175 |
+
# # predicted_class_index = probabilities.argmax().item()
|
176 |
+
|
177 |
+
# predicted_intent = id2label[predicted_class]
|
178 |
+
# print(predicted_class, predicted_intent)
|
179 |
+
# return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
|
180 |
|
181 |
+
# except Exception as e:
|
182 |
+
# return jsonify({"error": str(e)})
|
183 |
from flask import Flask, request, jsonify, render_template
|
184 |
from transformers import RobertaTokenizer, RobertaForSequenceClassification
|
185 |
from bs4 import BeautifulSoup
|
|
|
204 |
# os.makedirs(cache_dir)
|
205 |
# Load model and tokenizer
|
206 |
MODEL_PATH = "pankaj100567/Intent-classification"
|
207 |
+
tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, cache_dir= cache_dir)
|
208 |
+
model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, cache_dir= cache_dir, num_labels=150)
|
209 |
# model.eval().to(device)
|
210 |
|
211 |
|
|
|
249 |
cleaned_sentence = soup.get_text().strip()
|
250 |
|
251 |
if detect(cleaned_sentence) != 'en':
|
252 |
+
return render_template('result.html', error="Please enter the sentence in English.")
|
253 |
|
254 |
encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
255 |
input_ids = encodings['input_ids'].to(device)
|
256 |
attention_mask = encodings['attention_mask'].to(device)
|
|
|
|
|
|
|
|
|
|
|
257 |
|
258 |
+
test_dataset = TensorDataset(input_ids, attention_mask)
|
259 |
+
test_dataloader = DataLoader(test_dataset, batch_size=1) # Assume a batch size of 1 for individual predictions
|
|
|
260 |
model.eval()
|
261 |
+
|
|
|
262 |
for batch in test_dataloader:
|
|
|
263 |
input_ids, attention_mask = batch
|
|
|
|
|
264 |
input_ids = input_ids.to(device)
|
265 |
attention_mask = attention_mask.to(device)
|
266 |
|
|
|
|
|
267 |
with torch.no_grad():
|
268 |
outputs = model(input_ids=input_ids, attention_mask=attention_mask)
|
269 |
+
logits = outputs.logits
|
270 |
+
probabilities = torch.softmax(logits, dim=1)
|
271 |
+
predicted_class = torch.argmax(probabilities, dim=1).item()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
272 |
|
273 |
predicted_intent = id2label[predicted_class]
|
274 |
+
return render_template('result.html', intent=predicted_intent, sentence=cleaned_sentence)
|
|
|
275 |
|
276 |
except Exception as e:
|
277 |
+
return render_template('result.html', error=str(e))
|
278 |
+
|
279 |
|
280 |
# if __name__ == '__main__':
|
281 |
# app.run(debug=True)
|
|
requirements.txt
CHANGED
@@ -18,6 +18,7 @@ tokenizers
|
|
18 |
scikit-learn
|
19 |
gradio
|
20 |
nltk
|
|
|
21 |
langdetect
|
22 |
beautifulsoup4
|
23 |
-
|
|
|
18 |
scikit-learn
|
19 |
gradio
|
20 |
nltk
|
21 |
+
gunicorn
|
22 |
langdetect
|
23 |
beautifulsoup4
|
24 |
+
|
templates/index.html
CHANGED
@@ -12,7 +12,7 @@
|
|
12 |
<select id="intent-list">
|
13 |
<option value="">-- Select an Intent --</option>
|
14 |
</select>
|
15 |
-
|
16 |
<h2>Test Your Sentence</h2>
|
17 |
<form action="/classify" method="post">
|
18 |
<textarea name="sentence" placeholder="Enter your sentence here..." required></textarea>
|
|
|
12 |
<select id="intent-list">
|
13 |
<option value="">-- Select an Intent --</option>
|
14 |
</select>
|
15 |
+
<h3> This website may sometimes give wrong information regarding the intent, because it is under maintenance </h3>
|
16 |
<h2>Test Your Sentence</h2>
|
17 |
<form action="/classify" method="post">
|
18 |
<textarea name="sentence" placeholder="Enter your sentence here..." required></textarea>
|
templates/result.html
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!-- templates/result.html -->
|
2 |
+
<!DOCTYPE html>
|
3 |
+
<html>
|
4 |
+
<head>
|
5 |
+
<title>Classification Result</title>
|
6 |
+
</head>
|
7 |
+
<body>
|
8 |
+
<h1>Classification Result</h1>
|
9 |
+
{% if error %}
|
10 |
+
<p>Error: {{ error }}</p>
|
11 |
+
{% else %}
|
12 |
+
<p>Sentence: "{{ sentence }}"</p>
|
13 |
+
<p>Intent: {{ intent }}</p>
|
14 |
+
{% endif %}
|
15 |
+
<a href="{{ url_for('index') }}">Try another sentence</a>
|
16 |
+
</body>
|
17 |
+
</html>
|