Spaces:
Runtime error
Runtime error
pankaj goyal
committed on
Commit
•
ec4fe29
1
Parent(s):
6e887b0
all_files_creation
Browse files- Dockerfile +17 -0
- main.py +185 -0
- requirements.txt +23 -0
- static/style.css +39 -0
- surprise.solution +0 -0
- templates/index.html +63 -0
Dockerfile
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Image for the Flask intent-classification app, served by Gunicorn.
FROM python:3.9

WORKDIR /app

# Copy and install the dependency list before the rest of the sources so this
# layer is reused as long as requirements.txt is unchanged.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Use the recommended HF_HOME instead of deprecated TRANSFORMERS_CACHE
ENV HF_HOME=/code/cache/huggingface

# Create the Hugging Face cache directory and make it writable by any user
RUN mkdir -p /code/cache/huggingface && chmod -R 777 /code/cache/huggingface

COPY . /app

EXPOSE 7860

# Gunicorn must bind to the same port that is EXPOSEd above (7860); binding to
# a different port leaves the exposed port with nothing listening on it.
# The 120 s timeout prevents worker timeouts during long initializations.
CMD ["gunicorn", "-b", "0.0.0.0:7860", "main:app", "--timeout", "120", "--workers", "2", "--threads", "2"]
|
17 |
+
|
main.py
ADDED
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from flask import Flask, request, jsonify, render_template
|
2 |
+
# from transformers import RobertaTokenizer, RobertaForSequenceClassification
|
3 |
+
# from bs4 import BeautifulSoup
|
4 |
+
# from langdetect import detect
|
5 |
+
# import torch
|
6 |
+
# import json
|
7 |
+
# import os
|
8 |
+
|
9 |
+
# app = Flask(__name__)
|
10 |
+
# device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
|
11 |
+
|
12 |
+
# # Load model and tokenizer
|
13 |
+
# MODEL_PATH = "pankaj100567/Intent-classification"
|
14 |
+
# tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH)
|
15 |
+
# model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH)
|
16 |
+
# model.eval().to(device)
|
17 |
+
|
18 |
+
# # Load label mappings from a JSON file
|
19 |
+
# solution_file_path = os.path.join('surprise.solution')
|
20 |
+
# with open(solution_file_path, 'r') as solutions_file:
|
21 |
+
# labels = [json.loads(line)['intent'] for line in solutions_file]
|
22 |
+
|
23 |
+
# label2id = {label: i for i, label in enumerate(set(labels))}
|
24 |
+
# id2label = {i: label for label, i in label2id.items()}
|
25 |
+
|
26 |
+
# @app.route('/')
|
27 |
+
# def index():
|
28 |
+
# return render_template('index.html')
|
29 |
+
|
30 |
+
# @app.route('/classify', methods=['POST'])
|
31 |
+
# def classify():
|
32 |
+
# try:
|
33 |
+
# sentence = request.form['sentence']
|
34 |
+
# soup = BeautifulSoup(sentence, "html.parser")
|
35 |
+
# cleaned_sentence = soup.get_text().strip()
|
36 |
+
|
37 |
+
# if detect(cleaned_sentence) != 'en':
|
38 |
+
# return jsonify({"error": "Please enter the sentence in English."})
|
39 |
+
|
40 |
+
# encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
41 |
+
# input_ids = encodings['input_ids'].to(device)
|
42 |
+
# attention_mask = encodings['attention_mask'].to(device)
|
43 |
+
|
44 |
+
# with torch.no_grad():
|
45 |
+
# outputs = model(input_ids=input_ids, attention_mask=attention_mask)
|
46 |
+
# logits = outputs.logits
|
47 |
+
# probabilities = torch.softmax(logits, dim=1)
|
48 |
+
# predicted_class_index = probabilities.argmax().item()
|
49 |
+
|
50 |
+
# predicted_intent = id2label[predicted_class_index]
|
51 |
+
# return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
|
52 |
+
|
53 |
+
# except Exception as e:
|
54 |
+
# return jsonify({"error": str(e)})
|
55 |
+
|
56 |
+
# if __name__ == '__main__':
|
57 |
+
# app.run(debug=True)
|
58 |
+
|
59 |
+
from flask import Flask, request, jsonify, render_template
|
60 |
+
from transformers import RobertaTokenizer, RobertaForSequenceClassification
|
61 |
+
from bs4 import BeautifulSoup
|
62 |
+
from langdetect import detect
|
63 |
+
from torch.utils.data import DataLoader, TensorDataset
|
64 |
+
import json
|
65 |
+
import torch
|
66 |
+
import os
|
67 |
+
|
68 |
+
app = Flask(__name__)

# Always a torch.device, whichever branch is taken.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Directory passed to from_pretrained() as the download cache.
cache_dir = "/code/cache/huggingface"
if not os.path.isdir(cache_dir):
    try:
        # exist_ok avoids a crash when another worker creates the directory
        # between the check above and this call.
        os.makedirs(cache_dir, exist_ok=True)
        os.chmod(cache_dir, 0o777)  # read, write and execute for all users
    except OSError as e:
        # Best effort: loading below may still succeed (or fail with a clearer error).
        print(f"Failed to create or set permissions for directory {cache_dir}: {e}")

# Load model and tokenizer
MODEL_PATH = "pankaj100567/Intent-classification"
tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, cache_dir=cache_dir)
model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, cache_dir=cache_dir, num_labels=150)
# The request handler moves its input tensors to `device`, so the model has to
# live on the same device; eval() disables dropout for inference.
model.to(device)
model.eval()

# Load label mappings: surprise.solution holds one JSON object per line, each
# with an 'intent' field.
solution_file_path = os.path.join('surprise.solution')
with open(solution_file_path, 'r', encoding='utf-8') as solutions_file:
    # Skip blank lines (e.g. a trailing newline) instead of failing in json.loads.
    solutions = [json.loads(line) for line in solutions_file if line.strip()]

labels_list = [entry['intent'] for entry in solutions]

# De-duplicate while keeping first-appearance order, so class ids stay stable.
# NOTE(review): this assumes the order matches the mapping used at training time - confirm.
unique_labels_list = list(dict.fromkeys(labels_list))

label2id = {label: i for i, label in enumerate(unique_labels_list)}
id2label = {i: label for label, i in label2id.items()}
|
109 |
+
# # Load label mappings from a JSON file
|
110 |
+
# solution_file_path = os.path.join('surprise.solution')
|
111 |
+
# with open(solution_file_path, 'r') as solutions_file:
|
112 |
+
# labels = [json.loads(line)['intent'] for line in solutions_file]
|
113 |
+
|
114 |
+
# label2id = {label: i for i, label in enumerate(set(labels))}
|
115 |
+
# id2label = {i: label for label, i in label2id.items()}
|
116 |
+
@app.route('/')
def index():
    """Render and return the landing page template (index.html)."""
    landing_page = render_template('index.html')
    return landing_page
|
119 |
+
|
120 |
+
@app.route('/classify', methods=['POST'])
def classify():
    """Classify the intent of the sentence posted in the 'sentence' form field.

    Returns:
        A JSON response: ``{"intent": ..., "sentence": ...}`` on success, or
        ``{"error": ...}`` when the input is empty, is not detected as
        English, or any step raises.
    """
    try:
        sentence = request.form['sentence']
        # Strip any HTML markup and surrounding whitespace from the input.
        cleaned_sentence = BeautifulSoup(sentence, "html.parser").get_text().strip()

        # Reject empty input up front with a clear message rather than
        # relying on the language detector to fail on it.
        if not cleaned_sentence:
            return jsonify({"error": "Please enter a sentence."})

        if detect(cleaned_sentence) != 'en':
            return jsonify({"error": "Please enter the sentence in English."})

        encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)

        # Put the inputs on whatever device the model currently lives on, so
        # the forward pass never mixes devices.
        model_device = model.device
        input_ids = encodings['input_ids'].to(model_device)
        attention_mask = encodings['attention_mask'].to(model_device)

        # Set the model in evaluation mode
        model.eval()

        # A single sentence needs no Dataset/DataLoader: run one forward pass.
        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)

        # argmax over the logits picks the same class as argmax over their
        # softmax. Convert to a plain int: id2label is keyed by ints, and a
        # tensor used as the key never matches one.
        predicted_class = int(torch.argmax(outputs.logits, dim=1)[0])

        predicted_intent = id2label[predicted_class]
        print(predicted_class, predicted_intent)
        return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})

    except Exception as e:
        # Request boundary: record the traceback server-side, then report the
        # failure to the client as JSON.
        app.logger.exception("Intent classification failed")
        return jsonify({"error": str(e)})
|
183 |
+
|
184 |
+
# if __name__ == '__main__':
|
185 |
+
# app.run(debug=True)
|
requirements.txt
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
accelerate
|
3 |
+
einops
|
4 |
+
xformers
|
5 |
+
bitsandbytes
|
6 |
+
huggingface_hub
|
7 |
+
PyPDF2
|
8 |
+
torch
|
9 |
+
datasets
|
10 |
+
numpy
|
11 |
+
scipy
|
12 |
+
Flask
|
13 |
+
gunicorn
|
14 |
+
Jinja2
|
15 |
+
pandas
|
16 |
+
matplotlib
|
17 |
+
tokenizers
|
18 |
+
scikit-learn
|
19 |
+
gradio
|
20 |
+
nltk
|
21 |
+
langdetect
|
22 |
+
beautifulsoup4
|
23 |
+
gunicorn
|
static/style.css
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* Page background and base typography. */
body {
    font-family: Arial, sans-serif;
    margin: 0;
    padding: 0;
    background-color: #f4f4f4;
}

/* Centered white card that wraps the page content. */
.container {
    width: 80%;
    margin: auto;
    padding: 20px;
    background-color: white;
    box-shadow: 0 0 10px #ccc;
    margin-top: 20px; /* declared after `margin: auto` so it overrides the top margin */
}

/* Headings. */
h1, h2 {
    color: #333;
}

/* Sentence input box. */
textarea {
    width: 100%;
    height: 100px;
    margin-bottom: 10px;
    padding: 10px;
    box-sizing: border-box;
}

/* Submit button. */
button {
    padding: 10px 20px;
    background-color: #007BFF;
    color: white;
    border: none;
    border-radius: 5px;
    cursor: pointer;
}

/* Darker shade while the pointer is over the button. */
button:hover {
    background-color: #0056b3;
}
|
surprise.solution
ADDED
The diff for this file is too large to render.
See raw diff
|
|
templates/index.html
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
+
<title>Intent Classification</title>
|
7 |
+
<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}"></head>
|
8 |
+
<body>
|
9 |
+
<div class="container">
|
10 |
+
<h1>Intent Classifier</h1>
|
11 |
+
<p>Select an intent to view (this list is for reference only):</p>
|
12 |
+
<select id="intent-list">
|
13 |
+
<option value="">-- Select an Intent --</option>
|
14 |
+
</select>
|
15 |
+
|
16 |
+
<h2>Test Your Sentence</h2>
|
17 |
+
<form action="/classify" method="post">
|
18 |
+
<textarea name="sentence" placeholder="Enter your sentence here..." required></textarea>
|
19 |
+
<button type="submit">Classify Intent</button>
|
20 |
+
</form>
|
21 |
+
<p id="predicted-intent"></p>
|
22 |
+
</div>
|
23 |
+
|
24 |
+
<script>
|
25 |
+
// Array of intents
|
26 |
+
const intents = ["accept reservations", "account blocked", "alarm", "application status",
|
27 |
+
"apr", "are you a bot", "balance", "bill balance", "bill due", "book flight",
|
28 |
+
"book hotel", "calculator", "calendar", "calendar update", "calories", "cancel",
|
29 |
+
"cancel reservation", "car rental", "card declined", "carry on", "change accent",
|
30 |
+
"change ai name", "change language", "change speed", "change user name",
|
31 |
+
"change volume", "confirm reservation", "cook time", "credit limit",
|
32 |
+
"credit limit change", "credit score", "current location", "damaged card", "date",
|
33 |
+
"definition", "direct deposit", "directions", "distance", "do you have pets",
|
34 |
+
"exchange rate", "expiration date", "find phone", "flight status", "flip coin",
|
35 |
+
"food last", "freeze account", "fun fact", "gas", "gas type", "goodbye", "greeting",
|
36 |
+
"how busy", "how old are you", "improve credit score", "income", "ingredient substitution",
|
37 |
+
"ingredients list", "insurance", "insurance change", "interest rate", "international fees",
|
38 |
+
"international visa", "jump start", "last maintenance", "lost luggage", "make call", "maybe",
|
39 |
+
"meal suggestion", "meaning of life", "measurement conversion", "meeting schedule", "min payment",
|
40 |
+
"mpg", "new card", "next holiday", "next song", "no", "nutrition info", "oil change how",
|
41 |
+
"oil change when", "order", "order checks", "order status", "pay bill", "payday", "pin change",
|
42 |
+
"play music", "plug type", "pto balance", "pto request", "pto request status", "pto used", "recipe",
|
43 |
+
"redeem rewards", "reminder", "reminder update", "repeat", "replacement card duration", "report fraud",
|
44 |
+
"report lost card", "reset settings", "restaurant reservation", "restaurant reviews", "restaurant suggestion",
|
45 |
+
"rewards balance", "roll dice", "rollover 401k", "routing", "schedule maintenance", "schedule meeting",
|
46 |
+
"share location", "shopping list", "shopping list update", "smart home", "spelling", "spending history",
|
47 |
+
"sync device", "taxes", "tell joke", "text", "thank you", "time", "timer", "timezone", "tire change",
|
48 |
+
"tire pressure", "todo list", "todo list update", "traffic", "transactions", "transfer", "translate",
|
49 |
+
"travel alert", "travel notification", "travel suggestion", "uber", "update playlist", "user name",
|
50 |
+
"vaccines", "w2", "weather", "what are your hobbies", "what can i ask you", "what is your name",
|
51 |
+
"what song", "where are you from", "whisper mode", "who do you work for", "who made you"];
|
52 |
+
|
53 |
+
// Populating the dropdown
|
54 |
+
const selectElement = document.getElementById('intent-list');
|
55 |
+
intents.forEach(intent => {
|
56 |
+
const option = document.createElement('option');
|
57 |
+
option.value = intent;
|
58 |
+
option.textContent = intent;
|
59 |
+
selectElement.appendChild(option);
|
60 |
+
});
|
61 |
+
</script>
|
62 |
+
</body>
|
63 |
+
</html>
|