pankaj goyal committed on
Commit
ec4fe29
1 Parent(s): 6e887b0

all_files_creation

Browse files
Files changed (6) hide show
  1. Dockerfile +17 -0
  2. main.py +185 -0
  3. requirements.txt +23 -0
  4. static/style.css +39 -0
  5. surprise.solution +0 -0
  6. templates/index.html +63 -0
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9
WORKDIR /app

# Copy and install the dependency list before the application code so the
# install layer is reused when only the application code changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Use the recommended HF_HOME instead of the deprecated TRANSFORMERS_CACHE.
ENV HF_HOME=/code/cache/huggingface

# Create the Hugging Face cache directory and make it writable by every user.
RUN mkdir -p /code/cache/huggingface && chmod -R 777 /code/cache/huggingface

COPY . /app

EXPOSE 7860
# Bind Gunicorn to the port that is exposed above; the previous bind address
# (7862) did not match EXPOSE, leaving the app unreachable on the exposed port.
# The raised timeout prevents worker timeouts during long initializations.
CMD ["gunicorn", "-b", "0.0.0.0:7860", "main:app", "--timeout", "120", "--workers", "2", "--threads", "2"]
17
+
main.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from flask import Flask, request, jsonify, render_template
2
+ # from transformers import RobertaTokenizer, RobertaForSequenceClassification
3
+ # from bs4 import BeautifulSoup
4
+ # from langdetect import detect
5
+ # import torch
6
+ # import json
7
+ # import os
8
+
9
+ # app = Flask(__name__)
10
+ # device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
11
+
12
+ # # Load model and tokenizer
13
+ # MODEL_PATH = "pankaj100567/Intent-classification"
14
+ # tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH)
15
+ # model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH)
16
+ # model.eval().to(device)
17
+
18
+ # # Load label mappings from a JSON file
19
+ # solution_file_path = os.path.join('surprise.solution')
20
+ # with open(solution_file_path, 'r') as solutions_file:
21
+ # labels = [json.loads(line)['intent'] for line in solutions_file]
22
+
23
+ # label2id = {label: i for i, label in enumerate(set(labels))}
24
+ # id2label = {i: label for label, i in label2id.items()}
25
+
26
+ # @app.route('/')
27
+ # def index():
28
+ # return render_template('index.html')
29
+
30
+ # @app.route('/classify', methods=['POST'])
31
+ # def classify():
32
+ # try:
33
+ # sentence = request.form['sentence']
34
+ # soup = BeautifulSoup(sentence, "html.parser")
35
+ # cleaned_sentence = soup.get_text().strip()
36
+
37
+ # if detect(cleaned_sentence) != 'en':
38
+ # return jsonify({"error": "Please enter the sentence in English."})
39
+
40
+ # encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
41
+ # input_ids = encodings['input_ids'].to(device)
42
+ # attention_mask = encodings['attention_mask'].to(device)
43
+
44
+ # with torch.no_grad():
45
+ # outputs = model(input_ids=input_ids, attention_mask=attention_mask)
46
+ # logits = outputs.logits
47
+ # probabilities = torch.softmax(logits, dim=1)
48
+ # predicted_class_index = probabilities.argmax().item()
49
+
50
+ # predicted_intent = id2label[predicted_class_index]
51
+ # return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
52
+
53
+ # except Exception as e:
54
+ # return jsonify({"error": str(e)})
55
+
56
+ # if __name__ == '__main__':
57
+ # app.run(debug=True)
58
+
59
+ from flask import Flask, request, jsonify, render_template
60
+ from transformers import RobertaTokenizer, RobertaForSequenceClassification
61
+ from bs4 import BeautifulSoup
62
+ from langdetect import detect
63
+ from torch.utils.data import DataLoader, TensorDataset
64
+ import json
65
+ import torch
66
+ import os
67
+
68
app = Flask(__name__)

# Always a torch.device, on both branches (previously the CPU fallback was the
# plain string "cpu").
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Directory passed to from_pretrained() as the download cache.
cache_dir = "/code/cache/huggingface"
if not os.path.exists(cache_dir):
    try:
        # exist_ok=True tolerates another worker process creating the
        # directory between the existence check and this call.
        os.makedirs(cache_dir, exist_ok=True)
        os.chmod(cache_dir, 0o777)  # read, write, and execute for all users
    except OSError as e:
        # Best effort only: report the problem and let from_pretrained()
        # decide whether the cache location is usable.
        print(f"Failed to create or set permissions for directory {cache_dir}: {e}")

# Load model and tokenizer.
MODEL_PATH = "pankaj100567/Intent-classification"
tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, cache_dir=cache_dir)
model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, cache_dir=cache_dir, num_labels=150)
# The request handler moves its input tensors to `device`, so the model must
# live there too; it is also switched to inference mode once, up front.
model.to(device)
model.eval()

# Load the label mappings: surprise.solution holds one JSON object per line,
# each with an 'intent' field.
solution_file_path = os.path.join('surprise.solution')
with open(solution_file_path, 'r', encoding='utf-8') as solutions_file:
    # Blank lines (e.g. a trailing newline at end of file) are skipped because
    # json.loads() rejects empty input.
    solutions = [json.loads(line) for line in solutions_file if line.strip()]

labels_list = [solution['intent'] for solution in solutions]

# Distinct intents in order of first appearance (dicts preserve insertion order).
unique_labels_list = list(dict.fromkeys(labels_list))

# NOTE(review): ids are assigned by first appearance in the file; presumably
# this matches the class indices the model was trained with - confirm.
label2id = {label: i for i, label in enumerate(unique_labels_list)}
id2label = {i: label for label, i in label2id.items()}
109
+ # # Load label mappings from a JSON file
110
+ # solution_file_path = os.path.join('surprise.solution')
111
+ # with open(solution_file_path, 'r') as solutions_file:
112
+ # labels = [json.loads(line)['intent'] for line in solutions_file]
113
+
114
+ # label2id = {label: i for i, label in enumerate(set(labels))}
115
+ # id2label = {i: label for label, i in label2id.items()}
116
@app.route('/')
def index():
    """Render the landing page template (``index.html``)."""
    return render_template('index.html')
119
+
120
@app.route('/classify', methods=['POST'])
def classify():
    """Predict the intent of the sentence posted in the ``sentence`` form field.

    Any HTML markup is stripped from the input before it is checked and
    classified.

    Returns:
        A JSON response with ``intent`` and ``sentence`` keys on success, or a
        JSON response with a single ``error`` key when the input is empty, is
        not detected as English, or processing fails for any other reason.
    """
    try:
        sentence = request.form['sentence']
        # Keep only the visible text of whatever markup was submitted.
        cleaned_sentence = BeautifulSoup(sentence, "html.parser").get_text().strip()

        # langdetect raises on text without detectable features, so reject
        # empty input with a readable message instead.
        if not cleaned_sentence:
            return jsonify({"error": "Please enter a sentence."})

        if detect(cleaned_sentence) != 'en':
            return jsonify({"error": "Please enter the sentence in English."})

        encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
        # Send the inputs to wherever the model's weights actually live so the
        # forward pass never mixes devices.
        input_ids = encodings['input_ids'].to(model.device)
        attention_mask = encodings['attention_mask'].to(model.device)

        # A single sentence is a batch of one, so the model is called directly
        # rather than through a Dataset/DataLoader.
        model.eval()
        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)

        # Class probabilities for the one sentence in the batch.
        probabilities = torch.softmax(outputs.logits, dim=1)

        # .item() turns the one-element tensor into a plain int; a tensor key
        # never matches the integer keys of id2label.
        predicted_class = torch.argmax(probabilities, dim=1).item()

        predicted_intent = id2label[predicted_class]
        print(predicted_class, predicted_intent)
        return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})

    except Exception as e:
        # Request boundary: report any failure to the client as JSON rather
        # than letting it surface as an HTML error page.
        return jsonify({"error": str(e)})
183
+
184
+ # if __name__ == '__main__':
185
+ # app.run(debug=True)
requirements.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ accelerate
3
+ einops
4
+ xformers
5
+ bitsandbytes
6
+ huggingface_hub
7
+ PyPDF2
8
+ torch
9
+ datasets
10
+ numpy
11
+ scipy
12
+ Flask
13
+ gunicorn
14
+ Jinja2
15
+ pandas
16
+ matplotlib
17
+ tokenizers
18
+ scikit-learn
19
+ gradio
20
+ nltk
21
+ langdetect
22
+ beautifulsoup4
23
+ gunicorn
static/style.css ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Page background and base typography. */
body {
    margin: 0;
    padding: 0;
    font-family: Arial, sans-serif;
    background-color: #f4f4f4;
}

/* Centered white card that holds all page content. */
.container {
    width: 80%;
    /* 20px on top, auto on the remaining three sides. */
    margin: 20px auto auto;
    padding: 20px;
    background-color: white;
    box-shadow: 0 0 10px #ccc;
}

h1,
h2 {
    color: #333;
}

/* Full-width sentence input; border-box keeps the padding inside the width. */
textarea {
    box-sizing: border-box;
    width: 100%;
    height: 100px;
    padding: 10px;
    margin-bottom: 10px;
}

/* Submit button, with a darker shade while hovered. */
button {
    padding: 10px 20px;
    border: none;
    border-radius: 5px;
    color: white;
    background-color: #007BFF;
    cursor: pointer;
}

button:hover {
    background-color: #0056b3;
}
surprise.solution ADDED
The diff for this file is too large to render. See raw diff
 
templates/index.html ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Intent Classification</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <div class="container">
        <h1>Intent Classifier</h1>
        <p>Select an intent to view (this list is for reference only):</p>
        <select id="intent-list">
            <option value="">-- Select an Intent --</option>
        </select>

        <h2>Test Your Sentence</h2>
        <!-- action/method are kept so the form still works without JavaScript. -->
        <form id="classify-form" action="/classify" method="post">
            <textarea name="sentence" placeholder="Enter your sentence here..." required></textarea>
            <button type="submit">Classify Intent</button>
        </form>
        <!-- Filled in by the submit handler below with the prediction or an error. -->
        <p id="predicted-intent"></p>
    </div>

    <script>
        // Array of intents
        const intents = ["accept reservations", "account blocked", "alarm", "application status",
            "apr", "are you a bot", "balance", "bill balance", "bill due", "book flight",
            "book hotel", "calculator", "calendar", "calendar update", "calories", "cancel",
            "cancel reservation", "car rental", "card declined", "carry on", "change accent",
            "change ai name", "change language", "change speed", "change user name",
            "change volume", "confirm reservation", "cook time", "credit limit",
            "credit limit change", "credit score", "current location", "damaged card", "date",
            "definition", "direct deposit", "directions", "distance", "do you have pets",
            "exchange rate", "expiration date", "find phone", "flight status", "flip coin",
            "food last", "freeze account", "fun fact", "gas", "gas type", "goodbye", "greeting",
            "how busy", "how old are you", "improve credit score", "income", "ingredient substitution",
            "ingredients list", "insurance", "insurance change", "interest rate", "international fees",
            "international visa", "jump start", "last maintenance", "lost luggage", "make call", "maybe",
            "meal suggestion", "meaning of life", "measurement conversion", "meeting schedule", "min payment",
            "mpg", "new card", "next holiday", "next song", "no", "nutrition info", "oil change how",
            "oil change when", "order", "order checks", "order status", "pay bill", "payday", "pin change",
            "play music", "plug type", "pto balance", "pto request", "pto request status", "pto used", "recipe",
            "redeem rewards", "reminder", "reminder update", "repeat", "replacement card duration", "report fraud",
            "report lost card", "reset settings", "restaurant reservation", "restaurant reviews", "restaurant suggestion",
            "rewards balance", "roll dice", "rollover 401k", "routing", "schedule maintenance", "schedule meeting",
            "share location", "shopping list", "shopping list update", "smart home", "spelling", "spending history",
            "sync device", "taxes", "tell joke", "text", "thank you", "time", "timer", "timezone", "tire change",
            "tire pressure", "todo list", "todo list update", "traffic", "transactions", "transfer", "translate",
            "travel alert", "travel notification", "travel suggestion", "uber", "update playlist", "user name",
            "vaccines", "w2", "weather", "what are your hobbies", "what can i ask you", "what is your name",
            "what song", "where are you from", "whisper mode", "who do you work for", "who made you"];

        // Populating the dropdown
        const selectElement = document.getElementById('intent-list');
        intents.forEach(intent => {
            const option = document.createElement('option');
            option.value = intent;
            option.textContent = intent;
            selectElement.appendChild(option);
        });

        // Submit the sentence in the background and show the JSON reply in the
        // result paragraph instead of navigating to the raw /classify response.
        const formElement = document.getElementById('classify-form');
        const resultElement = document.getElementById('predicted-intent');
        formElement.addEventListener('submit', async (event) => {
            event.preventDefault();
            resultElement.textContent = 'Classifying...';
            try {
                const response = await fetch(formElement.action, {
                    method: 'POST',
                    body: new FormData(formElement)
                });
                const data = await response.json();
                // textContent (not innerHTML) so server text is never parsed as markup.
                resultElement.textContent = data.error
                    ? 'Error: ' + data.error
                    : 'Predicted intent: ' + data.intent;
            } catch (err) {
                resultElement.textContent = 'Error: the classification request failed.';
            }
        });
    </script>
</body>
</html>