Spaces:

Pankaj10346
/

Intent-classification

Runtime error

App Files Files Community

pankaj goyal committed on Apr 18

Commit

ec4fe29

•

1 Parent(s): 6e887b0

all_files_creation

Browse files

Files changed (6) hide show

Dockerfile +17 -0
main.py +185 -0
requirements.txt +23 -0
static/style.css +39 -0
surprise.solution +0 -0
templates/index.html +63 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,17 @@

+# Container image for the Flask intent-classification app, served by Gunicorn.
+FROM python:3.9
+WORKDIR /app
+# Copy only the dependency list first so the install layer can be reused when just the source changes
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Use the recommended HF_HOME instead of deprecated TRANSFORMERS_CACHE
+ENV HF_HOME=/code/cache/huggingface
+# Create the directory for the Transformers cache and set permissions
+RUN mkdir -p /code/cache/huggingface && chmod -R 777 /code/cache/huggingface
+COPY . /app
+EXPOSE 7860
+# Bind Gunicorn to the EXPOSEd port (7860); binding to any other port leaves the exposed port unserved.
+# Increase Gunicorn timeout to prevent worker timeout during long initializations
+CMD ["gunicorn", "-b", "0.0.0.0:7860", "main:app", "--timeout", "120", "--workers", "2", "--threads", "2"]

main.py ADDED Viewed

	@@ -0,0 +1,185 @@

+# from flask import Flask, request, jsonify, render_template
+# from transformers import RobertaTokenizer, RobertaForSequenceClassification
+# from bs4 import BeautifulSoup
+# from langdetect import detect
+# import torch
+# import json
+# import os
+# app = Flask(__name__)
+# device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
+# # Load model and tokenizer
+# MODEL_PATH = "pankaj100567/Intent-classification"
+# tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH)
+# model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH)
+# model.eval().to(device)
+# # Load label mappings from a JSON file
+# solution_file_path = os.path.join('surprise.solution')
+# with open(solution_file_path, 'r') as solutions_file:
+#     labels = [json.loads(line)['intent'] for line in solutions_file]
+# label2id = {label: i for i, label in enumerate(set(labels))}
+# id2label = {i: label for label, i in label2id.items()}
+# @app.route('/')
+# def index():
+#     return render_template('index.html')
+# @app.route('/classify', methods=['POST'])
+# def classify():
+#     try:
+#         sentence = request.form['sentence']
+#         soup = BeautifulSoup(sentence, "html.parser")
+#         cleaned_sentence = soup.get_text().strip()
+#         if detect(cleaned_sentence) != 'en':
+#             return jsonify({"error": "Please enter the sentence in English."})
+#         encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
+#         input_ids = encodings['input_ids'].to(device)
+#         attention_mask = encodings['attention_mask'].to(device)
+#         with torch.no_grad():
+#             outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+#             logits = outputs.logits
+#             probabilities = torch.softmax(logits, dim=1)
+#             predicted_class_index = probabilities.argmax().item()
+#         predicted_intent = id2label[predicted_class_index]
+#         return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
+#     except Exception as e:
+#         return jsonify({"error": str(e)})
+# if __name__ == '__main__':
+#     app.run(debug=True)
+from flask import Flask, request, jsonify, render_template
+from transformers import RobertaTokenizer, RobertaForSequenceClassification
+from bs4 import BeautifulSoup
+from langdetect import detect
+from torch.utils.data import DataLoader, TensorDataset
+import json
+import torch
+import os
+app = Flask(__name__)
+device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
+cache_dir = "/code/cache/huggingface"
+if not os.path.exists(cache_dir):
+    try:
+        os.makedirs(cache_dir)
+        os.chmod(cache_dir, 0o777)  # Set directory permissions to read, write, and execute by all users
+    except Exception as e:
+        print(f"Failed to create or set permissions for directory {cache_dir}: {e}")
+# cache_dir = "/code/cache/huggingface"
+# if not os.path.exists(cache_dir):
+#     os.makedirs(cache_dir)
+# Load model and tokenizer
+MODEL_PATH = "pankaj100567/Intent-classification"
+tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, cache_dir=cache_dir)
+model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, cache_dir=cache_dir, num_labels=150)
+# model.eval().to(device)
+# Load label mappings
+solution_file_path=os.path.join('surprise.solution')
+# test_data_path=os.path.join(data_path,'massive_test.data')
+# loading surprise.solution file for getting id2label and label2id mapping
+with open(solution_file_path,'r') as solutions_file:
+    solutions=[json.loads(line) for line in solutions_file] # reading json data from data_path and parse it into a test_data list
+labels_list=[]
+for label in solutions:
+    labels_list.append(label['intent'])
+unique_labels_list=[]
+for x in labels_list:
+    if x not in unique_labels_list:
+        unique_labels_list.append(x)
+# unique_labels_list, len(unique_labels_list)
+label2id={}
+id2label={}
+for i, label in enumerate(unique_labels_list):
+    label2id[label]=i
+    id2label[i]=label
+# # Load label mappings from a JSON file
+# solution_file_path = os.path.join('surprise.solution')
+# with open(solution_file_path, 'r') as solutions_file:
+#     labels = [json.loads(line)['intent'] for line in solutions_file]
+# label2id = {label: i for i, label in enumerate(set(labels))}
+# id2label = {i: label for label, i in label2id.items()}
+@app.route('/')
+def index():
+    return render_template('index.html')
+@app.route('/classify', methods=['POST'])
+def classify():
+    try:
+        sentence = request.form['sentence']
+        soup = BeautifulSoup(sentence, "html.parser")
+        cleaned_sentence = soup.get_text().strip()
+        if detect(cleaned_sentence) != 'en':
+            return jsonify({"error": "Please enter the sentence in English."})
+        encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
+        input_ids = encodings['input_ids'].to(device)
+        attention_mask = encodings['attention_mask'].to(device)
+        # Create a TensorDataset
+        test_dataset = TensorDataset(input_ids, attention_mask,)
+        # Define batch size
+        batch_size = 32
+        # Create a DataLoader
+        test_dataloader = DataLoader(test_dataset, batch_size=batch_size)
+        # Set the model in evaluation mode
+        model.eval()
+        # Iterate through the batches in the DataLoader
+        for batch in test_dataloader:
+            # Unpack the batch
+            input_ids, attention_mask = batch
+            # Move tensors to the device (e.g., GPU if available)
+            input_ids = input_ids.to(device)
+            attention_mask = attention_mask.to(device)
+            # Forward pass to get logits
+            with torch.no_grad():
+                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+            # Extract the logits tensor from the outputs
+            logits = outputs.logits
+            # Apply softmax to get class probabilities
+            probabilities = torch.softmax(logits, dim=1)
+            # Get the predicted class (index with the highest probability)
+            predicted_class = torch.argmax(probabilities, dim=1)
+            # Append the predicted class to the list of predictions
+            # predictions.extend(predicted_class.tolist())
+        # with torch.no_grad():
+        #     outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+        #     logits = outputs.logits
+        #     probabilities = torch.softmax(logits, dim=1)
+        #     predicted_class_index = probabilities.argmax().item()
+        predicted_intent = id2label[predicted_class]
+        print(predicted_class, predicted_intent)
+        return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
+    except Exception as e:
+        return jsonify({"error": str(e)})
+# if __name__ == '__main__':
+#     app.run(debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,23 @@

+transformers
+accelerate
+einops
+xformers
+bitsandbytes
+huggingface_hub
+PyPDF2
+torch
+datasets
+numpy
+scipy
+Flask
+gunicorn
+Jinja2
+pandas
+matplotlib
+tokenizers
+scikit-learn
+gradio
+nltk
+langdetect
+beautifulsoup4
+gunicorn

static/style.css ADDED Viewed

	@@ -0,0 +1,39 @@

+/* Page background and base font */
+body {
+    margin: 0;
+    padding: 0;
+    font-family: Arial, sans-serif;
+    background-color: #f4f4f4;
+}
+
+/* Centered white card that holds all page content */
+.container {
+    width: 80%;
+    margin: 20px auto auto;
+    padding: 20px;
+    background-color: #fff;
+    box-shadow: 0 0 10px #ccc;
+}
+
+/* Headings */
+h1,
+h2 {
+    color: #333;
+}
+
+/* Sentence input box */
+textarea {
+    box-sizing: border-box;
+    width: 100%;
+    height: 100px;
+    padding: 10px;
+    margin-bottom: 10px;
+}
+
+/* Submit button */
+button {
+    padding: 10px 20px;
+    border: none;
+    border-radius: 5px;
+    color: #fff;
+    background-color: #007bff;
+    cursor: pointer;
+}
+
+button:hover {
+    background-color: #0056b3;
+}

surprise.solution ADDED Viewed

The diff for this file is too large to render. See raw diff

templates/index.html ADDED Viewed

	@@ -0,0 +1,63 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Intent Classification</title>
+    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}"></head>
+<body>
+    <div class="container">
+        <h1>Intent Classifier</h1>
+        <p>Select an intent to view (this list is for reference only):</p>
+        <select id="intent-list">
+            <option value="">-- Select an Intent --</option>
+        </select>
+        <h2>Test Your Sentence</h2>
+        <form action="/classify" method="post">
+            <textarea name="sentence" placeholder="Enter your sentence here..." required></textarea>
+            <button type="submit">Classify Intent</button>
+        </form>
+        <p id="predicted-intent"></p>
+    </div>
+    <script>
+        // Array of intents
+        const intents = ["accept reservations", "account blocked", "alarm", "application status",
+            "apr", "are you a bot", "balance", "bill balance", "bill due", "book flight",
+            "book hotel", "calculator", "calendar", "calendar update", "calories", "cancel",
+            "cancel reservation", "car rental", "card declined", "carry on", "change accent",
+            "change ai name", "change language", "change speed", "change user name",
+            "change volume", "confirm reservation", "cook time", "credit limit",
+            "credit limit change", "credit score", "current location", "damaged card", "date",
+            "definition", "direct deposit", "directions", "distance", "do you have pets",
+            "exchange rate", "expiration date", "find phone", "flight status", "flip coin",
+            "food last", "freeze account", "fun fact", "gas", "gas type", "goodbye", "greeting",
+            "how busy", "how old are you", "improve credit score", "income", "ingredient substitution",
+            "ingredients list", "insurance", "insurance change", "interest rate", "international fees",
+            "international visa", "jump start", "last maintenance", "lost luggage", "make call", "maybe",
+            "meal suggestion", "meaning of life", "measurement conversion", "meeting schedule", "min payment",
+            "mpg", "new card", "next holiday", "next song", "no", "nutrition info", "oil change how",
+            "oil change when", "order", "order checks", "order status", "pay bill", "payday", "pin change",
+            "play music", "plug type", "pto balance", "pto request", "pto request status", "pto used", "recipe",
+            "redeem rewards", "reminder", "reminder update", "repeat", "replacement card duration", "report fraud",
+            "report lost card", "reset settings", "restaurant reservation", "restaurant reviews", "restaurant suggestion",
+            "rewards balance", "roll dice", "rollover 401k", "routing", "schedule maintenance", "schedule meeting",
+            "share location", "shopping list", "shopping list update", "smart home", "spelling", "spending history",
+            "sync device", "taxes", "tell joke", "text", "thank you", "time", "timer", "timezone", "tire change",
+            "tire pressure", "todo list", "todo list update", "traffic", "transactions", "transfer", "translate",
+            "travel alert", "travel notification", "travel suggestion", "uber", "update playlist", "user name",
+            "vaccines", "w2", "weather", "what are your hobbies", "what can i ask you", "what is your name",
+            "what song", "where are you from", "whisper mode", "who do you work for", "who made you"];
+        // Populating the dropdown
+        const selectElement = document.getElementById('intent-list');
+        intents.forEach(intent => {
+            const option = document.createElement('option');
+            option.value = intent;
+            option.textContent = intent;
+            selectElement.appendChild(option);
+        });
+        // Submit the sentence in the background and show the answer in #predicted-intent.
+        // Without this the browser navigates to the raw JSON returned by /classify.
+        const classifyForm = document.querySelector('form[action="/classify"]');
+        const resultElement = document.getElementById('predicted-intent');
+        classifyForm.addEventListener('submit', async (event) => {
+            event.preventDefault();
+            resultElement.textContent = 'Classifying...';
+            try {
+                const response = await fetch(classifyForm.action, {
+                    method: 'POST',
+                    body: new FormData(classifyForm)
+                });
+                const result = await response.json();
+                // textContent (not innerHTML) so server-provided text is never interpreted as markup
+                resultElement.textContent = result.error
+                    ? `Error: ${result.error}`
+                    : `Predicted intent: ${result.intent}`;
+            } catch (err) {
+                resultElement.textContent = 'Error: could not get a response from the classifier.';
+            }
+        });
+    </script>
+</body>
+</html>