Spaces:

Pankaj10346
/

Intent-classification

Runtime error

App Files Files Community

pankaj goyal committed on Apr 18

Commit

9129286

•

1 Parent(s): 25bfc0c

modified result.html

Browse files

Files changed (5) hide show

Dockerfile +1 -0
main.py +158 -63
requirements.txt +2 -1
templates/index.html +1 -1
templates/result.html +17 -0

Dockerfile CHANGED Viewed

@@ -10,6 +10,7 @@ ENV HF_HOME=/code/cache/huggingface
 RUN mkdir -p /code/cache/huggingface && chmod -R 777 /code/cache/huggingface
 COPY . /app
 EXPOSE 7860
 # Increase Gunicorn timeout to prevent worker timeout during long initializations
 CMD ["gunicorn", "-b", "0.0.0.0:7862", "main:app", "--timeout", "120",]

 RUN mkdir -p /code/cache/huggingface && chmod -R 777 /code/cache/huggingface
 COPY . /app
 EXPOSE 7860
 # Increase Gunicorn timeout to prevent worker timeout during long initializations
 CMD ["gunicorn", "-b", "0.0.0.0:7862", "main:app", "--timeout", "120",]

main.py CHANGED Viewed

@@ -1,28 +1,118 @@
 # from flask import Flask, request, jsonify, render_template
 # from transformers import RobertaTokenizer, RobertaForSequenceClassification
 # from bs4 import BeautifulSoup
 # from langdetect import detect
-# import torch
 # import json
 # import os
 # app = Flask(__name__)
 # device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
 # # Load model and tokenizer
 # MODEL_PATH = "pankaj100567/Intent-classification"
-# tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH)
-# model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH)
-# model.eval().to(device)
-# # Load label mappings from a JSON file
-# solution_file_path = os.path.join('surprise.solution')
-# with open(solution_file_path, 'r') as solutions_file:
-#     labels = [json.loads(line)['intent'] for line in solutions_file]
-# label2id = {label: i for i, label in enumerate(set(labels))}
-# id2label = {i: label for label, i in label2id.items()}
 # @app.route('/')
 # def index():
 #     return render_template('index.html')
@@ -40,22 +130,56 @@
 #         encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
 #         input_ids = encodings['input_ids'].to(device)
 #         attention_mask = encodings['attention_mask'].to(device)
-#         with torch.no_grad():
-#             outputs = model(input_ids=input_ids, attention_mask=attention_mask)
 #             logits = outputs.logits
 #             probabilities = torch.softmax(logits, dim=1)
-#             predicted_class_index = probabilities.argmax().item()
-#         predicted_intent = id2label[predicted_class_index]
-#         return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
-#     except Exception as e:
-#         return jsonify({"error": str(e)})
-# if __name__ == '__main__':
-#     app.run(debug=True)
 from flask import Flask, request, jsonify, render_template
 from transformers import RobertaTokenizer, RobertaForSequenceClassification
 from bs4 import BeautifulSoup
@@ -80,8 +204,8 @@ if not os.path.exists(cache_dir):
 #     os.makedirs(cache_dir)
 # Load model and tokenizer
 MODEL_PATH = "pankaj100567/Intent-classification"
-tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, cache_dir=cache_dir)
-model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, cache_dir=cache_dir, num_labels=150)
 # model.eval().to(device)
@@ -125,62 +249,33 @@ def classify():
         cleaned_sentence = soup.get_text().strip()
         if detect(cleaned_sentence) != 'en':
-            return jsonify({"error": "Please enter the sentence in English."})
         encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
         input_ids = encodings['input_ids'].to(device)
         attention_mask = encodings['attention_mask'].to(device)
-        # Create a TensorDataset
-        test_dataset = TensorDataset(input_ids, attention_mask,)
-        # Define batch size
-        batch_size = 32
-        # Create a DataLoader
-        test_dataloader = DataLoader(test_dataset, batch_size=batch_size)
-        # Set the model in evaluation mode
         model.eval()
-        # Iterate through the batches in the DataLoader
         for batch in test_dataloader:
-            # Unpack the batch
             input_ids, attention_mask = batch
-            # Move tensors to the device (e.g., GPU if available)
             input_ids = input_ids.to(device)
             attention_mask = attention_mask.to(device)
-            # Forward pass to get logits
             with torch.no_grad():
                 outputs = model(input_ids=input_ids, attention_mask=attention_mask)
-            # Extract the logits tensor from the outputs
-            logits = outputs.logits
-            # Apply softmax to get class probabilities
-            probabilities = torch.softmax(logits, dim=1)
-            # Get the predicted class (index with the highest probability)
-            predicted_class = torch.argmax(probabilities, dim=1)
-            # Append the predicted class to the list of predictions
-            # predictions.extend(predicted_class.tolist())
-        # with torch.no_grad():
-        #     outputs = model(input_ids=input_ids, attention_mask=attention_mask)
-        #     logits = outputs.logits
-        #     probabilities = torch.softmax(logits, dim=1)
-        #     predicted_class_index = probabilities.argmax().item()
         predicted_intent = id2label[predicted_class]
-        print(predicted_class, predicted_intent)
-        return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
     except Exception as e:
-        return jsonify({"error": str(e)})
 # if __name__ == '__main__':
 #     app.run(debug=True)

+# # from flask import Flask, request, jsonify, render_template
+# # from transformers import RobertaTokenizer, RobertaForSequenceClassification
+# # from bs4 import BeautifulSoup
+# # from langdetect import detect
+# # import torch
+# # import json
+# # import os
+# # app = Flask(__name__)
+# # device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
+# # # Load model and tokenizer
+# # MODEL_PATH = "pankaj100567/Intent-classification"
+# # tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH)
+# # model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH)
+# # model.eval().to(device)
+# # # Load label mappings from a JSON file
+# # solution_file_path = os.path.join('surprise.solution')
+# # with open(solution_file_path, 'r') as solutions_file:
+# #     labels = [json.loads(line)['intent'] for line in solutions_file]
+# # label2id = {label: i for i, label in enumerate(set(labels))}
+# # id2label = {i: label for label, i in label2id.items()}
+# # @app.route('/')
+# # def index():
+# #     return render_template('index.html')
+# # @app.route('/classify', methods=['POST'])
+# # def classify():
+# #     try:
+# #         sentence = request.form['sentence']
+# #         soup = BeautifulSoup(sentence, "html.parser")
+# #         cleaned_sentence = soup.get_text().strip()
+# #         if detect(cleaned_sentence) != 'en':
+# #             return jsonify({"error": "Please enter the sentence in English."})
+# #         encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
+# #         input_ids = encodings['input_ids'].to(device)
+# #         attention_mask = encodings['attention_mask'].to(device)
+# #         with torch.no_grad():
+# #             outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+# #             logits = outputs.logits
+# #             probabilities = torch.softmax(logits, dim=1)
+# #             predicted_class_index = probabilities.argmax().item()
+# #         predicted_intent = id2label[predicted_class_index]
+# #         return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
+# #     except Exception as e:
+# #         return jsonify({"error": str(e)})
+# # if __name__ == '__main__':
+# #     app.run(debug=True)
 # from flask import Flask, request, jsonify, render_template
 # from transformers import RobertaTokenizer, RobertaForSequenceClassification
 # from bs4 import BeautifulSoup
 # from langdetect import detect
+# from torch.utils.data import DataLoader, TensorDataset
 # import json
+# import torch
 # import os
 # app = Flask(__name__)
 # device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
+# cache_dir = "/code/cache/huggingface"
+# if not os.path.exists(cache_dir):
+#     try:
+#         os.makedirs(cache_dir)
+#         os.chmod(cache_dir, 0o777)  # Set directory permissions to read, write, and execute by all users
+#     except Exception as e:
+#         print(f"Failed to create or set permissions for directory {cache_dir}: {e}")
+# # cache_dir = "/code/cache/huggingface"
+# # if not os.path.exists(cache_dir):
+# #     os.makedirs(cache_dir)
 # # Load model and tokenizer
 # MODEL_PATH = "pankaj100567/Intent-classification"
+# tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, cache_dir=cache_dir)
+# model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, cache_dir=cache_dir, num_labels=150)
+# # model.eval().to(device)
+# # Load label mappings
+# solution_file_path=os.path.join('surprise.solution')
+# # test_data_path=os.path.join(data_path,'massive_test.data')
+# # loading surprise.solution file for getting id2label and label2id mapping
+# with open(solution_file_path,'r') as solutions_file:
+#     solutions=[json.loads(line) for line in solutions_file] # reading json data from data_path and parse it into a test_data list
+# labels_list=[]
+# for label in solutions:
+#     labels_list.append(label['intent'])
+# unique_labels_list=[]
+# for x in labels_list:
+#     if x not in unique_labels_list:
+#         unique_labels_list.append(x)
+# # unique_labels_list, len(unique_labels_list)
+# label2id={}
+# id2label={}
+# for i, label in enumerate(unique_labels_list):
+#     label2id[label]=i
+#     id2label[i]=label
+# # # Load label mappings from a JSON file
+# # solution_file_path = os.path.join('surprise.solution')
+# # with open(solution_file_path, 'r') as solutions_file:
+# #     labels = [json.loads(line)['intent'] for line in solutions_file]
+# # label2id = {label: i for i, label in enumerate(set(labels))}
+# # id2label = {i: label for label, i in label2id.items()}
 # @app.route('/')
 # def index():
 #     return render_template('index.html')
 #         encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
 #         input_ids = encodings['input_ids'].to(device)
 #         attention_mask = encodings['attention_mask'].to(device)
+#         # Create a TensorDataset
+#         test_dataset = TensorDataset(input_ids, attention_mask,)
+#         # Define batch size
+#         batch_size = 32
+#         # Create a DataLoader
+#         test_dataloader = DataLoader(test_dataset, batch_size=batch_size)
+#         # Set the model in evaluation mode
+#         model.eval()
+#         # Iterate through the batches in the DataLoader
+#         for batch in test_dataloader:
+#             # Unpack the batch
+#             input_ids, attention_mask = batch
+#             # Move tensors to the device (e.g., GPU if available)
+#             input_ids = input_ids.to(device)
+#             attention_mask = attention_mask.to(device)
+#             # Forward pass to get logits
+#             with torch.no_grad():
+#                 outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+#             # Extract the logits tensor from the outputs
 #             logits = outputs.logits
+#             # Apply softmax to get class probabilities
 #             probabilities = torch.softmax(logits, dim=1)
+#             # Get the predicted class (index with the highest probability)
+#             predicted_class = torch.argmax(probabilities, dim=1)
+#             # Append the predicted class to the list of predictions
+#             # predictions.extend(predicted_class.tolist())
+#         # with torch.no_grad():
+#         #     outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+#         #     logits = outputs.logits
+#         #     probabilities = torch.softmax(logits, dim=1)
+#         #     predicted_class_index = probabilities.argmax().item()
+#         predicted_intent = id2label[predicted_class]
+#         print(predicted_class, predicted_intent)
+#         return jsonify({"intent": predicted_intent, "sentence": cleaned_sentence})
+#     except Exception as e:
+        # return jsonify({"error": str(e)})
 from flask import Flask, request, jsonify, render_template
 from transformers import RobertaTokenizer, RobertaForSequenceClassification
 from bs4 import BeautifulSoup
 #     os.makedirs(cache_dir)
 # Load model and tokenizer
 MODEL_PATH = "pankaj100567/Intent-classification"
+tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, cache_dir= cache_dir)
+model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, cache_dir= cache_dir, num_labels=150)
 # model.eval().to(device)
         cleaned_sentence = soup.get_text().strip()
         if detect(cleaned_sentence) != 'en':
+            return render_template('result.html', error="Please enter the sentence in English.")
         encodings = tokenizer(cleaned_sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
         input_ids = encodings['input_ids'].to(device)
         attention_mask = encodings['attention_mask'].to(device)
+        test_dataset = TensorDataset(input_ids, attention_mask)
+        test_dataloader = DataLoader(test_dataset, batch_size=1)  # Assume a batch size of 1 for individual predictions
         model.eval()
         for batch in test_dataloader:
             input_ids, attention_mask = batch
             input_ids = input_ids.to(device)
             attention_mask = attention_mask.to(device)
             with torch.no_grad():
                 outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+                logits = outputs.logits
+                probabilities = torch.softmax(logits, dim=1)
+                predicted_class = torch.argmax(probabilities, dim=1).item()
         predicted_intent = id2label[predicted_class]
+        return render_template('result.html', intent=predicted_intent, sentence=cleaned_sentence)
     except Exception as e:
+        return render_template('result.html', error=str(e))
 # if __name__ == '__main__':
 #     app.run(debug=True)

requirements.txt CHANGED Viewed

@@ -18,6 +18,7 @@ tokenizers
 scikit-learn
 gradio
 nltk
 langdetect
 beautifulsoup4
-gunicorn

 scikit-learn
 gradio
 nltk
+gunicorn
 langdetect
 beautifulsoup4

templates/index.html CHANGED Viewed

@@ -12,7 +12,7 @@
         <select id="intent-list">
             <option value="">-- Select an Intent --</option>
         </select>
         <h2>Test Your Sentence</h2>
         <form action="/classify" method="post">
             <textarea name="sentence" placeholder="Enter your sentence here..." required></textarea>

         <select id="intent-list">
             <option value="">-- Select an Intent --</option>
         </select>
+        <h3> This website may give sometime wrong important regarding intent , because it is under maintenance </h3>
         <h2>Test Your Sentence</h2>
         <form action="/classify" method="post">
             <textarea name="sentence" placeholder="Enter your sentence here..." required></textarea>

templates/result.html ADDED Viewed

	@@ -0,0 +1,17 @@

+<!-- templates/result.html -->
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Classification Result</title>
+</head>
+<body>
+    <h1>Classification Result</h1>
+    {% if error %}
+        <p>Error: {{ error }}</p>
+    {% else %}
+        <p>Sentence: "{{ sentence }}"</p>
+        <p>Intent: {{ intent }}</p>
+    {% endif %}
+    <a href="{{ url_for('index') }}">Try another sentence</a>
+</body>
+</html>