from flask import Flask, request, jsonify, render_template
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from bs4 import BeautifulSoup
from langdetect import detect
from torch.utils.data import DataLoader, TensorDataset
import json
import torch
import os

app = Flask(__name__)

# Run inference on the GPU when torch can see one, otherwise on the CPU.
# Always a torch.device so the value has one consistent type.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Directory handed to from_pretrained() as the Hugging Face download cache.
cache_dir = "/code/cache/huggingface"
if not os.path.exists(cache_dir):
    try:
        # exist_ok guards against another process creating the directory
        # between the exists() check above and this call.
        os.makedirs(cache_dir, exist_ok=True)
        # NOTE(review): world-writable on purpose so any user can write the
        # cache — confirm this is still required by the deployment.
        os.chmod(cache_dir, 0o777)  # read, write, and execute for all users
    except OSError as e:
        # Best effort: report the failure and let from_pretrained() decide
        # whether the cache location is usable.
        print(f"Failed to create or set permissions for directory {cache_dir}: {e}")

# Load model and tokenizer
MODEL_PATH = "pankaj100567/Intent-classification"
tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, cache_dir=cache_dir)
model = RobertaForSequenceClassification.from_pretrained(
    MODEL_PATH, cache_dir=cache_dir, num_labels=150
)
# The request handler moves its input tensors to `device`, so the model has to
# live on the same device; eval() switches off dropout for inference.
model.to(device)
model.eval()

# Load label mappings
# The solution file is read as JSON Lines: one JSON object per line, each with
# an 'intent' key. The path is relative to the current working directory.
solution_file_path = 'surprise.solution'

with open(solution_file_path, 'r', encoding='utf-8') as solutions_file:
    # Skip blank lines so stray empty lines do not crash json.loads().
    solutions = [json.loads(line) for line in solutions_file if line.strip()]

# Intent name of every record, in file order (duplicates included).
labels_list = [record['intent'] for record in solutions]

# Distinct intents in first-seen order; dict.fromkeys keeps insertion order
# and avoids the quadratic "x not in list" scan.
unique_labels_list = list(dict.fromkeys(labels_list))

# Ids are assigned by first appearance in the file.
# NOTE(review): presumably this ordering matches the class ids the model was
# trained with — confirm against the training script.
label2id = {label: i for i, label in enumerate(unique_labels_list)}
id2label = {i: label for label, i in label2id.items()}

# id2label = {i: label for label, i in label2id.items()}


@app.route('/')
def index():
    """Render the 'index.html' template for the root URL."""
    return render_template('index.html')


@app.route('/classify', methods=['POST'])
def classify():
    """Predict the intent of the sentence submitted in the POST form.

    Reads the ``sentence`` form field, strips HTML markup from it, rejects
    empty text and text that langdetect does not report as English, then runs
    the sequence-classification model and maps the highest-scoring class id
    to an intent name through ``id2label``.

    Returns:
        The rendered 'result.html' template, either with ``intent`` and
        ``sentence`` set (success) or with ``error`` set to a message.
        Any exception raised while handling the request is logged and its
        text is rendered as the error.
    """
    try:
        sentence = request.form['sentence']
        cleaned_sentence = BeautifulSoup(sentence, "html.parser").get_text().strip()

        # detect() cannot work on empty text; answer with a clear message
        # instead of surfacing the library's internal error.
        if not cleaned_sentence:
            return render_template('result.html', error="Please enter a sentence.")

        if detect(cleaned_sentence) != 'en':
            return render_template('result.html', error="Please enter the sentence in English.")

        encodings = tokenizer(
            cleaned_sentence,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )
        # Send the inputs to whichever device the model currently lives on so
        # the forward pass never mixes CPU and GPU tensors.
        target_device = model.device
        input_ids = encodings['input_ids'].to(target_device)
        attention_mask = encodings['attention_mask'].to(target_device)

        # A single sentence is a batch of one, so the model is called directly
        # rather than through a TensorDataset/DataLoader.
        model.eval()
        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)

        # Softmax is monotonic, so the argmax of the raw logits is the same
        # class the probabilities would select.
        predicted_class = torch.argmax(outputs.logits, dim=1).item()
        predicted_intent = id2label[predicted_class]

        return render_template('result.html', intent=predicted_intent, sentence=cleaned_sentence)
    except Exception as e:
        # Top-level request boundary: keep the traceback in the server log and
        # show the message on the result page.
        app.logger.exception("Intent classification failed")
        return render_template('result.html', error=str(e))


# if __name__ == '__main__':
#     app.run(debug=True)