"""Run a trained DistilBERT sequence classifier on one sample sentence.

Loads the model weights from ``models/distilbert``, pairs them with the
``distilbert-base-uncased`` tokenizer, prints the pipeline's prediction for a
sample text, and then prints the distinct labels found in ``data/train.csv``.
"""

import pandas as pd
import torch
from transformers import (
    DistilBertForSequenceClassification,
    DistilBertTokenizerFast,
    pipeline,
)

# Load the trained model and tokenizer.
model_path = "models/distilbert"
# The tokenizer deliberately comes from the base checkpoint rather than
# model_path.
# NOTE(review): presumably no tokenizer files were saved next to the model
# weights -- confirm before switching this to model_path.
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
model = DistilBertForSequenceClassification.from_pretrained(model_path)

# Build the pipeline from the explicit model and tokenizer objects.
# device=0 is passed when CUDA is available, otherwise -1 (CPU).
classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)

# Example test.
sample_text = "I feel hopeless and have trouble sleeping."
result = classifier(sample_text)
print("Prediction:", result)

# List the labels present in the training data.
train_df = pd.read_csv("data/train.csv")
# Drop missing values before sorting: NaN mixed with string labels makes
# sorted() raise TypeError, and NaN among numeric labels yields an unreliable
# order.
unique_labels = sorted(train_df["label"].dropna().unique())
print(f"Available labels: {unique_labels}")