import ktrain from ktrain import text import pandas as pd from sklearn.model_selection import train_test_split import os os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = "0" data = pd.read_csv('data/internet_provider.csv') # Replace 'data.csv' with your actual file name categories = ['Slow Connection', 'Billing', 'Setup', 'No Connectivity'] train_data, temp_data = train_test_split(data, test_size=0.2, random_state=42, shuffle=True) val_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42, shuffle=True) model_name = "distilbert-base-uncased" model = text.Transformer(model_name=model_name, maxlen=512, class_names=categories) train_data = model.preprocess_train(train_data["Text"].tolist(), train_data["Category"].tolist()) val_data = model.preprocess_train(val_data["Text"].tolist(), val_data["Category"].tolist()) test_data = model.preprocess_train(test_data["Text"].tolist(), test_data["Category"].tolist()) classifier = model.get_classifier() learner = ktrain.get_learner(classifier, train_data=train_data, val_data=val_data, batch_size=16) learner.lr_find(show_plot=True, max_epochs=20) learner.fit_onecycle(0.0001, 1) learner.validate(class_names=categories) learner.view_top_losses(n=5, preproc=model) print(train_data.iloc[100]) predictor = ktrain.get_predictor(learner.model, preproc=model) x = "I have issues with my internet connection" prediction = predictor.predict(x) print(f"prediction: {prediction}") print(predictor.explain(x)) predictor.save("distilbest-model") predictor = ktrain.load_predictor("distilbest-model") x = "I have issues with my internet connection" prediction = predictor.predict(x) print(f"prediction: {prediction}") print(predictor.explain(x))