import gradio as gr
import torch
from datasets import load_dataset
from huggingface_hub import InferenceClient, login
from transformers import (
    BertForSequenceClassification,
    BertTokenizer,
    Trainer,
    TrainingArguments,
)

# Authenticate with Hugging Face
login()

# Load the dataset. "imdb" is a placeholder text-classification dataset;
# replace it with your own dataset (e.g. one related to password
# classification) that exposes a 'text' column and binary labels.
dataset = load_dataset("imdb")

# Load Tokenizer and Model
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)


# Preprocess the Dataset
def preprocess_function(examples):
    """Tokenize the 'text' field of a batch of examples.

    Every sequence is padded to the tokenizer's maximum length and truncated
    if longer, so all encoded examples have the same length.
    """
    return tokenizer(examples["text"], padding="max_length", truncation=True)


# Apply preprocessing to dataset
tokenized_datasets = dataset.map(preprocess_function, batched=True)

# Split into training and evaluation datasets
train_dataset = tokenized_datasets["train"]
eval_dataset = tokenized_datasets["test"]

# Define Training Arguments
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` -- confirm against the installed version.
training_args = TrainingArguments(
    output_dir="./results",           # output directory
    num_train_epochs=3,               # number of training epochs
    per_device_train_batch_size=8,    # batch size for training
    per_device_eval_batch_size=16,    # batch size for evaluation
    warmup_steps=500,                 # number of warmup steps for learning rate scheduler
    weight_decay=0.01,                # strength of weight decay
    logging_dir="./logs",             # directory for storing logs
    logging_steps=10,
    evaluation_strategy="epoch",      # evaluate each epoch
    save_strategy="epoch",            # save model each epoch
)

# Initialize Trainer
trainer = Trainer(
    model=model,                      # the instantiated Transformers model to be trained
    args=training_args,               # training arguments, defined above
    train_dataset=train_dataset,      # training dataset
    eval_dataset=eval_dataset,        # evaluation dataset
)

# Train the Model
trainer.train()

# Save the Model and Tokenizer
model.save_pretrained("./password_sniffer_model")
tokenizer.save_pretrained("./password_sniffer_tokenizer")

# Load the fine-tuned model and tokenizer back from the saved directories
model = BertForSequenceClassification.from_pretrained("./password_sniffer_model")
tokenizer = BertTokenizer.from_pretrained("./password_sniffer_tokenizer")

# Setup Hugging Face Inference Client
# NOTE(review): `client` is not used anywhere below (inference runs on the
# local `model`), and "password_sniffer_model" is a local directory name here
# rather than an obvious Hub model id -- confirm whether this is still needed.
client = InferenceClient("password_sniffer_model")


def detect_passwords(text):
    """Classify `text` with the fine-tuned BERT model.

    Args:
        text: The text to classify.

    Returns:
        "Potential password detected." when the model predicts class 1,
        otherwise "No password detected.".
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)
    # Softmax is monotonic, so argmax over the raw logits selects the same
    # class as argmax over the probabilities.
    predicted_class = torch.argmax(outputs.logits, dim=-1).item()

    # Assuming '1' represents potential password. NOTE(review): this depends
    # on the label encoding of the dataset the model was trained on.
    if predicted_class == 1:
        return "Potential password detected."
    return "No password detected."


# Gradio Interface
def respond(message, history=None, system_message="", max_tokens=None,
            temperature=None, top_p=None):
    """Return the password-detection verdict for `message`.

    Only `message` is used. The remaining parameters are accepted so the
    function keeps its chat-style signature, and they default to empty values
    so callers that only have a message can omit them.
    """
    return detect_passwords(message)


def _respond_from_interface(system_message, user_input):
    """Adapt the two Interface textboxes to `respond`.

    gr.Interface calls its function with one positional argument per input
    component, in the order the components are listed, so the system message
    arrives first and the user input second.
    """
    return respond(user_input, [], system_message)


demo = gr.Interface(
    fn=_respond_from_interface,
    inputs=[
        gr.Textbox(value="You are a password detection chatbot.", label="System message"),
        gr.Textbox(value="Hello, your password might be 12345!", label="User input"),
    ],
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()