import numpy as np import tensorflow as tf import re from transformers import BertTokenizer @st.cache(allow_output_mutation=True) def get_model(): tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') model = tf.saved_model.load(PlengP/hfphishoffbert300000) return tokenizer, model def main(): st.title("Welcome to Phish-Off!") tokenizer, model = get_model() user_input = st.text_area('Enter your URL here...') button = st.button("Analyze") d = { 0: 'Safe', 1: 'Unsafe' } # Preprocessing function def preprocess_link(link): # Convert the link to lowercase processed_link = link.lower() # Apply necessary transformations or feature engineering based on the existing code processed_link = re.sub(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', '', processed_link) processed_link = re.sub(r'^https?:\/\/', '', processed_link) processed_link = re.sub(r'www\.', '', processed_link) processed_link = re.sub(r'\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$', '', processed_link) return processed_link if user_input and button: input = user_input user_input = preprocess_link(input) # Tokenize the input inputs = tokenizer.encode_plus( user_input, add_special_tokens=True, max_length=512, padding='max_length', truncation=True, return_tensors='tf' ) # Convert input tensors to numpy arrays input_ids = inputs['input_ids'].numpy() attention_mask = inputs['attention_mask'].numpy() token_type_ids = inputs['token_type_ids'].numpy() # Get the signature_def signature_def = model.signatures["serving_default"] # Run the model prediction output = signature_def( input_ids=tf.constant(input_ids), attention_mask=tf.constant(attention_mask), token_type_ids=tf.constant(token_type_ids) ) # Print the output tensor keys print(output.keys()) # Perform the prediction using the correct output key y_pred = np.argmax(output['logits'].numpy(), axis=1) st.write("Prediction:", d[y_pred[0]]) if __name__ == "__main__": main()