from collections import Counter

import joblib
import pandas as pd

# The 20 standard amino acids, in the column order used for the AAC features.
_AMINO_ACIDS = "ACDEFGHIKLMNPQRSTVWY"

# Path of the serialized model; relative, so it is resolved against the
# current working directory of the process.
_MODEL_PATH = "model.joblib"


def _compute_aac_features(sequence):
    """
    Compute the Amino Acid Composition (AAC) features for a given sequence.

    Each feature ``AAC_<X>`` is the number of occurrences of residue ``X``
    divided by the total length of the sequence. Residue letters are matched
    case-insensitively. Symbols outside the 20 standard amino acids are not
    counted but still contribute to the length, so the frequencies may sum to
    less than 1.

    Parameters:
    sequence (str): A string representing the amino acid sequence.

    Returns:
    pd.DataFrame: Single-row DataFrame with one ``AAC_<X>`` column per
    standard amino acid, in the order of ``_AMINO_ACIDS``.

    Raises:
    ValueError: If the sequence is empty (frequencies would be undefined).
    """
    seq_length = len(sequence)
    if seq_length == 0:
        # Guard explicitly instead of failing later with ZeroDivisionError.
        raise ValueError("sequence must contain at least one residue")

    residue_counts = Counter(sequence)

    # Build the features in a fixed column order; a Counter returns 0 for
    # residues that never occur, so every column is always present.
    aac_features = {
        f"AAC_{aa}": (residue_counts[aa] + residue_counts[aa.lower()]) / seq_length
        for aa in _AMINO_ACIDS
    }
    return pd.DataFrame([aac_features])


def predict_sequence_label(sequence):
    """
    Predict the label for a given amino acid sequence using the saved model
    (a Random Forest, according to the original documentation).

    The sequence is converted to AAC features, the model is loaded from
    ``model.joblib`` and its ``predict`` method is applied to the features.
    The model is loaded from disk on every call.

    Parameters:
    sequence (str): A string representing the amino acid sequence.

    Returns:
    The first element of the model's prediction (documented as the integer
    label 0 or 1; the exact scalar type depends on the saved model).

    Raises:
    ValueError: If the sequence is empty.
    """
    # Compute (and validate) the features first so bad input fails before
    # any file I/O happens.
    aac_features_df = _compute_aac_features(sequence)

    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load model files from a trusted source.
    saved_model = joblib.load(_MODEL_PATH)

    prediction = saved_model.predict(aac_features_df)
    return prediction[0]


# Example usage:
# sequence = "YOUR_AMINO_ACID_SEQUENCE_HERE"
# print(predict_sequence_label(sequence))