Spaces:
Sleeping
Sleeping
import pandas as pd | |
import joblib | |
def predict_sequence_label(sequence):
    """
    Predict the label for a given amino acid sequence using the saved model.

    The sequence is converted to Amino Acid Composition (AAC) features, the
    estimator stored in 'model.joblib' (path relative to the current working
    directory) is loaded, and its prediction for the single-row feature table
    is returned.

    Parameters:
    sequence (str): A string representing the amino acid sequence. Matching
        against the 20 standard residues is case-sensitive (upper-case
        one-letter codes).

    Returns:
    The first element of the model's prediction output. The original
    documentation describes it as an int label (0 or 1); the exact type
    depends on the saved model.

    Raises:
    ValueError: If the sequence is empty, since residue frequencies are
        undefined for a zero-length sequence.
    """
    def compute_aac_features(sequence):
        """
        Compute the Amino Acid Composition (AAC) features for a given sequence.

        Parameters:
        sequence (str): A string representing the amino acid sequence.

        Returns:
        pd.DataFrame: Single-row DataFrame with one 'AAC_<residue>' column per
            standard amino acid, holding that residue's count divided by the
            total sequence length.

        Raises:
        ValueError: If the sequence is empty.
        """
        # The 20 standard amino acids; this order fixes the column order.
        amino_acids = 'ACDEFGHIKLMNPQRSTVWY'

        seq_length = len(sequence)
        if seq_length == 0:
            # Fail with a clear message instead of a bare ZeroDivisionError
            # from the frequency computation below.
            raise ValueError("sequence must contain at least one residue")

        # Count occurrences of each standard amino acid. Any other symbol is
        # skipped here but still contributes to seq_length, so the
        # frequencies may sum to less than 1.
        residue_counts = dict.fromkeys(amino_acids, 0)
        for aa in sequence:
            if aa in residue_counts:
                residue_counts[aa] += 1

        # Convert counts to frequencies under the 'AAC_<residue>' column names.
        aac_features = {
            f"AAC_{aa}": count / seq_length
            for aa, count in residue_counts.items()
        }

        # A one-row DataFrame keeps the column names attached to the values.
        return pd.DataFrame([aac_features])

    # Compute AAC features first so invalid input is rejected before any
    # model file is touched.
    aac_features_df = compute_aac_features(sequence)

    # Load the saved model.
    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code -- only load a model file from a trusted source.
    saved_model = joblib.load('model.joblib')

    # Predict using the loaded model; one input row yields one prediction.
    prediction = saved_model.predict(aac_features_df)
    return prediction[0]
# Example usage:
# sequence = "YOUR_AMINO_ACID_SEQUENCE_HERE"
# print(predict_sequence_label(sequence))