Spaces:

mawairon
/

NOOTestspace

Sleeping

App Files Community

mawairon committed on Jun 26, 2024

Commit

66990c3

•

1 Parent(s): dc7d693

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -26

app.py CHANGED Viewed

@@ -3,17 +3,18 @@ import transformers
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel
 import torch
 import torch.nn as nn
-import matplotlib.pyplot as plt
 import pandas as pd
 class LogisticRegressionTorch(nn.Module):
-    def __init__(self,
-                 input_dim: int,
-                 output_dim: int):
         super(LogisticRegressionTorch, self).__init__()
         self.batch_norm = nn.BatchNorm1d(num_features=input_dim)
         self.linear = nn.Linear(input_dim, output_dim)
@@ -25,11 +26,7 @@ class LogisticRegressionTorch(nn.Module):
 class BertClassifier(nn.Module):
-    def __init__(self,
-                 bert_model: AutoModel,
-                 classifier: LogisticRegressionTorch,
-                 num_labels: int):
         super(BertClassifier, self).__init__()
         self.bert = bert_model  # Assume bert_model is an instance of a pre-trained BertModel
         self.classifier = classifier
@@ -61,22 +58,20 @@ class BertClassifier(nn.Module):
         # Return the loss and logits
         return loss, logits
 # Load the Hugging Face model and tokenizer
 metadata_features = 0
-N_UNIQUE_CLASSES = 38 ## or 38
 base_model = AutoModel.from_pretrained('AIRI-Institute/gena-lm-bert-base-lastln-t2t', trust_remote_code=True, output_hidden_states=True)
 tokenizer = AutoTokenizer.from_pretrained('AIRI-Institute/gena-lm-bert-base-lastln-t2t', trust_remote_code=True)
 # Initialize the classifier
-input_size = 768 + metadata_features # featurizer output size + metadata size
 log_reg = LogisticRegressionTorch(input_dim=input_size, output_dim=N_UNIQUE_CLASSES)
 # Load Weights
-model_weights_path = 'gena-blastln-bs33-lr4e-05-S168.pth'
 weights = torch.load(model_weights_path, map_location=torch.device('cpu'))
 base_model.load_state_dict(weights['model_state_dict'])
@@ -84,11 +79,6 @@ log_reg.load_state_dict(weights['log_reg_state_dict'])
 # Creating Model
 model = BertClassifier(base_model, log_reg, num_labels=N_UNIQUE_CLASSES)
-model.eval()
-# Dictionary to decode model predictions
-label_to_int = pd.read_pkl('label_to_int.pkl')
-int_to_label = {v: k for k, v in label_to_int.items()}
 # Define a function to process the DNA sequence
 def analyze_dna(sequence):
@@ -113,20 +103,27 @@ def analyze_dna(sequence):
     top_5_labels = [int_to_label[i] for i in top_5_indices]
     # Prepare the output as a list of tuples (label_name, probability)
-    #result = [(label, prob) for label, prob in zip(top_5_labels, top_5_probs)]
     # Plot histogram
     fig, ax = plt.subplots(figsize=(10, 6))
     ax.barh(top_5_labels, top_5_probs, color='skyblue')
     ax.set_xlabel('Probability')
     ax.set_title('Top 5 Most Likely Labels')
     plt.gca().invert_yaxis()  # Highest probabilities at the top
-    #return result
 # Create a Gradio interface
-demo = gr.Interface(fn=analyze_dna, inputs="text", outputs="json")
 # Launch the interface
 demo.launch()

 from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel
 import torch
 import torch.nn as nn
 import pandas as pd
+import matplotlib.pyplot as plt
+import io
+import base64
+# Assuming label_to_int is a dictionary with {label_name: label_index}
+label_to_int = pd.read_pickle('label_to_int.pkl')
+int_to_label = {v: k for k, v in label_to_int.items()}
 class LogisticRegressionTorch(nn.Module):
+    def __init__(self, input_dim: int, output_dim: int):
         super(LogisticRegressionTorch, self).__init__()
         self.batch_norm = nn.BatchNorm1d(num_features=input_dim)
         self.linear = nn.Linear(input_dim, output_dim)
 class BertClassifier(nn.Module):
+    def __init__(self, bert_model: AutoModel, classifier: LogisticRegressionTorch, num_labels: int):
         super(BertClassifier, self).__init__()
         self.bert = bert_model  # Assume bert_model is an instance of a pre-trained BertModel
         self.classifier = classifier
         # Return the loss and logits
         return loss, logits
 # Load the Hugging Face model and tokenizer
 metadata_features = 0
+N_UNIQUE_CLASSES = 38  # or 38
 base_model = AutoModel.from_pretrained('AIRI-Institute/gena-lm-bert-base-lastln-t2t', trust_remote_code=True, output_hidden_states=True)
 tokenizer = AutoTokenizer.from_pretrained('AIRI-Institute/gena-lm-bert-base-lastln-t2t', trust_remote_code=True)
 # Initialize the classifier
+input_size = 768 + metadata_features  # featurizer output size + metadata size
 log_reg = LogisticRegressionTorch(input_dim=input_size, output_dim=N_UNIQUE_CLASSES)
 # Load Weights
+model_weights_path = 'model/gena-blastln-bs33-lr4e-05-S168.pth'
 weights = torch.load(model_weights_path, map_location=torch.device('cpu'))
 base_model.load_state_dict(weights['model_state_dict'])
 # Creating Model
 model = BertClassifier(base_model, log_reg, num_labels=N_UNIQUE_CLASSES)
 # Define a function to process the DNA sequence
 def analyze_dna(sequence):
     top_5_labels = [int_to_label[i] for i in top_5_indices]
     # Prepare the output as a list of tuples (label_name, probability)
+    result = [(label, prob) for label, prob in zip(top_5_labels, top_5_probs)]
     # Plot histogram
     fig, ax = plt.subplots(figsize=(10, 6))
     ax.barh(top_5_labels, top_5_probs, color='skyblue')
     ax.set_xlabel('Probability')
     ax.set_title('Top 5 Most Likely Labels')
     plt.gca().invert_yaxis()  # Highest probabilities at the top
+    # Save plot to a PNG image in memory
+    buf = io.BytesIO()
+    plt.savefig(buf, format='png')
+    buf.seek(0)
+    image_base64 = base64.b64encode(buf.read()).decode('utf-8')
+    buf.close()
+    return result, f'<img src="data:image/png;base64,{image_base64}" />'
 # Create a Gradio interface
+demo = gr.Interface(fn=analyze_dna, inputs="text", outputs=["json", "html"])
 # Launch the interface
 demo.launch()