# NOTE: web-page extraction residue, kept as comments (not part of the program):
# Spaces: Sleeping
# File size: 1,513 Bytes
# 9c7bd1d 36358e5
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import pickle
import sys
def greet(name: str) -> str:
    """Return a greeting of the form ``"Hello <name>!!"``.

    Args:
        name: Text inserted into the greeting. It is concatenated directly,
            so a non-string argument raises ``TypeError``.

    Returns:
        The greeting string.
    """
    return "Hello " + name + "!!"
# Load the CountVectorizer from disk.
# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load artifacts that come from a trusted source.
# The `with` blocks close the file handles as soon as loading finishes.
with open('countVectTrain.pkl', 'rb') as cv_file:
    cv = pickle.load(cv_file)
# Load the prediction model from disk.
filename = 'corona_pred.pkl'
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)
# Default k-mer length: 6 (hexamer words).
kmer_size = 6


def getKmers(sequence, size=kmer_size):
    """Split *sequence* into overlapping, lower-cased k-mer words.

    A window of ``size`` characters slides over the sequence one position at
    a time; each window is lower-cased and collected in order.

    Args:
        sequence: The sequence string to split.
        size: Window (k-mer) length. The default is bound to ``kmer_size``
            when the function is defined.

    Returns:
        A list of ``len(sequence) - size + 1`` k-mers, or an empty list when
        the sequence is shorter than ``size``.
    """
    return [sequence[x:x + size].lower() for x in range(len(sequence) - size + 1)]
# Prediction callback wired into the Gradio interface.
def classify_sequence(sequence):
    """Predict the class of *sequence* and the probability of that prediction.

    The sequence is split into k-mer words with ``getKmers``, joined into one
    space-separated string, vectorized with the module-level ``cv`` and passed
    to the module-level ``model``.

    Args:
        sequence: The sequence string entered by the user.

    Returns:
        A dict with ``'predicted_class'`` (the first element of
        ``model.predict``) and ``'probability'`` (the largest value returned
        by ``model.predict_proba``, multiplied by 100).

    Note:
        A sequence shorter than the k-mer size produces no k-mers, so an
        empty string is vectorized and a prediction is still returned.
    """
    # Convert the input sequence into k-mer words.
    words = getKmers(sequence)
    # Join the k-mer words into a single space-separated document.
    text = ' '.join(words)
    # Vectorize the document; transform() takes a list of documents.
    X = cv.transform([text])
    # Predict the label and the highest class probability, as a percentage.
    pred_label = model.predict(X)[0]
    pred_prob_percentage = model.predict_proba(X).max() * 100
    # Convert NumPy scalars to plain Python values so the dict renders cleanly
    # as text (NumPy 2 shows scalars as e.g. ``np.float64(99.9)`` in a repr).
    if isinstance(pred_label, np.generic):
        pred_label = pred_label.item()
    return {'predicted_class': pred_label, 'probability': float(pred_prob_percentage)}
# Build the web UI: one text input feeding classify_sequence, one text output.
iface = gr.Interface(
    fn=classify_sequence,
    inputs="text",
    outputs=["text"],
    title="Coronavirus Sequence Classifier",
    description="Enter a coronavirus sequence to predict its class and probability.",
)
# Start the app.
iface.launch()