Spaces:
Running
Running
from sklearn.base import BaseEstimator, TransformerMixin | |
from bio_embeddings.embed.seqvec_embedder import SeqVecEmbedder | |
import pandas as pd | |
from sklearn.feature_selection import SelectKBest | |
class Embedder(BaseEstimator, TransformerMixin):
    """Scikit-learn style transformer that embeds sequences with SeqVec.

    Each row of the input frame is expected to carry the sequence to embed
    in its first column; the transformer replaces it with the reduced
    (per-protein) embedding returned by ``SeqVecEmbedder``.
    """

    def __init__(self) -> None:
        # A fresh SeqVecEmbedder is constructed for every Embedder instance.
        # NOTE(review): presumably this loads model weights and is costly;
        # confirm before constructing many instances.
        self.embedder = SeqVecEmbedder()
        super().__init__()

    def fit(self, X, y=None) -> "Embedder":
        """Do nothing and return ``self``; this transformer has no fitted state."""
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Embed every row of ``X``.

        Args:
            X: Frame whose first column holds the sequence to embed.

        Returns:
            The result of applying :meth:`embeddings` to each row, i.e. one
            row of embedding values per input row.
        """
        return X.apply(self.embeddings, axis='columns')

    def embeddings(self, row) -> pd.Series:
        """Return the per-protein embedding of the first element of ``row``.

        Args:
            row: One row of the input frame (a ``pd.Series`` when called via
                :meth:`transform`), or any indexable whose element ``0`` is
                the sequence.

        Returns:
            A ``pd.Series`` wrapping the reduced embedding.
        """
        # ``row[0]`` on a label-indexed Series relies on the positional
        # fallback of ``Series.__getitem__``, which pandas has deprecated and
        # which raises ``KeyError`` once removed. Select by position
        # explicitly; keep plain indexing for non-Series inputs.
        sequence = row.iloc[0] if isinstance(row, pd.Series) else row[0]
        per_residue = self.embedder.embed(sequence)
        # assumes reduce_per_protein collapses the per-residue embedding into
        # a single 1-D vector -- TODO confirm against bio_embeddings docs
        per_protein = self.embedder.reduce_per_protein(per_residue)
        return pd.Series(per_protein)
class FeaturesSelector(BaseEstimator, TransformerMixin):
    """Transformer that delegates feature selection to ``self.features_selector``.

    NOTE(review): ``features_selector`` is not assigned anywhere in the
    visible code; presumably it is set externally or restored when a fitted
    instance is loaded -- confirm against the caller.
    """

    def fit(self, X, y=None):
        """Return ``self`` unchanged; nothing is fitted here."""
        return self

    def transform(self, X: pd.DataFrame):
        """Return ``X`` as transformed by the wrapped ``features_selector``."""
        selector = self.features_selector
        selected = selector.transform(X)
        return selected