Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -23,10 +23,14 @@ mpl.rcParams.update(mpl.rcParamsDefault)
|
|
23 |
df = pd.read_parquet('virus_ds.parquet')
|
24 |
virus = df['Organism_Name'].unique()
|
25 |
virus = {v: v for v in virus}
|
26 |
-
df_new = pd.read_parquet("virus.parquet")
|
27 |
df_new = df_new.groupby('organism_name').apply(lambda x: x.head(100) if len(x) > 10 else None).reset_index(drop=True)
|
28 |
filter_species = df_new.organism_name.value_counts().reset_index()[df_new.organism_name.value_counts().reset_index()['count'] > 40 ]['organism_name'][1:].tolist()
|
|
|
|
|
|
|
29 |
del df_new
|
|
|
30 |
virus_new = {v: v for v in filter_species}
|
31 |
loss_typesss = pd.read_csv("training_data_5.csv")['loss_type'].unique().tolist()
|
32 |
model_typesss = pd.read_csv("training_data_5.csv")['model_type'].unique().tolist()
|
@@ -89,11 +93,7 @@ with ui.navset_card_tab(id="tab"):
|
|
89 |
|
90 |
@render.plot()
|
91 |
def plot_distro():
|
92 |
-
df =
|
93 |
-
df = df.groupby('organism_name').apply(lambda x: x.head(100) if len(x) > 10 else None).reset_index(drop=True)
|
94 |
-
filter_species = df.organism_name.value_counts().reset_index()[df.organism_name.value_counts().reset_index()['count'] > 40 ]['organism_name'][1:].tolist()
|
95 |
-
|
96 |
-
df = df[df["organism_name"].isin(input.virus_selector_1())]
|
97 |
grouped = df.groupby("organism_name")["seq"].apply(list)
|
98 |
return plot_distrobutions(grouped, grouped.index, input.basepair())
|
99 |
|
|
|
23 |
df = pd.read_parquet('virus_ds.parquet')
|
24 |
virus = df['Organism_Name'].unique()
|
25 |
virus = {v: v for v in virus}
|
26 |
+
df_new = pd.read_parquet("virus.parquet", columns= ['organism_name'])
|
27 |
df_new = df_new.groupby('organism_name').apply(lambda x: x.head(100) if len(x) > 10 else None).reset_index(drop=True)
|
28 |
filter_species = df_new.organism_name.value_counts().reset_index()[df_new.organism_name.value_counts().reset_index()['count'] > 40 ]['organism_name'][1:].tolist()
|
29 |
+
|
30 |
+
df_old = pd.read_parquet("virus.parquet", columns =['seq', 'organism_name'])
|
31 |
+
MASTER_DF = df_old[df_old['organism_name'].isin(filter_species)].copy()
|
32 |
del df_new
|
33 |
+
del df_old
|
34 |
virus_new = {v: v for v in filter_species}
|
35 |
loss_typesss = pd.read_csv("training_data_5.csv")['loss_type'].unique().tolist()
|
36 |
model_typesss = pd.read_csv("training_data_5.csv")['model_type'].unique().tolist()
|
|
|
93 |
|
94 |
@render.plot()
|
95 |
def plot_distro():
|
96 |
+
df = MASTER_DF[MASTER_DF["organism_name"].isin(input.virus_selector_1())].copy()
|
|
|
|
|
|
|
|
|
97 |
grouped = df.groupby("organism_name")["seq"].apply(list)
|
98 |
return plot_distrobutions(grouped, grouped.index, input.basepair())
|
99 |
|