Hack90 committed on
Commit
56cda59
1 Parent(s): 9ad1347

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -23,10 +23,14 @@ mpl.rcParams.update(mpl.rcParamsDefault)
23
  df = pd.read_parquet('virus_ds.parquet')
24
  virus = df['Organism_Name'].unique()
25
  virus = {v: v for v in virus}
26
- df_new = pd.read_parquet("virus.parquet")
27
  df_new = df_new.groupby('organism_name').apply(lambda x: x.head(100) if len(x) > 10 else None).reset_index(drop=True)
28
  filter_species = df_new.organism_name.value_counts().reset_index()[df_new.organism_name.value_counts().reset_index()['count'] > 40 ]['organism_name'][1:].tolist()
 
 
 
29
  del df_new
 
30
  virus_new = {v: v for v in filter_species}
31
  loss_typesss = pd.read_csv("training_data_5.csv")['loss_type'].unique().tolist()
32
  model_typesss = pd.read_csv("training_data_5.csv")['model_type'].unique().tolist()
@@ -89,11 +93,7 @@ with ui.navset_card_tab(id="tab"):
89
 
90
  @render.plot()
91
  def plot_distro():
92
- df = pd.read_parquet("virus.parquet")
93
- df = df.groupby('organism_name').apply(lambda x: x.head(100) if len(x) > 10 else None).reset_index(drop=True)
94
- filter_species = df.organism_name.value_counts().reset_index()[df.organism_name.value_counts().reset_index()['count'] > 40 ]['organism_name'][1:].tolist()
95
-
96
- df = df[df["organism_name"].isin(input.virus_selector_1())]
97
  grouped = df.groupby("organism_name")["seq"].apply(list)
98
  return plot_distrobutions(grouped, grouped.index, input.basepair())
99
 
 
23
  df = pd.read_parquet('virus_ds.parquet')
24
  virus = df['Organism_Name'].unique()
25
  virus = {v: v for v in virus}
26
+ df_new = pd.read_parquet("virus.parquet", columns= ['organism_name'])
27
  df_new = df_new.groupby('organism_name').apply(lambda x: x.head(100) if len(x) > 10 else None).reset_index(drop=True)
28
  filter_species = df_new.organism_name.value_counts().reset_index()[df_new.organism_name.value_counts().reset_index()['count'] > 40 ]['organism_name'][1:].tolist()
29
+
30
+ df_old = pd.read_parquet("virus.parquet", columns =['seq', 'organism_name'])
31
+ MASTER_DF = df_old[df_old['organism_name'].isin(filter_species)].copy()
32
  del df_new
33
+ del df_old
34
  virus_new = {v: v for v in filter_species}
35
  loss_typesss = pd.read_csv("training_data_5.csv")['loss_type'].unique().tolist()
36
  model_typesss = pd.read_csv("training_data_5.csv")['model_type'].unique().tolist()
 
93
 
94
  @render.plot()
95
  def plot_distro():
96
+ df = MASTER_DF[MASTER_DF["organism_name"].isin(input.virus_selector_1())].copy()
 
 
 
 
97
  grouped = df.groupby("organism_name")["seq"].apply(list)
98
  return plot_distrobutions(grouped, grouped.index, input.basepair())
99