Mariusz Kossakowski committed on
Commit
8eb9cdc
·
1 Parent(s): c76a6b2

Filtering fix

Browse files
clarin_datasets/kpwr_ner_datasets.py CHANGED
@@ -104,7 +104,7 @@ class KpwrNerDataset(DatasetToShow):
104
  on="class",
105
  )
106
  with class_distribution:
107
- st.header("Class distribution in each subset (without '0' and 'I-*')")
108
  st.dataframe(class_distribution_df)
109
  st.text_area(
110
  label="LaTeX code", value=class_distribution_df.style.to_latex()
@@ -117,10 +117,10 @@ class KpwrNerDataset(DatasetToShow):
117
  "ner": labels_all,
118
  }
119
  )
120
- full_df_unzipped = full_df_unzipped.loc[full_df_unzipped["ner"] != 0]
121
  possible_options = sorted(full_df_unzipped["ner"].unique())
122
  with most_common_tokens:
123
- st.header("10 most common tokens from selected class (without '0')")
124
  selected_class = st.selectbox(
125
  label="Select class to show", options=possible_options
126
  )
 
104
  on="class",
105
  )
106
  with class_distribution:
107
+ st.header("Class distribution in each subset (without 'O' and 'I-*')")
108
  st.dataframe(class_distribution_df)
109
  st.text_area(
110
  label="LaTeX code", value=class_distribution_df.style.to_latex()
 
117
  "ner": labels_all,
118
  }
119
  )
120
+ full_df_unzipped = full_df_unzipped.loc[(full_df_unzipped["ner"] != "O") & ~full_df_unzipped["ner"].str.startswith("I-")]
121
  possible_options = sorted(full_df_unzipped["ner"].unique())
122
  with most_common_tokens:
123
+ st.header("10 most common tokens from selected class (without 'O')")
124
  selected_class = st.selectbox(
125
  label="Select class to show", options=possible_options
126
  )