Spaces:
Runtime error
Runtime error
Mariusz Kossakowski
committed on
Commit
•
f10673c
1
Parent(s):
8eb9cdc
Black formatting
Browse files
clarin_datasets/kpwr_ner_datasets.py
CHANGED
@@ -72,7 +72,9 @@ class KpwrNerDataset(DatasetToShow):
|
|
72 |
full_dataframe = pd.concat(self.data_dict.values(), axis="rows")
|
73 |
tokens_all = full_dataframe["tokens"].tolist()
|
74 |
tokens_all = [x for subarray in tokens_all for x in subarray]
|
75 |
-
labels_all = pd.concat(self.data_dict_named.values(), axis="rows")[
|
|
|
|
|
76 |
labels_all = [x for subarray in labels_all for x in subarray]
|
77 |
|
78 |
with dataframe_head:
|
@@ -93,9 +95,9 @@ class KpwrNerDataset(DatasetToShow):
|
|
93 |
all_labels_from_subset = pd.Series(all_labels_from_subset)
|
94 |
class_distribution_dict[subset] = (
|
95 |
all_labels_from_subset.value_counts(normalize=True)
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
)
|
100 |
|
101 |
class_distribution_df = pd.merge(
|
@@ -117,7 +119,10 @@ class KpwrNerDataset(DatasetToShow):
|
|
117 |
"ner": labels_all,
|
118 |
}
|
119 |
)
|
120 |
-
full_df_unzipped = full_df_unzipped.loc[
|
|
|
|
|
|
|
121 |
possible_options = sorted(full_df_unzipped["ner"].unique())
|
122 |
with most_common_tokens:
|
123 |
st.header("10 most common tokens from selected class (without 'O')")
|
|
|
72 |
full_dataframe = pd.concat(self.data_dict.values(), axis="rows")
|
73 |
tokens_all = full_dataframe["tokens"].tolist()
|
74 |
tokens_all = [x for subarray in tokens_all for x in subarray]
|
75 |
+
labels_all = pd.concat(self.data_dict_named.values(), axis="rows")[
|
76 |
+
"ner"
|
77 |
+
].tolist()
|
78 |
labels_all = [x for subarray in labels_all for x in subarray]
|
79 |
|
80 |
with dataframe_head:
|
|
|
95 |
all_labels_from_subset = pd.Series(all_labels_from_subset)
|
96 |
class_distribution_dict[subset] = (
|
97 |
all_labels_from_subset.value_counts(normalize=True)
|
98 |
+
.sort_index()
|
99 |
+
.reset_index()
|
100 |
+
.rename({"index": "class", 0: subset}, axis="columns")
|
101 |
)
|
102 |
|
103 |
class_distribution_df = pd.merge(
|
|
|
119 |
"ner": labels_all,
|
120 |
}
|
121 |
)
|
122 |
+
full_df_unzipped = full_df_unzipped.loc[
|
123 |
+
(full_df_unzipped["ner"] != "O")
|
124 |
+
& (full_df_unzipped["ner"].str.starstwith("I-"))
|
125 |
+
]
|
126 |
possible_options = sorted(full_df_unzipped["ner"].unique())
|
127 |
with most_common_tokens:
|
128 |
st.header("10 most common tokens from selected class (without 'O')")
|