new analysis
Browse files- utils/visualize.py +43 -42
utils/visualize.py
CHANGED
@@ -104,47 +104,48 @@ def visualize_spans(
|
|
104 |
st.dataframe(
|
105 |
df.style.highlight_between(subset='Conf. score', right=.7))
|
106 |
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
|
|
149 |
# st.markdown(str(dv.alpha_diversity(metric = "shannon", counts=counts, ids = ['ENTERTAIN', 'ATTRIBUTE', 'CITATION', 'COUNTER', 'DENY', 'ENDORSE', 'PRONOUNCE', 'CONCUR', 'MONOGLOSS', 'SOURCES', 'JUSTIFYING'])))
|
150 |
# print(dv.get_alpha_diversity_metrics())
|
|
|
104 |
st.dataframe(
|
105 |
df.style.highlight_between(subset='Conf. score', right=.7))
|
106 |
|
107 |
+
if not simple:
|
108 |
+
st.subheader("Label counts & Diagnostic confidence score summary")
|
109 |
+
counts = df['label_'].value_counts().reindex(CATEGORIES, fill_value=0)
|
110 |
+
|
111 |
+
print(counts)
|
112 |
+
print(list(counts))
|
113 |
+
label_counts = df.groupby('label_').agg({
|
114 |
+
"label_":
|
115 |
+
'count',
|
116 |
+
"Conf. score": ['median', 'min', 'max']
|
117 |
+
}).round(4).reindex(CATEGORIES, fill_value=0)
|
118 |
+
|
119 |
+
st.dataframe(label_counts)
|
120 |
+
# print(list(label_counts))
|
121 |
+
|
122 |
+
sequences = list(df['label_'])
|
123 |
+
# Engagement ngrams
|
124 |
+
span_bigrams = ngrammar(seq=sequences, n=2, concat=True)
|
125 |
+
span_trigrams = ngrammar(seq=sequences, n=3, concat=True)
|
126 |
+
|
127 |
+
st.dataframe(pd.DataFrame(span_bigrams))
|
128 |
+
st.code(span_trigrams)
|
129 |
+
|
130 |
+
|
131 |
+
st.subheader("Engagement label by grammatical function")
|
132 |
+
label_dep = pd.crosstab(df['grammatical realization'], df['label_'])
|
133 |
+
st.dataframe(label_dep)
|
134 |
+
|
135 |
+
st.subheader('Quantitative results')
|
136 |
+
# st.markdown(
|
137 |
+
# f"Shannon's index: {dv.alpha.shannon(list(counts), base=2): .3f}")
|
138 |
+
# st.markdown(
|
139 |
+
# f"Simpson's e index: {1 - dv.alpha.simpson_e(list(counts)): .3f}")
|
140 |
+
|
141 |
+
div = diversity_values(list(counts))
|
142 |
+
div_data = pd.DataFrame.from_dict(div, orient='index')
|
143 |
+
st.dataframe(div_data)
|
144 |
+
|
145 |
+
doc_data = pd.concat([counts, div_data], axis = 0).T
|
146 |
+
filename = "NA"
|
147 |
+
doc_data.insert(0, "filename", filename, True)
|
148 |
+
doc_data.insert(1, "nwords", len(doc), True)
|
149 |
+
st.dataframe(doc_data)
|
150 |
# st.markdown(str(dv.alpha_diversity(metric = "shannon", counts=counts, ids = ['ENTERTAIN', 'ATTRIBUTE', 'CITATION', 'COUNTER', 'DENY', 'ENDORSE', 'PRONOUNCE', 'CONCUR', 'MONOGLOSS', 'SOURCES', 'JUSTIFYING'])))
|
151 |
# print(dv.get_alpha_diversity_metrics())
|