egumasa committed on
Commit
96fd379
1 Parent(s): f98ea1e

new analysis

Browse files
Files changed (1) hide show
  1. utils/visualize.py +43 -42
utils/visualize.py CHANGED
@@ -104,47 +104,48 @@ def visualize_spans(
104
  st.dataframe(
105
  df.style.highlight_between(subset='Conf. score', right=.7))
106
 
107
- st.subheader("Label counts & Diagnostic confidence score summary")
108
- counts = df['label_'].value_counts().reindex(CATEGORIES, fill_value=0)
109
-
110
- print(counts)
111
- print(list(counts))
112
- label_counts = df.groupby('label_').agg({
113
- "label_":
114
- 'count',
115
- "Conf. score": ['median', 'min', 'max']
116
- }).round(4).reindex(CATEGORIES, fill_value=0)
117
-
118
- st.dataframe(label_counts)
119
- # print(list(label_counts))
120
-
121
- sequences = list(df['label_'])
122
- # Engagement ngrams
123
- span_bigrams = ngrammar(seq=sequences, n=2, concat=True)
124
- span_trigrams = ngrammar(seq=sequences, n=3, concat=True)
125
-
126
- st.dataframe(pd.DataFrame(span_bigrams))
127
- st.code(span_trigrams)
128
-
129
-
130
- st.subheader("Engagement label by grammatical function")
131
- label_dep = pd.crosstab(df['grammatical realization'], df['label_'])
132
- st.dataframe(label_dep)
133
-
134
- st.subheader('Quantitative results')
135
- # st.markdown(
136
- # f"Shannon's index: {dv.alpha.shannon(list(counts), base=2): .3f}")
137
- # st.markdown(
138
- # f"Simpson's e index: {1 - dv.alpha.simpson_e(list(counts)): .3f}")
139
-
140
- div = diversity_values(list(counts))
141
- div_data = pd.DataFrame.from_dict(div, orient='index')
142
- st.dataframe(div_data)
143
-
144
- doc_data = pd.concat([counts, div_data], axis = 0).T
145
- filename = "NA"
146
- doc_data.insert(0, "filename", filename, True)
147
- doc_data.insert(1, "nwords", len(doc), True)
148
- st.dataframe(doc_data)
 
149
  # st.markdown(str(dv.alpha_diversity(metric = "shannon", counts=counts, ids = ['ENTERTAIN', 'ATTRIBUTE', 'CITATION', 'COUNTER', 'DENY', 'ENDORSE', 'PRONOUNCE', 'CONCUR', 'MONOGLOSS', 'SOURCES', 'JUSTIFYING'])))
150
  # print(dv.get_alpha_diversity_metrics())
 
104
  st.dataframe(
105
  df.style.highlight_between(subset='Conf. score', right=.7))
106
 
107
+ if not simple:
108
+ st.subheader("Label counts & Diagnostic confidence score summary")
109
+ counts = df['label_'].value_counts().reindex(CATEGORIES, fill_value=0)
110
+
111
+ print(counts)
112
+ print(list(counts))
113
+ label_counts = df.groupby('label_').agg({
114
+ "label_":
115
+ 'count',
116
+ "Conf. score": ['median', 'min', 'max']
117
+ }).round(4).reindex(CATEGORIES, fill_value=0)
118
+
119
+ st.dataframe(label_counts)
120
+ # print(list(label_counts))
121
+
122
+ sequences = list(df['label_'])
123
+ # Engagement ngrams
124
+ span_bigrams = ngrammar(seq=sequences, n=2, concat=True)
125
+ span_trigrams = ngrammar(seq=sequences, n=3, concat=True)
126
+
127
+ st.dataframe(pd.DataFrame(span_bigrams))
128
+ st.code(span_trigrams)
129
+
130
+
131
+ st.subheader("Engagement label by grammatical function")
132
+ label_dep = pd.crosstab(df['grammatical realization'], df['label_'])
133
+ st.dataframe(label_dep)
134
+
135
+ st.subheader('Quantitative results')
136
+ # st.markdown(
137
+ # f"Shannon's index: {dv.alpha.shannon(list(counts), base=2): .3f}")
138
+ # st.markdown(
139
+ # f"Simpson's e index: {1 - dv.alpha.simpson_e(list(counts)): .3f}")
140
+
141
+ div = diversity_values(list(counts))
142
+ div_data = pd.DataFrame.from_dict(div, orient='index')
143
+ st.dataframe(div_data)
144
+
145
+ doc_data = pd.concat([counts, div_data], axis = 0).T
146
+ filename = "NA"
147
+ doc_data.insert(0, "filename", filename, True)
148
+ doc_data.insert(1, "nwords", len(doc), True)
149
+ st.dataframe(doc_data)
150
  # st.markdown(str(dv.alpha_diversity(metric = "shannon", counts=counts, ids = ['ENTERTAIN', 'ATTRIBUTE', 'CITATION', 'COUNTER', 'DENY', 'ENDORSE', 'PRONOUNCE', 'CONCUR', 'MONOGLOSS', 'SOURCES', 'JUSTIFYING'])))
151
  # print(dv.get_alpha_diversity_metrics())