avsolatorio
committed on
Add example of taking topics from abstracts
Browse files
README.md
CHANGED
@@ -111,6 +111,8 @@ import nltk
|
|
111 |
nltk.download('punkt_tab')
|
112 |
nltk.download('punkt')
|
113 |
|
|
|
|
|
114 |
# Load the sent_tokenize method for quick sentence extraction
|
115 |
from nltk import sent_tokenize
|
116 |
|
@@ -137,4 +139,8 @@ outs
|
|
137 |
# 'score_mean': 0.19432228063233198,
|
138 |
# 'score_std': 0.21148874269682794,
|
139 |
# 'doc_idx': 0}, ...]]
|
|
|
|
|
|
|
|
|
140 |
```
|
|
|
111 |
nltk.download('punkt_tab')
|
112 |
nltk.download('punkt')
|
113 |
|
114 |
+
from collections import Counter
|
115 |
+
|
116 |
# Load the sent_tokenize method for quick sentence extraction
|
117 |
from nltk import sent_tokenize
|
118 |
|
|
|
139 |
# 'score_mean': 0.19432228063233198,
|
140 |
# 'score_std': 0.21148874269682794,
|
141 |
# 'doc_idx': 0}, ...]]
|
142 |
+
|
143 |
+
# Count how often each highly relevant topic appears across the abstract's sentences, most frequent first.
|
144 |
+
# The 0.1 mean-score threshold is arbitrary for now; a topic is also kept only if its mean score exceeds its standard deviation.
|
145 |
+
Counter([o["label"] for out in outs for o in out if (o["score_mean"] > 0.1 and o["score_mean"] > o["score_std"])]).most_common()
|
146 |
```
|