avsolatorio
/

wbg-doc-topic-prediction

avsolatorio committed on Oct 19, 2024

Commit

640ac37

verified ·

1 Parent(s): 09767db

Add example of taking topics from abstracts

Files changed (1) hide show

README.md CHANGED Viewed

@@ -111,6 +111,8 @@ import nltk
 nltk.download('punkt_tab')
 nltk.download('punkt')
 # Load the sent_tokenize method for quick sentence extraction
 from nltk import sent_tokenize
@@ -137,4 +139,8 @@ outs
 #   'score_mean': 0.19432228063233198,
 #   'score_std': 0.21148874269682794,
 #   'doc_idx': 0}, ...]]
 ```

 nltk.download('punkt_tab')
 nltk.download('punkt')
+from collections import Counter
 # Load the sent_tokenize method for quick sentence extraction
 from nltk import sent_tokenize
 #   'score_mean': 0.19432228063233198,
 #   'score_std': 0.21148874269682794,
 #   'doc_idx': 0}, ...]]
+# Get the distribution of highly relevant topics across the abstract's sentences.
+# A prediction is kept when its mean score exceeds 0.1 (a currently arbitrary
+# threshold) and also exceeds its score standard deviation.
+Counter([o["label"] for out in outs for o in out if (o["score_mean"] > 0.1 and o["score_mean"] > o["score_std"])]).most_common()
 ```