Spaces:
Sleeping
Sleeping
Daryl Fung
committed on
Commit
·
f1a4ffa
1
Parent(s):
5051113
added plot close to free memory
Browse files
- keyphrase_extraction.py +2 -0
- keyword_extraction.py +2 -0
- word.py +3 -0
keyphrase_extraction.py
CHANGED
@@ -33,10 +33,12 @@ def get_top_key_phrases(text, top_n, save_output):
|
|
33 |
phrases = list(zip(*phrases_ranking.items()))[0]
|
34 |
scores = list(zip(*phrases_ranking.items()))[1]
|
35 |
keyword_df = pd.DataFrame({'words': phrases[:top_n], 'scores': scores[:top_n]})
|
|
|
36 |
plt.figure(figsize=(8, 24))
|
37 |
sns.catplot(data=keyword_df, x='words', y='scores', kind='bar', palette='blend:#7AB,#EDA', aspect=1.5)
|
38 |
plt.xticks(rotation=-10, fontsize=6)
|
39 |
plt.savefig(save_output, dpi=300)
|
|
|
40 |
|
41 |
def display_key_phrases(text, save_output):
|
42 |
doc = nlp(text)
|
|
|
33 |
phrases = list(zip(*phrases_ranking.items()))[0]
|
34 |
scores = list(zip(*phrases_ranking.items()))[1]
|
35 |
keyword_df = pd.DataFrame({'words': phrases[:top_n], 'scores': scores[:top_n]})
|
36 |
+
plt.title("Word Count")
|
37 |
plt.figure(figsize=(8, 24))
|
38 |
sns.catplot(data=keyword_df, x='words', y='scores', kind='bar', palette='blend:#7AB,#EDA', aspect=1.5)
|
39 |
plt.xticks(rotation=-10, fontsize=6)
|
40 |
plt.savefig(save_output, dpi=300)
|
41 |
+
plt.close()
|
42 |
|
43 |
def display_key_phrases(text, save_output):
|
44 |
doc = nlp(text)
|
keyword_extraction.py
CHANGED
@@ -36,10 +36,12 @@ def keyword_extract(doc, kw_model, n_grams, save_output='results/'):
|
|
36 |
words = list(zip(*keyword_onegram))[0]
|
37 |
scores = list(zip(*keyword_onegram))[1]
|
38 |
keyword_df = pd.DataFrame({'words': words, 'scores': scores})
|
|
|
39 |
plt.figure(figsize=(8, 24))
|
40 |
sns.catplot(data=keyword_df, x='words', y='scores', kind='bar', palette='blend:#7AB,#EDA', aspect=1.5)
|
41 |
plt.xticks(rotation=-10, fontsize=6)
|
42 |
plt.savefig(save_output, dpi=300)
|
|
|
43 |
|
44 |
if __name__ == '__main__':
|
45 |
kw_model = KeyBERT()
|
|
|
36 |
words = list(zip(*keyword_onegram))[0]
|
37 |
scores = list(zip(*keyword_onegram))[1]
|
38 |
keyword_df = pd.DataFrame({'words': words, 'scores': scores})
|
39 |
+
plt.title("Word Count")
|
40 |
plt.figure(figsize=(8, 24))
|
41 |
sns.catplot(data=keyword_df, x='words', y='scores', kind='bar', palette='blend:#7AB,#EDA', aspect=1.5)
|
42 |
plt.xticks(rotation=-10, fontsize=6)
|
43 |
plt.savefig(save_output, dpi=300)
|
44 |
+
plt.close()
|
45 |
|
46 |
if __name__ == '__main__':
|
47 |
kw_model = KeyBERT()
|
word.py
CHANGED
@@ -32,10 +32,13 @@ def show_gram_plot(doc, n_grams, top_words=10, save_output='results/'):
|
|
32 |
count_tokens = Counter(tokens).most_common(top_words)
|
33 |
words, count = zip(*count_tokens)
|
34 |
tokens_df = pd.DataFrame({'word': words, 'count': count})
|
|
|
35 |
plt.figure(figsize=(8, 24))
|
36 |
sns.catplot(data=tokens_df, x='word', y='count', kind='bar', palette='blend:#7AB,#EDA')
|
37 |
plt.xticks(rotation=-10, fontsize=6)
|
38 |
plt.savefig(save_output, dpi=300)
|
|
|
|
|
39 |
|
40 |
if __name__ == '__main__':
|
41 |
lemmatized_docs = preprocess_text([doc])
|
|
|
32 |
count_tokens = Counter(tokens).most_common(top_words)
|
33 |
words, count = zip(*count_tokens)
|
34 |
tokens_df = pd.DataFrame({'word': words, 'count': count})
|
35 |
+
plt.title("Word Count")
|
36 |
plt.figure(figsize=(8, 24))
|
37 |
sns.catplot(data=tokens_df, x='word', y='count', kind='bar', palette='blend:#7AB,#EDA')
|
38 |
plt.xticks(rotation=-10, fontsize=6)
|
39 |
plt.savefig(save_output, dpi=300)
|
40 |
+
plt.close()
|
41 |
+
|
42 |
|
43 |
if __name__ == '__main__':
|
44 |
lemmatized_docs = preprocess_text([doc])
|