Spaces:
Sleeping
Sleeping
Daryl Fung
committed on
Commit
•
9e61b41
1
Parent(s):
01ea445
fix bar plot
Browse files
- keyphrase_extraction.py +2 -2
- keyword_extraction.py +2 -2
- main.py +3 -3
- results/DAD/1_gram.png +0 -0
- results/DAD/2_gram.png +0 -0
- results/DAD/3_gram.png +0 -0
- results/DAD/ner.html +2 -2
- word.py +2 -2
keyphrase_extraction.py
CHANGED
@@ -34,8 +34,8 @@ def get_top_key_phrases(text, top_n, save_output):
|
|
34 |
scores = list(zip(*phrases_ranking.items()))[1]
|
35 |
keyword_df = pd.DataFrame({'words': phrases[:top_n], 'scores': scores[:top_n]})
|
36 |
plt.title("Word Count")
|
37 |
-
plt.figure(figsize=(
|
38 |
-
sns.
|
39 |
plt.xticks(rotation=-10, fontsize=6)
|
40 |
plt.savefig(save_output, dpi=300)
|
41 |
plt.close()
|
|
|
34 |
scores = list(zip(*phrases_ranking.items()))[1]
|
35 |
keyword_df = pd.DataFrame({'words': phrases[:top_n], 'scores': scores[:top_n]})
|
36 |
plt.title("Word Count")
|
37 |
+
plt.figure(figsize=(24, 8))
|
38 |
+
sns.barplot(data=keyword_df, y='words', x='scores', palette='blend:#7AB,#EDA', aspect=1.5)
|
39 |
plt.xticks(rotation=-10, fontsize=6)
|
40 |
plt.savefig(save_output, dpi=300)
|
41 |
plt.close()
|
keyword_extraction.py
CHANGED
@@ -37,8 +37,8 @@ def keyword_extract(doc, kw_model, n_grams, save_output='results/'):
|
|
37 |
scores = list(zip(*keyword_onegram))[1]
|
38 |
keyword_df = pd.DataFrame({'words': words, 'scores': scores})
|
39 |
plt.title("Word Count")
|
40 |
-
plt.figure(figsize=(
|
41 |
-
sns.
|
42 |
plt.xticks(rotation=-10, fontsize=8-n_grams)
|
43 |
plt.savefig(save_output, dpi=300)
|
44 |
plt.close()
|
|
|
37 |
scores = list(zip(*keyword_onegram))[1]
|
38 |
keyword_df = pd.DataFrame({'words': words, 'scores': scores})
|
39 |
plt.title("Word Count")
|
40 |
+
plt.figure(figsize=(24, 8))
|
41 |
+
sns.barplot(data=keyword_df, y='words', x='scores', palette='blend:#7AB,#EDA', aspect=1.5)
|
42 |
plt.xticks(rotation=-10, fontsize=8-n_grams)
|
43 |
plt.savefig(save_output, dpi=300)
|
44 |
plt.close()
|
main.py
CHANGED
@@ -71,9 +71,9 @@ filename.open('w', encoding='utf-8').write(svg)
|
|
71 |
|
72 |
# run keyword extraction
|
73 |
kw_model = KeyBERT()
|
74 |
-
keyword_extract(
|
75 |
-
keyword_extract(
|
76 |
-
keyword_extract(
|
77 |
keywords = kw_model.extract_keywords(doc, highlight=True)
|
78 |
print(keywords)
|
79 |
|
|
|
71 |
|
72 |
# run keyword extraction
|
73 |
kw_model = KeyBERT()
|
74 |
+
keyword_extract(doc, kw_model, 1, save_output=f'results/DAD/{1}_keyword.png')
|
75 |
+
keyword_extract(doc, kw_model, 2, save_output=f'results/DAD/{2}_keyword.png')
|
76 |
+
keyword_extract(doc, kw_model, 3, save_output=f'results/DAD/{3}_keyword.png')
|
77 |
keywords = kw_model.extract_keywords(doc, highlight=True)
|
78 |
print(keywords)
|
79 |
|
results/DAD/1_gram.png
CHANGED
results/DAD/2_gram.png
CHANGED
results/DAD/3_gram.png
CHANGED
results/DAD/ner.html
CHANGED
@@ -6,7 +6,7 @@
|
|
6 |
|
7 |
<body style="font-size: 16px; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; padding: 4rem 2rem; direction: ltr">
|
8 |
<figure style="margin-bottom: 6rem">
|
9 |
-
<div class="entities" style="line-height: 2.5; direction: ltr">CIHI</br>Database that collects administrative clinical and demographic information on hospital discharges including deaths signouts and transfers Some provinces and territories also use the
|
10 |
<mark class="entity" style="background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
|
11 |
DAD
|
12 |
<span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">ORG</span>
|
@@ -211,7 +211,7 @@
|
|
211 |
Captures
|
212 |
<span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">PERSON</span>
|
213 |
</mark>
|
214 |
-
administrative clinical and demographic information on discharges for acute care facilities including deaths signouts and transfers</br
|
215 |
</figure>
|
216 |
</body>
|
217 |
</html>
|
|
|
6 |
|
7 |
<body style="font-size: 16px; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; padding: 4rem 2rem; direction: ltr">
|
8 |
<figure style="margin-bottom: 6rem">
|
9 |
+
<div class="entities" style="line-height: 2.5; direction: ltr"></br>CIHI</br>Database that collects administrative clinical and demographic information on hospital discharges including deaths signouts and transfers Some provinces and territories also use the
|
10 |
<mark class="entity" style="background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
|
11 |
DAD
|
12 |
<span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">ORG</span>
|
|
|
211 |
Captures
|
212 |
<span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">PERSON</span>
|
213 |
</mark>
|
214 |
+
administrative clinical and demographic information on discharges for acute care facilities including deaths signouts and transfers</br> </div>
|
215 |
</figure>
|
216 |
</body>
|
217 |
</html>
|
word.py
CHANGED
@@ -33,8 +33,8 @@ def show_gram_plot(doc, n_grams, top_words=10, save_output='results/'):
|
|
33 |
words, count = zip(*count_tokens)
|
34 |
tokens_df = pd.DataFrame({'word': words, 'count': count})
|
35 |
plt.title("Word Count")
|
36 |
-
plt.figure(figsize=(
|
37 |
-
sns.
|
38 |
plt.xticks(rotation=-10, fontsize=8-n_grams)
|
39 |
plt.savefig(save_output, dpi=300)
|
40 |
plt.close()
|
|
|
33 |
words, count = zip(*count_tokens)
|
34 |
tokens_df = pd.DataFrame({'word': words, 'count': count})
|
35 |
plt.title("Word Count")
|
36 |
+
plt.figure(figsize=(24, 8))
|
37 |
+
sns.barplot(data=tokens_df, y='word', x='count', palette='blend:#7AB,#EDA')
|
38 |
plt.xticks(rotation=-10, fontsize=8-n_grams)
|
39 |
plt.savefig(save_output, dpi=300)
|
40 |
plt.close()
|