prashant
committed on
Commit
•
98746bf
1
Parent(s):
53e0cf4
updating sdg column to numeric
Browse files
- utils/keyword_extraction.py +4 -0
- utils/sdg_classifier.py +1 -0
utils/keyword_extraction.py
CHANGED
@@ -99,6 +99,7 @@ def textrank(textdata:Text, ratio:float = 0.1, words = 0):
|
|
99 |
"""
|
100 |
wrapper function to perform textrank, uses either ratio or wordcount to
|
101 |
extract top keywords limited by words or ratio.
|
|
|
102 |
|
103 |
Params
|
104 |
--------
|
@@ -109,6 +110,9 @@ def textrank(textdata:Text, ratio:float = 0.1, words = 0):
|
|
109 |
Non zero. However, in case the pagerank returns fewer keywords than \
|
110 |
compared to fix value then ratio is used.
|
111 |
|
|
|
|
|
|
|
112 |
"""
|
113 |
if words == 0:
|
114 |
try:
|
|
|
99 |
"""
|
100 |
wrapper function to perform textrank, uses either ratio or wordcount to
|
101 |
extract top keywords limited by words or ratio.
|
102 |
+
1. https://github.com/summanlp/textrank/blob/master/summa/keywords.py
|
103 |
|
104 |
Params
|
105 |
--------
|
|
|
110 |
Non zero. However, in case the pagerank returns fewer keywords than \
|
111 |
compared to fix value then ratio is used.
|
112 |
|
113 |
+
Return
|
114 |
+
--------
|
115 |
+
results: extracted keywords
|
116 |
"""
|
117 |
if words == 0:
|
118 |
try:
|
utils/sdg_classifier.py
CHANGED
@@ -99,6 +99,7 @@ def sdg_classification(haystackdoc:List[Document])->Tuple[DataFrame,Series]:
|
|
99 |
x['SDG_name'] = x['SDG'].apply(lambda x: _lab_dict[x])
|
100 |
x['SDG'] = x['SDG'].apply(lambda x: "SDG "+str(x))
|
101 |
df= df.drop(['Relevancy'], axis = 1)
|
|
|
102 |
|
103 |
|
104 |
return df, x
|
|
|
99 |
x['SDG_name'] = x['SDG'].apply(lambda x: _lab_dict[x])
|
100 |
x['SDG'] = x['SDG'].apply(lambda x: "SDG "+str(x))
|
101 |
df= df.drop(['Relevancy'], axis = 1)
|
102 |
+
df['SDG'] = pd.to_numeric(df['SDG'])
|
103 |
|
104 |
|
105 |
return df, x
|