prashant
committed on
Commit
•
7de7bf4
1
Parent(s):
1a4b146
haystack adaptation
Browse files
udfPreprocess/sdg_classifier.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from tkinter import Text
|
2 |
from haystack.nodes import TransformersDocumentClassifier
|
|
|
3 |
from typing import List, Tuple
|
4 |
import configparser
|
5 |
import streamlit as st
|
@@ -27,7 +28,7 @@ def load_sdgClassifier():
|
|
27 |
return doc_classifier
|
28 |
|
29 |
|
30 |
-
def sdg_classification(
|
31 |
"""
|
32 |
Text-Classification on the list of texts provided. Classifier provides the
|
33 |
most appropriate label for each text. these labels are in terms of if text
|
@@ -35,9 +36,9 @@ def sdg_classification(paraList:List[Text])->Tuple[DataFrame,Series]:
|
|
35 |
|
36 |
Params
|
37 |
---------
|
38 |
-
|
39 |
-
contains
|
40 |
-
is
|
41 |
|
42 |
Returns
|
43 |
----------
|
@@ -51,7 +52,7 @@ def sdg_classification(paraList:List[Text])->Tuple[DataFrame,Series]:
|
|
51 |
|
52 |
|
53 |
classifier = load_sdgClassifier()
|
54 |
-
results = classifier.predict(
|
55 |
|
56 |
|
57 |
labels_= [(l.meta['classification']['label'],
|
@@ -89,4 +90,4 @@ def runSDGPreprocessingPipeline()->List[Text]:
|
|
89 |
"split_by": split_by, \
|
90 |
"split_length":split_length}})
|
91 |
|
92 |
-
return output_sdg_pre['
|
|
|
1 |
from tkinter import Text
|
2 |
from haystack.nodes import TransformersDocumentClassifier
|
3 |
+
from haystack.schema import Document
|
4 |
from typing import List, Tuple
|
5 |
import configparser
|
6 |
import streamlit as st
|
|
|
28 |
return doc_classifier
|
29 |
|
30 |
|
31 |
+
def sdg_classification(haystackdoc:List[Document])->Tuple[DataFrame,Series]:
|
32 |
"""
|
33 |
Text-Classification on the list of texts provided. Classifier provides the
|
34 |
most appropriate label for each text. these labels are in terms of if text
|
|
|
36 |
|
37 |
Params
|
38 |
---------
|
39 |
+
haystackdoc: List of haystack Documents. The output of Preprocessing Pipeline
|
40 |
+
contains the list of paragraphs in different formats; here the list of
|
41 |
+
Haystack Documents is used.
|
42 |
|
43 |
Returns
|
44 |
----------
|
|
|
52 |
|
53 |
|
54 |
classifier = load_sdgClassifier()
|
55 |
+
results = classifier.predict(haystackdoc)
|
56 |
|
57 |
|
58 |
labels_= [(l.meta['classification']['label'],
|
|
|
90 |
"split_by": split_by, \
|
91 |
"split_length":split_length}})
|
92 |
|
93 |
+
return output_sdg_pre['documents']
|