import glob, os, sys
sys.path.append('../udfPreprocess')

# Project helper modules
import udfPreprocess.docPreprocessing as pre
import udfPreprocess.cleaning as clean

# Third-party libraries
import seaborn as sns
from pandas import DataFrame
from keybert import KeyBERT
from transformers import pipeline
import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
import pandas as pd
import docx
from docx.shared import Inches
from docx.shared import Pt
from docx.enum.style import WD_STYLE_TYPE

import tempfile
import sqlite3
import logging
import configparser

logger = logging.getLogger(__name__)

@st.cache(allow_output_mutation=True)
def load_sdgClassifier():
    """Load and cache the SDG text-classification pipeline."""
    logger.info("Loading SDG classifier")
    classifier = pipeline("text-classification", model="jonas/sdg_classifier_osdg")
    return classifier

def sdg_classification(par_list):
    """Classify paragraphs by SDG and return (labelled dataframe, SDG counts)."""
    logger.info("Running SDG classification")

    # Read the relevancy threshold from the parameter config file.
    config = configparser.ConfigParser()
    with open('udfPreprocess/paramconfig.cfg') as cfg_file:
        config.read_file(cfg_file)
    threshold = float(config.get('sdg', 'THRESHOLD'))

    classifier = load_sdgClassifier()
    labels = classifier(par_list)

    labels_ = [(l['label'], l['score']) for l in labels]
    df2 = DataFrame(labels_, columns=["SDG", "Relevancy"])
    df2['text'] = par_list
    df2 = df2.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
    df2.index += 1

    # Keep only paragraphs whose relevancy score exceeds the configured threshold.
    df2 = df2[df2['Relevancy'] > threshold]
    x = df2['SDG'].value_counts()

    # Return the filtered paragraphs (without the score column) and the SDG counts.
    df3 = df2.copy()
    df3 = df3.drop(['Relevancy'], axis=1)

    return df3, x
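
# Hypothetical usage sketch (not part of the original app wiring): how a
# Streamlit page might call sdg_classification on a list of cleaned
# paragraphs. Variable names and the sample text below are assumptions.
#
#   paragraphs = ["Access to clean drinking water remains limited in rural areas."]
#   df_sdg, sdg_counts = sdg_classification(paragraphs)
#   st.dataframe(df_sdg)        # paragraphs with their predicted SDG labels
#   st.bar_chart(sdg_counts)    # frequency of each SDG above the threshold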