File size: 1,295 Bytes
f51b958
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import streamlit as st

import glob, os, sys; sys.path.append('/src')
#import helper
import preprocessing as pre
import cleaning as clean

def app():
    """Render the "Analyse Policy Document" page.

    Sets the sidebar title, draws the centred page heading and a file
    uploader inside a container and, once a file has been uploaded, hands it
    to ``pre.load_document`` followed by ``clean.preprocessing``. While no
    file is uploaded, a placeholder notice is displayed instead.

    Returns:
        None. All output goes through Streamlit calls.
    """
    heading_html = "<h1 style='text-align: center; color: black;'>SDSN X GIZ Policy Tracing</h1>"
    empty_state_html = "<h3 style='text-align: center; color: black;'>no PDF uploaded ...</h3>"
    accepted_types = ['pdf', 'docx', 'txt']

    # Sidebar
    st.sidebar.title('Analyse Policy Document')

    # Main page body
    with st.container():
        st.markdown(heading_html, unsafe_allow_html=True)

        uploaded = st.file_uploader('Upload PDF File', type=accepted_types)

        if uploaded is None:
            # Nothing to process yet: two spacer lines, then the notice.
            for _ in range(2):
                st.write(' ')
            st.markdown(empty_state_html, unsafe_allow_html=True)
            return

        st.write("Filename: ", uploaded.name)

        # Load the uploaded document, then run the preprocessing step on it.
        loaded_docs = pre.load_document(uploaded)

        # The results are not used further in this function; the three-way
        # unpacking is kept so an unexpected return arity still fails here.
        _processed_docs, _frame, _full_text = clean.preprocessing(loaded_docs)

        st.write('... ')