File size: 2,291 Bytes
d4c5df5
 
 
 
 
 
 
 
5c88547
 
 
 
 
 
d4c5df5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import streamlit as st
import os
import tempfile
import PyPDF2

def main():
    """Render the Streamlit page that lists metadata for uploaded PDF files.

    The page shows a title and an "About" expander, then a multi-file PDF
    uploader. Once files are uploaded they are copied into a temporary
    directory, their names are listed, and an "Extract Metadata" button
    prints each file's metadata entries as ``key: value`` lines.

    Errors while parsing a single PDF are reported with ``st.error`` and do
    not stop the remaining files from being processed. Any other failure
    (e.g. while creating or writing the temporary files) is reported once
    and ends the run.
    """
    st.title("PDF Metadata Extractor")

    with st.expander("ℹ️ About"):
        st.markdown("""This app extracts metadata from PDF files. 
        You can upload one or more PDF files and view their metadata.""")
        # The link text and URL must be adjacent ("](") for Markdown to
        # render a link, so the string is built without a line break.
        st.markdown(
            "(c) 2024 [Louie F. Cervantes, M.Eng. (Information Engineering)]"
            "(https://huggingface.co/spaces/louiecerv)"
        )

    # Multi-file uploader restricted to PDFs.
    uploaded_files = st.file_uploader("Upload PDF files:", type="pdf", accept_multiple_files=True)

    if not uploaded_files:
        st.warning("Please upload PDF files.")
        return

    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            # (display name, on-disk path) for every uploaded file.
            saved_files = []

            for index, uploaded_file in enumerate(uploaded_files):
                # The client-supplied name is used for display only. The copy
                # on disk gets an index-based name so that duplicate names do
                # not overwrite each other and a name containing path
                # separators cannot point outside the temporary directory.
                pdf_path = os.path.join(temp_dir, f"{index}.pdf")
                with open(pdf_path, "wb") as f:
                    # getvalue() returns the whole buffer regardless of the
                    # current read position, unlike read().
                    f.write(uploaded_file.getvalue())
                saved_files.append((uploaded_file.name, pdf_path))

            st.subheader("Detected PDF Files:")
            for display_name, _ in saved_files:
                st.write(display_name)

            if st.button("Extract Metadata"):
                st.subheader("PDF Metadata:")
                for display_name, pdf_path in saved_files:
                    try:
                        with open(pdf_path, "rb") as f:
                            pdf_info = PyPDF2.PdfReader(f).metadata

                            st.write(f"**File:** {display_name}")
                            if not pdf_info:
                                # metadata is None (or empty) when the PDF
                                # carries no document information.
                                st.write("No metadata found.")
                            else:
                                for key, value in pdf_info.items():
                                    st.write(f"{key}: {value}")
                            st.write("---")

                    except Exception as e:
                        # Keep going: one unreadable PDF should not hide the
                        # results for the others.
                        st.error(f"Error processing {display_name}: {e}")

    except Exception as e:
        st.error(f"An error occurred: {e}")
        return

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()