File size: 1,347 Bytes
fa21aad
8c553de
fa21aad
 
 
 
 
 
563bf73
fa21aad
 
563bf73
 
fa21aad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8c553de
fa21aad
8c553de
 
fa21aad
 
8c553de
fa21aad
8c553de
fa21aad
 
 
 
c61a55b
fa21aad
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import streamlit as st
import PyPDF2
import pandas as pd
from fpdf import FPDF
import os

# Section keywords, checked in priority order: the first keyword found in a
# page's text decides that page's category.
_SECTION_KEYWORDS = (
    "Introduction",
    "Methods",
    "Results",
    "Discussion",
    "Conclusion",
)


def process_pdf(file):
    """Extract the text of every page in a PDF and label each page.

    Each page is assigned the first keyword from ``_SECTION_KEYWORDS`` that
    occurs in its text (case-sensitive substring match), or ``"Other"`` when
    none of them occurs.

    Args:
        file: The PDF source, passed straight to ``PyPDF2.PdfReader``.

    Returns:
        A ``pandas.DataFrame`` with one row per page, in page order, and two
        columns: ``"Category"`` and ``"Text"``.
    """
    reader = PyPDF2.PdfReader(file)
    categorized_text = []

    for page in reader.pages:
        # Defensive: fall back to an empty string so the substring checks
        # below cannot raise on a missing extraction result.
        # NOTE(review): whether extract_text() can return None depends on the
        # installed PyPDF2 version -- confirm.
        text = page.extract_text() or ""
        category = next(
            (keyword for keyword in _SECTION_KEYWORDS if keyword in text),
            "Other",
        )
        categorized_text.append((category, text))

    return pd.DataFrame(categorized_text, columns=["Category", "Text"])

# Streamlit page: upload a PDF, display the per-page categories, and offer
# the same table as a CSV download.
st.title("PDF Extraction and Text Categorization")

pdf_upload = st.file_uploader("Choose a PDF file", type="pdf")

# Nothing below runs until the user has actually supplied a file.
if pdf_upload is not None:
    page_table = process_pdf(pdf_upload)

    st.write("Categorized Text:")
    st.dataframe(page_table)

    # The download button takes bytes, so serialize the table (without the
    # index column) and encode it up front.
    download_options = {
        "label": "Download Categorized Text as CSV",
        "data": page_table.to_csv(index=False).encode('utf-8'),
        "file_name": 'categorized_text.csv',
        "mime": 'text/csv',
    }
    st.download_button(**download_options)