App_7 / app.py
rushi-k's picture
Update app.py
563bf73 verified
raw
history blame contribute delete
No virus
1.35 kB
import streamlit as st
import PyPDF2
import pandas as pd
from fpdf import FPDF
import os
# Function to process the PDF and categorize text
def process_pdf(file) -> pd.DataFrame:
    """Extract the text of every PDF page and tag it with a section category.

    Each page is labelled with the first section name -- checked in the
    order Introduction, Methods, Results, Discussion, Conclusion -- that
    occurs anywhere in the page's extracted text (case-sensitive substring
    match). A page containing none of these names is labelled "Other".

    Args:
        file: The PDF source, passed unchanged to ``PyPDF2.PdfReader``.

    Returns:
        A DataFrame with one row per page, in page order, and the columns
        "Category" and "Text".
    """
    # Order matters: a page that mentions several sections is assigned the
    # earliest name in this tuple.
    section_names = (
        "Introduction",
        "Methods",
        "Results",
        "Discussion",
        "Conclusion",
    )

    reader = PyPDF2.PdfReader(file)
    categorized_text = []
    for page in reader.pages:
        # A page may yield no text at all; normalise a falsy result to ""
        # so the substring tests below cannot raise TypeError on None.
        text = page.extract_text() or ""
        category = next(
            (name for name in section_names if name in text),
            "Other",
        )
        categorized_text.append((category, text))

    return pd.DataFrame(categorized_text, columns=["Category", "Text"])
# Streamlit app UI
st.title("PDF Extraction and Text Categorization")

# Ask the user for a PDF; nothing below runs until a file has been uploaded.
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    # Categorize the upload page by page and show the resulting table.
    categorized_df = process_pdf(uploaded_file)
    st.write("Categorized Text:")
    st.dataframe(categorized_df)

    # Offer the same table as a UTF-8 encoded CSV download (no index column).
    csv_text = categorized_df.to_csv(index=False)
    csv_bytes = csv_text.encode("utf-8")
    st.download_button(
        label="Download Categorized Text as CSV",
        data=csv_bytes,
        file_name="categorized_text.csv",
        mime="text/csv",
    )