Spaces:

Md919191
/

pdf_to_excel_app

Running

Md919191 committed on Jan 26

Commit

a0dcf2b

verified ·

1 Parent(s): 2332d53

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,18 +1,40 @@
 def main():
     st.title("PDF to Excel Converter")
     # File uploader
     uploaded_pdf = st.file_uploader("Upload a PDF file", type="pdf")
     if uploaded_pdf:
-        # Extract text
         text = extract_text_from_pdf(uploaded_pdf)
         st.success("Text extracted from PDF!")
-        # Show extracted text
         st.text_area("Extracted Text", text, height=300)
-        # Download Excel file
         if st.button("Convert to Excel"):
             output_file = "converted_file.xlsx"
             convert_text_to_excel(text, output_file)
@@ -24,6 +46,7 @@ def main():
                     mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                 )
             os.remove(output_file)
-            if __name__ == "__main__":
-    main()

+import streamlit as st
+import pandas as pd
+import pdfplumber
+import os
+# Function to extract text from a PDF file
+def extract_text_from_pdf(pdf_file):
+    with pdfplumber.open(pdf_file) as pdf:
+        text = ""
+        for page in pdf.pages:
+            text += page.extract_text()
+        return text
+# Function to convert extracted text to Excel
+def convert_text_to_excel(text, output_file):
+    rows = text.split("\n")
+    data = [row.split() for row in rows]
+    df = pd.DataFrame(data)
+    df.to_excel(output_file, index=False)
+# Main function to build the Streamlit app
 def main():
     st.title("PDF to Excel Converter")
     # File uploader
     uploaded_pdf = st.file_uploader("Upload a PDF file", type="pdf")
+    # Check if a file has been uploaded
     if uploaded_pdf:
+        # Extract text from the PDF
         text = extract_text_from_pdf(uploaded_pdf)
         st.success("Text extracted from PDF!")
+        # Display the extracted text
         st.text_area("Extracted Text", text, height=300)
+        # Button to convert and download Excel
         if st.button("Convert to Excel"):
             output_file = "converted_file.xlsx"
             convert_text_to_excel(text, output_file)
                     mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                 )
             os.remove(output_file)
+# Entry point of the script
+if __name__ == "__main__":
+    main()