# Spaces: Running
import streamlit as st | |
import fitz # PyMuPDF | |
from docx import Document | |
from io import BytesIO | |
# Control characters that XML 1.0 forbids: everything below U+0020 except
# tab, newline and carriage return. Mapped to None so str.translate drops them.
_XML_INVALID_CONTROLS = {
    code: None for code in range(0x20) if code not in (0x09, 0x0A, 0x0D)
}


def convert_pdf_to_word(pdf_file):
    """Extract the text of a PDF into a new Word document.

    The text of each page is added as one paragraph, in page order. Only
    plain text is carried over.

    Args:
        pdf_file: Binary file-like object whose ``read()`` returns the raw
            PDF bytes.

    Returns:
        A ``docx`` ``Document`` holding one paragraph per PDF page.
    """
    # The context manager closes the PDF even if extraction fails part-way;
    # the original left the document open.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as pdf_document:
        doc = Document()
        for page in pdf_document:
            text = page.get_text()
            # Extracted text may contain control characters (e.g. form feed)
            # that cannot be stored in the .docx XML; the XML layer raises
            # ValueError on them, so strip them before adding the paragraph.
            doc.add_paragraph(text.translate(_XML_INVALID_CONTROLS))
    return doc
def main():
    """Render the Streamlit page: upload a PDF, convert it, offer the .docx."""
    docx_mime = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    )

    st.title("PDF to Word Converter")
    pdf_upload = st.file_uploader("Choose a PDF file", type="pdf")

    # Nothing to convert until the user has picked a file.
    if pdf_upload is None:
        return

    with st.spinner("Converting..."):
        converted = convert_pdf_to_word(pdf_upload)
        # Serialise the document into memory so it can be handed to the
        # download button without touching the filesystem.
        docx_stream = BytesIO()
        converted.save(docx_stream)
        # Rewind so the download starts from the first byte.
        docx_stream.seek(0)

    st.success("Conversion successful!")
    st.download_button(
        label="Download Word document",
        data=docx_stream,
        file_name="converted_document.docx",
        mime=docx_mime,
    )
# Start the app only when this file is executed as the entry script
# (not when it is imported as a module).
if __name__ == "__main__":
    main()