# Spaces: Paused
| import streamlit as st | |
| import fitz # PyMuPDF | |
| from docx import Document | |
| from io import BytesIO | |
def _is_xml_char(ch):
    """Return True if *ch* is a character permitted by the XML 1.0 spec."""
    code = ord(ch)
    return (
        ch in "\t\n\r"
        or 0x20 <= code <= 0xD7FF
        or 0xE000 <= code <= 0xFFFD
        or 0x10000 <= code <= 0x10FFFF
    )


def convert_pdf_to_word(pdf_file):
    """Convert the text of a PDF into a python-docx document.

    Each page's extracted text is added to the document as one paragraph,
    in page order. Only text is carried over; nothing else from the page
    is read.

    Args:
        pdf_file: Object with a ``read()`` method whose result is handed to
            ``fitz.open(stream=...)`` as the PDF data.

    Returns:
        The document created by ``Document()``, with one paragraph appended
        per PDF page.
    """
    doc = Document()
    # Use the document as a context manager so it is closed even when a page
    # fails to parse; the original never released it.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as pdf_document:
        for page in pdf_document:
            text = page.get_text()
            # Extracted PDF text can contain NUL/control characters that are
            # illegal in XML; writing them into the .docx raises ValueError,
            # so drop them before building the paragraph.
            doc.add_paragraph("".join(filter(_is_xml_char, text)))
    return doc
def main():
    """Render the converter page: upload a PDF, convert it, offer a download."""
    st.title("PDF to Word Converter")
    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

    # Nothing to convert until a file has been chosen.
    if uploaded_file is None:
        return

    with st.spinner("Converting..."):
        converted = convert_pdf_to_word(uploaded_file)
        # Save the document into an in-memory stream and rewind it so the
        # download widget reads it from the beginning.
        docx_stream = BytesIO()
        converted.save(docx_stream)
        docx_stream.seek(0)

    st.success("Conversion successful!")
    st.download_button(
        label="Download Word document",
        data=docx_stream,
        file_name="converted_document.docx",
        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )
# Script entry point: build the page only when this file is run directly.
if __name__ == "__main__":
    main()