# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: liekkaskono@163.com
"""Streamlit page with three upload sections (PPT, Word, Excel).

Each section passes the uploaded bytes to the matching extractor from
``extract_office_content``, shows the returned result in a scrollable text
box, and offers every file the extractor wrote into its ``save_img_dir`` as
a single zip download.
"""
import io
import shutil  # kept: no longer used below, but file-level imports are left intact
import tempfile
import zipfile
from pathlib import Path

import streamlit as st
import streamlit_scrollable_textbox as stx
from extract_office_content import ExtractExcel, ExtractPPT, ExtractWord

word_extract = ExtractWord()
ppt_extracter = ExtractPPT()
excel_extract = ExtractExcel()


def mkdir(dir_path):
    """Create ``dir_path`` including missing parents; existing dirs are fine."""
    Path(dir_path).mkdir(parents=True, exist_ok=True)


def _zip_directory(src_dir):
    """Return the bytes of a zip archive holding every file under ``src_dir``.

    Members are stored with paths relative to ``src_dir`` so the archive does
    not expose the (temporary) directory the files were written to.
    """
    src_dir = Path(src_dir)
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, mode='w') as zf:
        # sorted() only makes the member order deterministic.
        for file_path in sorted(src_dir.rglob('*')):
            if file_path.is_file():
                arcname = file_path.relative_to(src_dir).as_posix()
                zf.write(file_path, arcname=arcname)
    return buffer.getvalue()


def _render_extract_section(title, file_types, key, extractor):
    """Render one upload -> extract -> show -> download section.

    Args:
        title: Markdown heading shown above the uploader.
        file_types: File extensions accepted by the uploader.
        key: Unique Streamlit key for the uploader; the download button
            derives its own key from it.
        extractor: Callable invoked as ``extractor(bytes, save_img_dir=dir)``;
            its return value is displayed in the text box.
    """
    st.markdown(title)
    uploaded_file = st.file_uploader("Choose a file",
                                     type=file_types,
                                     key=key,
                                     label_visibility='collapsed')
    if uploaded_file is None:
        return

    bytes_data = uploaded_file.getvalue()
    file_name = Path(uploaded_file.name).stem

    with tempfile.TemporaryDirectory() as tmp_dir:
        with st.spinner('正在提取中....'):
            result = extractor(bytes_data, save_img_dir=tmp_dir)
        # Build the archive in memory while the temp dir still exists, so
        # nothing is left on disk and concurrent sessions cannot collide.
        zip_bytes = _zip_directory(tmp_dir)

    st.markdown('提取结果:')
    stx.scrollableTextbox(result, height=300)
    # Explicit key: the three sections otherwise create download buttons
    # with the same label (and possibly the same file name).
    st.download_button('Download images',
                       data=zip_bytes,
                       file_name=f'{file_name}.zip',
                       key=f'{key}_download')


def main():
    """Draw the page header and the three extraction sections."""
    # NOTE(review): these two strings contain only plain text as seen in this
    # file, yet are rendered with unsafe_allow_html=True -- presumably they
    # originally carried HTML markup. TODO confirm against the upstream file.
    st.markdown("\n\nExtractOfficeContent\n\n", unsafe_allow_html=True)
    st.markdown("\n\nPyPI\n\n", unsafe_allow_html=True)

    sections = (
        ('##### Extract PPT Content', ['ppt', 'pptx'], 'ppt', ppt_extracter),
        ('##### Extract Word Content', ['doc', 'docx'], 'word', word_extract),
        ('##### Extract Excel Content', ['xls', 'xlsx'], 'excel', excel_extract),
    )
    for title, file_types, key, extractor in sections:
        _render_extract_section(title, file_types, key, extractor)


if __name__ == '__main__':
    main()