# Streamlit page that benchmarks several PDF parsers side by side.
#
# The user uploads a PDF; it is written to a temporary directory, handed to
# `process_docs` (from the project-local `helper` module), and the text each
# parser extracted is rendered page by page.

import os
import shutil

import streamlit as st

from helper import process_docs

# Scratch directory for the uploaded file; removed again after processing.
UPLOAD_DIR = "./Tested_Docs"

st.title("Benchmark Parser Performance")
st.markdown("### Upload Document:")
uploaded_file = st.file_uploader("Choose a file", type='.pdf')

if uploaded_file:
    os.makedirs(UPLOAD_DIR, exist_ok=True)

    # The file name is supplied by the client: keep only its final path
    # component so it cannot point outside UPLOAD_DIR.
    safe_name = os.path.basename(uploaded_file.name)
    doc_path = f"{UPLOAD_DIR}/{safe_name}"
    with open(doc_path, "wb") as f:
        f.write(uploaded_file.read())

    try:
        with st.spinner("Processing document..."):
            # process_docs returns one result per parser, unpacked in this
            # order: Llama-Parser, Paddle-OCR, Docling-OCR.
            docsllama, docspaddle, docsdocling = process_docs(doc_path)
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user and skip
        # rendering, since no results were produced.
        st.warning(e)
    else:
        # Llama-Parser results are iterated as a sequence; pages are numbered
        # from 1 for display.
        st.markdown("### Extracted Text by Llama-Parser :")
        for page_number, txt in enumerate(docsllama):
            st.markdown(f"#### Page {page_number+1}")
            st.text(txt)

        # Docling results are iterated as a mapping; its keys are shown as
        # the page numbers without adjustment.
        st.markdown("### Extracted Text by Docling-OCR :")
        for page_number, txt in docsdocling.items():
            st.markdown(f"#### Page {page_number}")
            st.text(txt)

        # Paddle-OCR results are iterated as a sequence; pages are numbered
        # from 1 for display.
        st.markdown("### Extracted Text by Paddle-OCR :")
        for page_number, txt in enumerate(docspaddle):
            st.markdown(f"#### Page {page_number+1}")
            st.text(txt)
    finally:
        # Always remove the scratch directory, including when parsing failed,
        # so uploaded documents are not left on disk.
        shutil.rmtree(UPLOAD_DIR, ignore_errors=True)