import os
import tempfile

import streamlit as st
from langchain.document_loaders import UnstructuredFileLoader


def _extract_pdf_text(pdf_bytes: bytes) -> str:
    """Return the concatenated text of every document parsed from *pdf_bytes*.

    The bytes are written to a uniquely named temporary ``.pdf`` file because
    ``UnstructuredFileLoader`` is given a filesystem path. The temporary file
    is removed afterwards, whether or not loading succeeds.

    Args:
        pdf_bytes: Raw contents of the PDF to parse.

    Returns:
        The ``page_content`` of each loaded document, joined with no separator.
    """
    # A generated name avoids collisions between uploads that share a file
    # name and keeps client-supplied names out of the filesystem path.
    # delete=False lets us close the handle before the loader opens the path.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
        temp_file.write(pdf_bytes)
        temp_file_path = temp_file.name

    try:
        documents = UnstructuredFileLoader(temp_file_path).load()
    finally:
        # Do not leave uploaded documents behind in the temp directory.
        os.remove(temp_file_path)

    return "".join(document.page_content for document in documents)


def main() -> None:
    """Render the page: upload a PDF and show its extracted text."""
    st.title("PDF Text Extractor")

    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    st.subheader("PDF Content : ")

    # getvalue() returns the whole buffer regardless of the current stream
    # position, unlike read().
    text_content = _extract_pdf_text(uploaded_file.getvalue())

    st.text_area("Extracted Text:", value=text_content, height=300)


if __name__ == "__main__":
    main()