"""Streamlit app that answers questions about an uploaded PDF.

The uploaded PDF's text is split into chunks, embedded with OpenAI, stored in
an Astra DB (Cassandra) vector table, and queried through an OpenAI LLM.

Credentials are read from the environment variables
``ASTRA_DB_APPLICATION_TOKEN``, ``ASTRA_DB_ID`` and ``OPENAI_API_KEY``.
"""
import base64
import os

import cassio
import pdfplumber
import streamlit as st
from datasets import load_dataset
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferWindowMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.cassandra import Cassandra
from PyPDF2 import PdfReader
from typing_extensions import Concatenate

# SECURITY: credentials must never be committed to source control. Any keys
# that were previously embedded in this file should be treated as leaked and
# rotated.
_REQUIRED_ENV_VARS = (
    "ASTRA_DB_APPLICATION_TOKEN",
    "ASTRA_DB_ID",
    "OPENAI_API_KEY",
)
_TABLE_NAME = "qa_mini_demo"

# st.session_state keys used to keep the built index across Streamlit reruns.
_INDEX_STATE_KEY = "pdf_qa_index"
_INDEX_SOURCE_STATE_KEY = "pdf_qa_index_source"


def _load_credentials():
    """Return the required credentials from the environment, or None.

    Returns:
        A dict mapping each name in ``_REQUIRED_ENV_VARS`` to its non-empty
        value, or ``None`` (after showing an error in the UI) when any of
        them is unset or blank.
    """
    values = {
        name: os.environ.get(name, "").strip() for name in _REQUIRED_ENV_VARS
    }
    missing = [name for name, value in values.items() if not value]
    if missing:
        st.error(
            "Missing required environment variable(s): " + ", ".join(missing)
        )
        return None
    return values


def _extract_pdf_text(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    Pages for which ``extract_text()`` yields nothing are skipped.
    """
    reader = PdfReader(pdf_file)
    return "".join(page.extract_text() or "" for page in reader.pages)


def _build_index(raw_text, credentials):
    """Chunk *raw_text*, store it in the vector table and return an index.

    Args:
        raw_text: Full text extracted from the PDF.
        credentials: Dict as returned by ``_load_credentials``.

    Returns:
        A ``VectorStoreIndexWrapper`` around the populated vector store.
    """
    cassio.init(
        token=credentials["ASTRA_DB_APPLICATION_TOKEN"],
        database_id=credentials["ASTRA_DB_ID"],
    )
    embedding = OpenAIEmbeddings(openai_api_key=credentials["OPENAI_API_KEY"])
    # session/keyspace are left as None; presumably the store then falls back
    # to the connection set up by cassio.init() above -- TODO confirm.
    vector_store = Cassandra(
        embedding=embedding,
        table_name=_TABLE_NAME,
        session=None,
        keyspace=None,
    )
    # Presumably clears rows left over from earlier uploads so that answers
    # come only from the current document -- TODO confirm.
    vector_store.delete_collection()

    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=800,
        chunk_overlap=200,
        length_function=len,
    )
    vector_store.add_texts(splitter.split_text(raw_text))
    return VectorStoreIndexWrapper(vectorstore=vector_store)


def main():
    """Render the page: upload a PDF, index it once, answer questions."""
    st.title("INTERACTION WITH PDF USING LLM")
    pdf_file = st.file_uploader("Upload PDF file", type=["pdf"])
    if pdf_file is None:
        return

    credentials = _load_credentials()
    if credentials is None:
        return

    # Streamlit re-executes this script on every widget interaction. Without
    # caching, each keystroke or button click would wipe the table and
    # re-embed the whole document. Rebuild only when the upload changes;
    # name + size is used as a cheap identity for the uploaded file.
    source_id = (pdf_file.name, pdf_file.size)
    if st.session_state.get(_INDEX_SOURCE_STATE_KEY) != source_id:
        raw_text = _extract_pdf_text(pdf_file)
        if not raw_text.strip():
            st.warning("No extractable text was found in the uploaded PDF.")
            return
        with st.spinner("Indexing PDF..."):
            st.session_state[_INDEX_STATE_KEY] = _build_index(
                raw_text, credentials
            )
        st.session_state[_INDEX_SOURCE_STATE_KEY] = source_id
    astra_vector_index = st.session_state[_INDEX_STATE_KEY]

    query_text = st.text_input("Enter your Question:").strip()
    submit = st.button("Generate")
    if not submit:
        return
    if not query_text:
        # Avoid spending an LLM call on an empty question.
        st.warning("Please enter a question first.")
        return

    llm = OpenAI(openai_api_key=credentials["OPENAI_API_KEY"])
    answer = astra_vector_index.query(query_text, llm=llm).strip()
    st.write(f'\nANSWER :"{answer}"')


if __name__ == "__main__":
    main()