PDF-READER-LLM / app.py
ARBAJSSHAIKH's picture
Update app.py
ef419e8 verified
import streamlit as st
import pdfplumber
import base64
from langchain.llms import OpenAI
from langchain.vectorstores.cassandra import Cassandra
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.embeddings import OpenAIEmbeddings
from typing_extensions import Concatenate
from datasets import load_dataset
from langchain.memory import ConversationBufferWindowMemory
import cassio
from PyPDF2 import PdfReader
# Environment variables that must be set before a question can be answered.
_REQUIRED_ENV_VARS = ("ASTRA_DB_APPLICATION_TOKEN", "ASTRA_DB_ID", "OPENAI_API_KEY")


def _extract_pdf_text(pdf_file):
    """Return the concatenated text of every page of *pdf_file* that yields text.

    Pages for which ``extract_text()`` returns nothing are skipped.
    """
    page_texts = (page.extract_text() for page in PdfReader(pdf_file).pages)
    return "".join(text for text in page_texts if text)


def _build_vector_index(raw_text, embedding):
    """Store chunks of *raw_text* in the ``qa_mini_demo`` table and wrap it for querying.

    Args:
        raw_text: Full text to split into chunks and embed.
        embedding: Embedding model handed to the Cassandra vector store.

    Returns:
        A ``VectorStoreIndexWrapper`` around the freshly populated store.
    """
    from langchain.text_splitter import CharacterTextSplitter

    # session/keyspace are left as None; presumably they are resolved from the
    # preceding cassio.init() call -- confirm against the cassio docs.
    astra_vector_store = Cassandra(
        embedding=embedding,
        table_name='qa_mini_demo',
        session=None,
        keyspace=None,
    )
    # Clear whatever was stored before adding this PDF's chunks.
    astra_vector_store.delete_collection()

    text_splitter = CharacterTextSplitter(
        separator='\n',
        chunk_size=800,
        chunk_overlap=200,
        length_function=len,
    )
    astra_vector_store.add_texts(text_splitter.split_text(raw_text))
    return VectorStoreIndexWrapper(vectorstore=astra_vector_store)


def main():
    """Render the Streamlit page: upload a PDF, ask a question, show the answer.

    Credentials are read from the ``ASTRA_DB_APPLICATION_TOKEN``,
    ``ASTRA_DB_ID`` and ``OPENAI_API_KEY`` environment variables instead of
    being embedded in the source. If any is unset, an error is shown and
    nothing is sent to Astra DB or OpenAI.

    Streamlit re-executes the script on every widget interaction, so the
    store is only cleared and re-embedded once "Generate" is pressed with a
    non-empty question, rather than on every rerun.
    """
    import os

    st.title("INTERACTION WITH PDF USING LLM")
    pdf_file = st.file_uploader("Upload PDF file", type=["pdf"])
    if pdf_file is None:
        return

    credentials = {name: os.environ.get(name) for name in _REQUIRED_ENV_VARS}
    missing = [name for name, value in credentials.items() if not value]
    if missing:
        st.error("Missing required environment variable(s): " + ", ".join(missing))
        return

    raw_text = _extract_pdf_text(pdf_file)
    if not raw_text.strip():
        # Nothing to embed (e.g. no page produced text); stop before any API call.
        st.warning("No extractable text was found in the uploaded PDF.")
        return

    query_text = st.text_input("Enter your Question:").strip()
    submit = st.button('Generate')
    if not submit:
        return
    if not query_text:
        st.warning("Please enter a question before pressing Generate.")
        return

    cassio.init(
        token=credentials["ASTRA_DB_APPLICATION_TOKEN"],
        database_id=credentials["ASTRA_DB_ID"],
    )
    llm = OpenAI(openai_api_key=credentials["OPENAI_API_KEY"])
    embedding = OpenAIEmbeddings(openai_api_key=credentials["OPENAI_API_KEY"])

    astra_vector_index = _build_vector_index(raw_text, embedding)
    answer = astra_vector_index.query(query_text, llm=llm).strip()
    st.write("\nANSWER :\"%s\"" % answer)
# Launch the app only when this file is run as the entry script, not on import.
if __name__ == "__main__":
    main()