import streamlit as st | |
from PyPDF2 import PdfReader | |
from streamlit_extras.add_vertical_space import add_vertical_space | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.vectorstores import faiss | |
# Sidebar: app title, a one-line description, and some vertical padding.
# NOTE(review): the original indentation was lost; this assumes only the three
# calls below belong to the sidebar context — confirm against the intended layout.
with st.sidebar:
    st.title("File Research using LLM")
    st.markdown(" Upload your file and ask questions and do Research")
    # Insert 5 blank lines of spacing below the description.
    add_vertical_space(5)
# Upload a PDF, extract its text, split it into overlapping chunks, and index
# the chunks in a FAISS vector store using OpenAI embeddings.
# NOTE(review): the original indentation was lost; this section is assumed to
# run at module level (outside the sidebar) — confirm against the intended layout.
pdf = st.file_uploader("Upload your file (PDF)", type="pdf")

# Nothing to process until the user has actually selected a file.
if pdf is not None:
    pdf_reader = PdfReader(pdf)

    # Gather the text of every page in one pass instead of repeated `+=`.
    text = "".join(page.extract_text() for page in pdf_reader.pages)

    # Chunks of up to 1000 characters (measured with len) overlapping by 200.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)

    if chunks:
        embeddings = OpenAIEmbeddings()
        vectorstore = faiss.FAISS.from_texts(chunks, embedding=embeddings)
        st.write(chunks)
    else:
        # A PDF with no extractable text (e.g. scanned images) produces no
        # chunks, and FAISS.from_texts cannot build an index from an empty
        # list, so report the situation instead of crashing.
        st.warning("No extractable text was found in the uploaded PDF.")

st.write("Made by ALOK")
def main() -> None:
    """Render the page header of the app."""
    st.header("Talk to your file")
# Call main() only when this file is executed as the entry script.
if __name__ == "__main__":
    main()