{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "requirements.txt file contents:\n",
    "\n",
    "langchain==0.0.154\n",
    "PyPDF2==3.0.1\n",
    "python-dotenv==1.0.0\n",
    "streamlit==1.18.1\n",
    "faiss-cpu==1.7.4\n",
    "streamlit-extras\n",
    "'''\n",
    "\n",
    "import os\n",
    "import pickle\n",
    "\n",
    "import streamlit as st\n",
    "from dotenv import load_dotenv\n",
    "from PyPDF2 import PdfReader\n",
    "from streamlit_extras.add_vertical_space import add_vertical_space\n",
    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
    "from langchain.vectorstores import FAISS\n",
    "from langchain.llms import OpenAI\n",
    "from langchain.chains.question_answering import load_qa_chain\n",
    "from langchain.callbacks import get_openai_callback\n",
    "\n",
    "# Tunables for chunking and retrieval, kept in one place so they are easy to adjust.\n",
    "CHUNK_SIZE = 1000     # chunk_size handed to the text splitter (measured with len)\n",
    "CHUNK_OVERLAP = 200   # chunk_overlap handed to the text splitter\n",
    "TOP_K = 3             # number of chunks retrieved for each question\n",
    "\n",
    "# Sidebar contents\n",
    "with st.sidebar:\n",
    "    st.title('🤗💬 LLM Chat App')\n",
    "    st.markdown('''\n",
    "    ## About\n",
    "    This app is an LLM-powered chatbot built using:\n",
    "    - [Streamlit](https://streamlit.io/)\n",
    "    - [LangChain](https://python.langchain.com/)\n",
    "    - [OpenAI](https://platform.openai.com/docs/models) LLM model\n",
    "\n",
    "    ''')\n",
    "    add_vertical_space(5)\n",
    "    st.write('Made with ❤️ by [Prompt Engineer](https://youtube.com/@engineerprompt)')\n",
    "\n",
    "# Load variables from a .env file before any OpenAI client is created\n",
    "# (presumably this is where OPENAI_API_KEY comes from - confirm for your setup).\n",
    "load_dotenv()\n",
    "\n",
    "\n",
    "def _extract_pdf_text(pdf):\n",
    "    '''Return the text of every page of the uploaded PDF, joined in page order.'''\n",
    "    reader = PdfReader(pdf)\n",
    "    # A page that yields no text is treated as an empty string so the join cannot fail.\n",
    "    return ''.join(page.extract_text() or '' for page in reader.pages)\n",
    "\n",
    "\n",
    "def _split_into_chunks(text):\n",
    "    '''Split text into a list of chunks using CHUNK_SIZE and CHUNK_OVERLAP.'''\n",
    "    splitter = RecursiveCharacterTextSplitter(\n",
    "        chunk_size=CHUNK_SIZE,\n",
    "        chunk_overlap=CHUNK_OVERLAP,\n",
    "        length_function=len,\n",
    "    )\n",
    "    return splitter.split_text(text=text)\n",
    "\n",
    "\n",
    "def _load_or_build_vector_store(store_name, chunks):\n",
    "    '''Return the FAISS store for store_name, reading the pickle cache when it exists.\n",
    "\n",
    "    On a cache miss the chunks are embedded with OpenAIEmbeddings, indexed with\n",
    "    FAISS, and the resulting store is pickled to <store_name>.pkl in the current\n",
    "    working directory so later runs skip the embedding step.\n",
    "    '''\n",
    "    cache_path = f'{store_name}.pkl'\n",
    "\n",
    "    if os.path.exists(cache_path):\n",
    "        # SECURITY: pickle.load can execute arbitrary code. Only keep .pkl files\n",
    "        # written by this app in the working directory.\n",
    "        # NOTE: the cache key is the file name alone, so a different PDF uploaded\n",
    "        # under the same name reuses the old index; delete the .pkl to rebuild it.\n",
    "        with open(cache_path, 'rb') as cache_file:\n",
    "            return pickle.load(cache_file)\n",
    "\n",
    "    embeddings = OpenAIEmbeddings()\n",
    "    vector_store = FAISS.from_texts(chunks, embedding=embeddings)\n",
    "    with open(cache_path, 'wb') as cache_file:\n",
    "        pickle.dump(vector_store, cache_file)\n",
    "    return vector_store\n",
    "\n",
    "\n",
    "def main():\n",
    "    '''Render the page: upload a PDF, index its text, and answer a question about it.'''\n",
    "    st.header(\"Chat with PDF 💬\")\n",
    "\n",
    "    # upload a PDF file\n",
    "    pdf = st.file_uploader(\"Upload your PDF\", type='pdf')\n",
    "    if pdf is None:\n",
    "        return\n",
    "\n",
    "    chunks = _split_into_chunks(_extract_pdf_text(pdf))\n",
    "\n",
    "    # splitext drops the extension without assuming it is exactly four characters long.\n",
    "    store_name = os.path.splitext(pdf.name)[0]\n",
    "    st.write(store_name)\n",
    "\n",
    "    if not chunks:\n",
    "        # Scanned or image-only PDFs produce no text; building an index from an\n",
    "        # empty list fails, so tell the user and stop instead of crashing.\n",
    "        st.warning('No extractable text was found in this PDF.')\n",
    "        return\n",
    "\n",
    "    vector_store = _load_or_build_vector_store(store_name, chunks)\n",
    "\n",
    "    # Accept user questions/query\n",
    "    query = st.text_input(\"Ask questions about your PDF file:\")\n",
    "    if not query:\n",
    "        return\n",
    "\n",
    "    docs = vector_store.similarity_search(query=query, k=TOP_K)\n",
    "\n",
    "    llm = OpenAI()\n",
    "    chain = load_qa_chain(llm=llm, chain_type=\"stuff\")\n",
    "    with get_openai_callback() as cb:\n",
    "        response = chain.run(input_documents=docs, question=query)\n",
    "        # Goes to the server console, not the page.\n",
    "        print(cb)\n",
    "    st.write(response)\n",
    "\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    main()"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}