{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.vectorstores import FAISS\n",
    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
    "from langchain import OpenAI\n",
    "from langchain.chains import RetrievalQA\n",
    "from langchain.document_loaders import DirectoryLoader\n",
    "import magic\n",
    "import os\n",
    "import nltk\n",
    "\n",
    "# Configuration comes from the environment; os.getenv returns None when a variable is unset.\n",
    "openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n",
    "# NOTE(review): data_location is read here but not used by any cell below;\n",
    "# the FAISS example defaults to \"../data\" -- confirm which location is intended.\n",
    "data_location = os.getenv(\"VECTOR_DATA_DIR\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Chroma"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from modules.vector_stores.vector_stores.chroma_manager import get_default_chroma_mgr\n",
    "\n",
    "chroma_mgr = get_default_chroma_mgr(persisted=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "chroma_mgr.persist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from modules.vector_stores.retrieval.basic_qa import get_default_qa\n",
    "\n",
    "qa = get_default_qa(chroma_mgr.db)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Cite sources\n",
    "def process_llm_response(llm_response):\n",
    "    \"\"\"Print the answer and the source of every supporting document.\n",
    "\n",
    "    Args:\n",
    "        llm_response: mapping with a 'result' entry (the answer) and a\n",
    "            'source_documents' entry (an iterable of documents whose\n",
    "            ``metadata`` mapping has a 'source' key).\n",
    "    \"\"\"\n",
    "    print(llm_response['result'])\n",
    "    print('\\n\\nSources:')\n",
    "    for source in llm_response[\"source_documents\"]:\n",
    "        print(source.metadata['source'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# full example\n",
    "# NOTE(review): resp is never displayed; presumably process_llm_response(resp)\n",
    "# was intended -- confirm the shape of the value returned by qa.ask first.\n",
    "query = \"What is a date table?\"\n",
    "resp = qa.ask(query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## FAISS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from modules.vector_stores.loaders.pypdf_load_strategy import PyPDFLoadStrategy, PyPDFConfig, get_default_pypdf_loader\n",
    "from modules.vector_stores.embedding.openai import OpenAIEmbeddings, OpenAIEmbedConfig, get_default_openai_embeddings\n",
    "\n",
    "\n",
    "def get_example_pdf_embedding(dir_location=\"../data\"):\n",
    "    \"\"\"Build a FAISS index over the documents found under ``dir_location``.\n",
    "\n",
    "    Documents come from ``get_default_pypdf_loader(dir_location).load()`` and\n",
    "    are embedded with ``get_default_openai_embeddings()``.\n",
    "\n",
    "    Args:\n",
    "        dir_location: directory handed to the loader; defaults to \"../data\".\n",
    "\n",
    "    Returns:\n",
    "        The vector store produced by ``FAISS.from_documents``.\n",
    "    \"\"\"\n",
    "    loader = get_default_pypdf_loader(dir_location)\n",
    "    documents = loader.load()\n",
    "    embeddings = get_default_openai_embeddings()\n",
    "    return FAISS.from_documents(documents, embeddings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "index = get_example_pdf_embedding()\n",
    "llm = OpenAI(openai_api_key=openai_api_key)\n",
    "# return_source_documents=True so the response also carries the retrieved documents.\n",
    "qa = RetrievalQA.from_chain_type(llm=llm,\n",
    "                                 chain_type=\"stuff\",\n",
    "                                 retriever=index.as_retriever(),\n",
    "                                 return_source_documents=True)\n",
    "query = \"What is a date table?\"\n",
    "result = qa({\"query\": query})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `documents` and `embeddings` are locals of get_example_pdf_embedding(), so they\n",
    "# are not defined on a fresh kernel. Reuse the index built above, which was created\n",
    "# from those same inputs, instead of embedding the documents a second time.\n",
    "docsearch = index\n",
    "llm = OpenAI(openai_api_key=openai_api_key)\n",
    "qa = RetrievalQA.from_chain_type(llm=llm, chain_type=\"stuff\", retriever=docsearch.as_retriever())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "qa = RetrievalQA.from_chain_type(llm=llm,\n",
    "                                 chain_type=\"stuff\",\n",
    "                                 retriever=docsearch.as_retriever(),\n",
    "                                 return_source_documents=True)\n",
    "query = \"What is a date table?\"\n",
    "result = qa({\"query\": query})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}