{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Querying document metadata with Vectara and LangChain\n",
    "\n",
    "This notebook loads a CSV of document names, descriptions and disciplines, indexes the rows in a Vectara corpus through LangChain, and then queries Vectara's summarizing retriever about the indexed documents.\n",
    "\n",
    "**Prerequisites:** `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY` must be set in the environment or in a `.env` file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "from dotenv import load_dotenv\n",
    "from langchain_community.document_loaders.csv_loader import CSVLoader\n",
    "from langchain_community.vectorstores import Vectara\n",
    "\n",
    "# NOTE(review): the next two imports are not used anywhere in this notebook.\n",
    "from langchain.chains.qa_with_sources import load_qa_with_sources_chain\n",
    "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
    "\n",
    "# Load variables from a local .env file into os.environ.\n",
    "load_dotenv()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration and data loading"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Path of the CSV to index. The default is the location used when this notebook\n",
    "# was written; set DOCVERIFY_DATA_CSV (environment or .env) to use another copy.\n",
    "DATA_CSV_PATH = os.environ.get(\n",
    "    'DOCVERIFY_DATA_CSV',\n",
    "    '/home/salgadev/code/DocVerifyRAG/name-description-discipline-data.csv',\n",
    ")\n",
    "\n",
    "loader = CSVLoader(file_path=DATA_CSV_PATH)\n",
    "data = loader.load()\n",
    "\n",
    "# Read the credentials explicitly so that a missing variable fails right here\n",
    "# with a KeyError. This single client is used for both indexing and retrieval.\n",
    "vectara = Vectara(\n",
    "    vectara_customer_id=os.environ['VECTARA_CUSTOMER_ID'],\n",
    "    vectara_corpus_id=os.environ['VECTARA_CORPUS_ID'],\n",
    "    vectara_api_key=os.environ['VECTARA_API_KEY'],\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Index the documents\n",
    "\n",
    "The rows are added through the client created above. LangChain's Vectara integration does not use a client-side embedding model (Vectara embeds the text itself), so none is loaded here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "indexed_ids = vectara.add_documents(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Retriever and helper chains"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Enable Vectara's generated summary (English) alongside the k=3 retrieved documents.\n",
    "summary_config = {\"is_enabled\": True, \"max_results\": 5, \"response_lang\": \"eng\"}\n",
    "retriever = vectara.as_retriever(\n",
    "    search_kwargs={\"k\": 3, \"summary_config\": summary_config}\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_sources(documents: list) -> list:\n",
    "    \"\"\"Return the retrieved source documents, i.e. everything but the last item.\n",
    "\n",
    "    With the summary enabled, the retriever's output ends with one extra\n",
    "    document that carries the generated summary; it is dropped here.\n",
    "    \"\"\"\n",
    "    return documents[:-1]\n",
    "\n",
    "\n",
    "def get_summary(documents: list) -> str:\n",
    "    \"\"\"Return the generated summary: the `page_content` of the last item.\"\"\"\n",
    "    return documents[-1].page_content\n",
    "\n",
    "\n",
    "# Build each chain once; the query cells below only call `.invoke(...)`.\n",
    "sources_chain = retriever | get_sources\n",
    "summary_chain = retriever | get_summary"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Queries\n",
    "\n",
    "### Documents of the electrical discipline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'The documents related to the electrical discipline include items like ISB-020-U3-W-E-01-B07005-002-020, which pertains to U3 740KV 2 USV, and ISB-020-U3-W-E-01-B07005-002-040 for U3 780KV 4 equipment. These documents are part of the E - Elektroanlagen discipline, focusing on electrical systems and installations [7][11]. Additionally, there are documents specifying different aspects such as AS 1_G010, AS 2_G011, and AS 1_G009, highlighting specific details within the electrical discipline documentation [7][11]. These documents are crucial for ensuring proper electrical planning, design, and implementation within various systems and structures.'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "electrical_query = \"Describe document related to the electrical discipline\"\n",
    "\n",
    "summary_chain.invoke(electrical_query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(page_content=': 12\\nName: ISB-020-U3-W-E-01-B07005-002-020\\nBeschreibung: E_020 U3 740_KV 2_USV\\nDisziplin: E - Elektroanlagen : 13\\nName: ISB-020-U3-W-E-01-B07005-002-040\\nBeschreibung: E_020 U3 780_KV 4\\nDisziplin: E - Elektroanlagen : 14\\nName: ISB-020-U3-W-E-01-B07005-003-010\\nBeschreibung: G_020 U3 711_AS 2_G011\\nDisziplin: E - Elektroanlagen : 15\\nName: ISB-020-U3-W-E-01-B15100-035-000\\nBeschreibung: Luftmengen Protokoll\\nDisziplin: L - Lueftung : 16\\nName: ISB-020-U3-W-E-01-B15100-036-000\\nBeschreibung: Luftmengen Protokoll\\nDisziplin: L - Lueftung', metadata={'source': 'langchain', 'row': '14', 'lang': 'deu', 'offset': '0', 'len': '110'}),\n",
       " Document(page_content=': 7\\nName: ISB-020-U3-W-E-01-B07005-001-010\\nBeschreibung: E_020 U3 780_KV 4_E031 E_Ladestationen\\nDisziplin: E - Elektroanlagen : 8\\nName: ISB-020-U3-W-E-01-B07005-001-020\\nBeschreibung: E_020 U3 740_KV 2\\nDisziplin: E - Elektroanlagen : 9\\nName: ISB-020-U3-W-E-01-B07005-001-040\\nBeschreibung: G_020 U3 779_AS 1_G009\\nDisziplin: E - Elektroanlagen : 10\\nName: ISB-020-U3-W-E-01-B07005-001-999\\nBeschreibung: 772 UV 1 G022 / WW 218057\\nDisziplin: E - Elektroanlagen : 11\\nName: ISB-020-U3-W-E-01-B07005-002-010\\nBeschreibung: G_020 U3 711_AS 1_G010\\nDisziplin: E - Elektroanlagen', metadata={'source': 'langchain', 'row': '9', 'lang': 'deu', 'offset': '0', 'len': '109'}),\n",
       " Document(page_content=': 11\\nName: ISB-020-U3-W-E-01-B07005-002-010\\nBeschreibung: G_020 U3 711_AS 1_G010\\nDisziplin: E - Elektroanlagen : 12\\nName: ISB-020-U3-W-E-01-B07005-002-020\\nBeschreibung: E_020 U3 740_KV 2_USV\\nDisziplin: E - Elektroanlagen : 13\\nName: ISB-020-U3-W-E-01-B07005-002-040\\nBeschreibung: E_020 U3 780_KV 4\\nDisziplin: E - Elektroanlagen : 14\\nName: ISB-020-U3-W-E-01-B07005-003-010\\nBeschreibung: G_020 U3 711_AS 2_G011\\nDisziplin: E - Elektroanlagen : 15\\nName: ISB-020-U3-W-E-01-B15100-035-000\\nBeschreibung: Luftmengen Protokoll\\nDisziplin: L - Lueftung', metadata={'source': 'langchain', 'row': '13', 'lang': 'deu', 'offset': '0', 'len': '105'})]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sources_chain.invoke(electrical_query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Comparing made-up metadata against the corpus\n",
    "\n",
    "In the recorded run below, Vectara could not produce a summary for this prompt."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "madeup_metadata = {'filename': 'school_plumbing.txt', 'description': 'This document describes the plumbing system for a typical school building, including potable water supply, fixtures and appliances, drainage waste and vent (DWV) systems, and stormwater management.', 'discipline': 'plumbing'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt_template = \"\"\"Compare the following metadata and return a confidence interval measuring how much the metadata is similar to your available information \n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'The returned results did not contain sufficient information to be summarized into a useful answer for your query. Please try a different search or restate your query differently.'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "metadata_query = f'{prompt_template}\\nmetadata:{madeup_metadata}'\n",
    "summary_chain.invoke(metadata_query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Classifying a description by discipline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "discipline_query = 'What discipline does this description belong to? Description: This document provides instructions for handling, assembly, maintenance, and troubleshooting of Hawle Flanschen-Schieber, primarily used in water supply systems with a maximum operating pressure of 25 bar and temperature of 40°C.'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'The description provided pertains to the discipline of Sanitaer (Sanitary), as indicated by search results [159] and [160]. These instructions are related to handling, assembly, maintenance, and troubleshooting of Hawle Flanschen-Schieber, commonly utilized in water supply systems with a maximum operating pressure of 25 bar and temperature of 40°C. The document likely focuses on the proper procedures for managing and servicing these components within sanitary systems.'"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "summary_chain.invoke(discipline_query)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}