In [1]:
# Standard library
import os

# Third-party
from dotenv import load_dotenv
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import Vectara

# Read variables from a local .env file into os.environ so the credential
# lookups in the next cell can find them. Left as the last expression so the
# cell still displays the boolean it returns.
load_dotenv()

True

In [2]:
# Location of the CSV file to load. An absolute path into one developer's home
# directory only works on that machine, so it can be overridden through the
# DOCVERIFY_CSV_PATH environment variable (for example from the .env file read
# by load_dotenv() in the first cell). The original location stays as the
# fallback so existing setups keep working unchanged.
csv_path = os.environ.get(
    'DOCVERIFY_CSV_PATH',
    '/home/salgadev/code/DocVerifyRAG/name-description-discipline-data.csv',
)
loader = CSVLoader(file_path=csv_path)
data = loader.load()

# Vectara credentials are mandatory: indexing os.environ directly raises
# KeyError right here if any of them is missing.
vectara_customer_id = os.environ['VECTARA_CUSTOMER_ID']
vectara_corpus_id = os.environ['VECTARA_CORPUS_ID']
vectara_api_key = os.environ['VECTARA_API_KEY']

# Client bound to the configured customer/corpus.
# NOTE(review): the cells visible below build a separate `vectara` object via
# Vectara.from_documents and never reference `vectorstore` -- confirm whether
# this instance is still needed.
vectorstore = Vectara(
    vectara_customer_id=vectara_customer_id,
    vectara_corpus_id=vectara_corpus_id,
    vectara_api_key=vectara_api_key,
)

In [3]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model wrapper; it is handed to Vectara.from_documents in the next cell.
# NOTE(review): Vectara is understood to compute embeddings on its own side, so
# this model may never actually be used by the vector store -- confirm against
# the LangChain Vectara documentation before keeping the model download.
embeddings = HuggingFaceEmbeddings(
    model_name="intfloat/multilingual-e5-large",
)

In [4]:
# Build the Vectara store from the loaded CSV documents.
# NOTE(review): no credentials are passed here, so the integration presumably
# picks up the VECTARA_* environment variables itself -- confirm.
# NOTE(review): re-running this cell presumably uploads the same documents
# again -- verify how the corpus handles duplicates.
vectara = Vectara.from_documents(
    data,
    embedding=embeddings,
)

In [5]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain



In [7]:
# Summary options forwarded to the retriever: summarisation switched on,
# max_results of 5, response language "eng".
summary_config = dict(is_enabled=True, max_results=5, response_lang="eng")

# Retriever with k=3 and the summary options above. The helper functions in the
# next cell treat the last returned document as the summary and the ones before
# it as the sources.
retriever = vectara.as_retriever(
    search_kwargs=dict(k=3, summary_config=summary_config),
)

In [8]:
def get_sources(documents):
    """Return every retrieved document except the last one.

    The last element is the one `get_summary` reads, so this yields the
    remaining entries in their original order.

    Args:
        documents: Sliceable sequence of retrieved documents.

    Returns:
        The same kind of sequence without its final element (empty when the
        input has zero or one element).
    """
    all_but_last = slice(None, -1)
    return documents[all_but_last]


def get_summary(documents):
    """Return the text of the last retrieved document.

    Args:
        documents: Indexable sequence whose final element exposes a
            `page_content` attribute.

    Returns:
        The `page_content` of the final element.

    Raises:
        IndexError: If `documents` is empty.
    """
    summary_document = documents[-1]
    return summary_document.page_content

In [9]:
query_str = "Describe document related to the electrical discipline"

# Pipe the retriever output into get_summary, which keeps only the text of the
# last returned document, then run the composed chain on the query.
summary_chain = retriever | get_summary
summary_chain.invoke(query_str)

'The documents related to the electrical discipline include items like ISB-020-U3-W-E-01-B07005-002-020, which pertains to U3 740KV 2 USV, and ISB-020-U3-W-E-01-B07005-002-040 for U3 780KV 4 equipment. These documents are part of the E - Elektroanlagen discipline, focusing on electrical systems and installations [7][11]. Additionally, there are documents specifying different aspects such as AS 1_G010, AS 2_G011, and AS 1_G009, highlighting specific details within the electrical discipline documentation [7][11]. These documents are crucial for ensuring proper electrical planning, design, and implementation within various systems and structures.'

In [10]:
# Same query as the previous cell (query_str is reused), but keep every
# returned document except the last one.
sources_chain = retriever | get_sources
sources_chain.invoke(query_str)



[Document(page_content=': 12\nName: ISB-020-U3-W-E-01-B07005-002-020\nBeschreibung: E_020 U3 740_KV 2_USV\nDisziplin: E - Elektroanlagen : 13\nName: ISB-020-U3-W-E-01-B07005-002-040\nBeschreibung: E_020 U3 780_KV 4\nDisziplin: E - Elektroanlagen : 14\nName: ISB-020-U3-W-E-01-B07005-003-010\nBeschreibung: G_020 U3 711_AS 2_G011\nDisziplin: E - Elektroanlagen : 15\nName: ISB-020-U3-W-E-01-B15100-035-000\nBeschreibung: Luftmengen Protokoll\nDisziplin: L - Lueftung : 16\nName: ISB-020-U3-W-E-01-B15100-036-000\nBeschreibung: Luftmengen Protokoll\nDisziplin: L - Lueftung', metadata={'source': 'langchain', 'row': '14', 'lang': 'deu', 'offset': '0', 'len': '110'}),
 Document(page_content=': 7\nName: ISB-020-U3-W-E-01-B07005-001-010\nBeschreibung: E_020 U3 780_KV 4_E031 E_Ladestationen\nDisziplin: E - Elektroanlagen : 8\nName: ISB-020-U3-W-E-01-B07005-001-020\nBeschreibung: E_020 U3 740_KV 2\nDisziplin: E - Elektroanlagen : 9\nName: ISB-020-U3-W-E-01-B07005-001-040\nBeschreibung: G_020 U3 779_A

In [11]:
# Hand-written sample metadata record; it is interpolated into the query built
# two cells below. Key order is kept because it shows up in the dict's repr.
madeup_metadata = {
    'filename': 'school_plumbing.txt',
    'description': 'This document describes the plumbing system for a typical school building, including potable water supply, fixtures and appliances, drainage waste and vent (DWV) systems, and stormwater management.',
    'discipline': 'plumbing',
}

In [12]:
# Instruction text placed in front of the metadata in the query built in the
# next cell. The trailing space and newline are part of the runtime string and
# are spelled out explicitly so they cannot be lost to whitespace trimming.
prompt_template = (
    "Compare the following metadata and return a confidence interval "
    "measuring how much the metadata is similar to your available information \n"
)

In [13]:
# Query text = instruction template, a newline, then the metadata dict rendered
# with its default string form.
query_str = prompt_template + '\nmetadata:' + str(madeup_metadata)

summary_chain = retriever | get_summary
summary_chain.invoke(query_str)

'The returned results did not contain sufficient information to be summarized into a useful answer for your query. Please try a different search or restate your query differently.'

In [15]:
# Classification-style question: the description of a document is supplied and
# the query asks which discipline it belongs to.
query_str = (
    'What discipline does this description belong to? '
    'Description: This document provides instructions for handling, assembly, '
    'maintenance, and troubleshooting of Hawle Flanschen-Schieber, primarily used '
    'in water supply systems with a maximum operating pressure of 25 bar and '
    'temperature of 40°C.'
)


In [16]:
# Run the discipline question from the previous cell through the retriever and
# keep only the text of the last returned document.
summary_chain = retriever | get_summary
summary_chain.invoke(query_str)

'The description provided pertains to the discipline of Sanitaer (Sanitary), as indicated by search results [159] and [160]. These instructions are related to handling, assembly, maintenance, and troubleshooting of Hawle Flanschen-Schieber, commonly utilized in water supply systems with a maximum operating pressure of 25 bar and temperature of 40°C. The document likely focuses on the proper procedures for managing and servicing these components within sanitary systems.'