"""Helpers that query the retrieval QA chain and format its response as text."""

import os
import glob
import textwrap
import time

import langchain
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import PromptTemplate, LLMChain
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
import torch
import transformers

from model import qa_chain


def wrap_text_preserve_newlines(text: str, width: int = 700) -> str:
    """Wrap each line of *text* to at most *width* columns.

    The text is split on newline characters and every resulting line is
    wrapped on its own, so the line breaks already present in *text* are kept
    (blank lines stay blank).

    Args:
        text: The text to wrap.
        width: Maximum length of a wrapped line.

    Returns:
        The wrapped text, with lines joined by newline characters.
    """
    wrapped_lines = [
        textwrap.fill(line, width=width) for line in text.split('\n')
    ]
    return '\n'.join(wrapped_lines)


def _format_source(source) -> str:
    """Return ``"<file name without extension> - page: <page>"`` for a source document."""
    file_name = source.metadata['source'].split('/')[-1]
    # splitext removes whatever extension is present (or nothing at all)
    # instead of blindly cutting the last four characters.
    stem = os.path.splitext(file_name)[0]
    return stem + ' - page: ' + str(source.metadata['page'])


def process_llm_response(llm_response) -> str:
    """Format a QA chain response as answer text followed by its sources.

    Args:
        llm_response: Mapping with a ``'result'`` string and a
            ``'source_documents'`` iterable. Each source document must expose
            a ``metadata`` mapping with ``'source'`` (a ``/``-separated path)
            and ``'page'`` entries.

    Returns:
        The wrapped answer, then a ``Sources:`` section listing one
        ``"<file> - page: <n>"`` entry per source document.
    """
    ans = wrap_text_preserve_newlines(llm_response['result'])
    sources_used = ' \n'.join(
        _format_source(source) for source in llm_response['source_documents']
    )
    return ans + '\n\nSources: \n' + sources_used


def llm_ans(query):
    """Run *query* through ``qa_chain`` and return the formatted answer.

    The formatted response is cut so that it starts right after the first
    ``"Answer:"`` marker. If the marker does not occur, the whole formatted
    response is returned unchanged.

    Args:
        query: The question passed to ``qa_chain.invoke``.

    Returns:
        The answer text followed by the ``Sources:`` section.
    """
    llm_response = qa_chain.invoke(query)
    ans = process_llm_response(llm_response)

    _, marker, tail = ans.partition("Answer:")
    if not marker:
        # No marker present: return everything rather than slicing at a
        # meaningless offset and losing the start of the response.
        return ans
    # Drop the single separator character after the marker, but only when it
    # is whitespace, so an answer glued to the colon keeps its first letter.
    if tail[:1].isspace():
        tail = tail[1:]
    return tail


# Example usage:
# query = "what are computer networks?"
# result=llm_ans(query)
# print(result)
# print(type(result))