# NOTE(review): scrape residue from the Hugging Face Spaces page header —
# the Space's status was "Runtime error" at capture time. Kept as a comment
# so the file remains valid Python.
| import gradio as gr | |
| from langchain.document_loaders import PyPDFLoader, DirectoryLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.embeddings import OpenAIEmbeddings | |
| from langchain.vectorstores import Chroma | |
| from langchain.retrievers import SVMRetriever | |
| from langchain.chains import RetrievalQA | |
| from langchain.chat_models import ChatOpenAI | |
def load_data():
    """Load PDFs from ./data, chunk them, and build two retrieval backends.

    Returns:
        tuple: ``(svm_retriever, vectorstore)`` — an ``SVMRetriever`` and a
        Chroma vector store, both built over the same document chunks.
        Requires OpenAI credentials (embeddings are computed remotely).
    """
    # Recursively pick up every PDF under ./data.
    loader = DirectoryLoader('./data', glob="**/*.pdf", show_progress=True, loader_cls=PyPDFLoader)
    docs = loader.load()
    # PDFs embed hard line breaks mid-sentence; flatten them to spaces.
    # (Plain loop — the original abused a list comprehension for this side effect,
    # and dumped every document with a debug print.)
    for doc in docs:
        doc.page_content = doc.page_content.replace("\n", " ")
    # Overlapping chunks sized for embedding + retrieval.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    all_splits = text_splitter.split_documents(docs)
    # One embeddings client shared by both stores (the original created two).
    embeddings = OpenAIEmbeddings()
    vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)
    # https://python.langchain.com/docs/use_cases/question_answering.html#go-deeper-3
    svm_retriever = SVMRetriever.from_documents(all_splits, embeddings)
    return svm_retriever, vectorstore
# Build the index once at import time so every request reuses the same stores.
svm_retriever, vectorstore = load_data()
def process_question(question, svm_retriever=svm_retriever, vectorstore=vectorstore):
    """Answer ``question`` over the indexed PDFs and list the source chunks.

    Args:
        question: the user's question as plain text.
        svm_retriever: retained for backward compatibility with existing
            callers; no longer queried — the original ran an SVM retrieval
            per call whose only use was a debug ``print(len(...))``, costing
            an extra embedding API round-trip per request.
        vectorstore: Chroma store backing the RetrievalQA chain.

    Returns:
        str: the model's answer followed by the chunks it was drawn from.
    """
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectorstore.as_retriever(),
        return_source_documents=True,
    )
    result = qa_chain({"query": question})

    # Assemble the whole report as a list of lines and join once at the end.
    lines = [
        "",
        "============RESULT==============",
        "",
        result["result"],
        "",
        "============SOURCES=============",
        "",
    ]
    for i, doc in enumerate(result["source_documents"]):
        lines.append(f"* CHUNK: {i} *")
        lines.append(f"original doc: {doc.metadata['source']}")
        lines.append(doc.page_content)
        lines.append("")  # blank line between chunks
    return "\n".join(lines)
# Gradio UI: one text box in, the formatted answer + sources out.
# (Dropped the pointless f-prefixes on constant example strings and fixed
# the "risksthat" typo in the third example.)
iface = gr.Interface(
    fn=process_question,  # the function to wrap
    inputs="text",        # the input type
    outputs="text",       # the output type
    examples=[
        ["what is the process of raising an incident?"],
        ["What is Cx0 program management?"],
        ["What is process for identifying risks that can impact the desired outcomes of a project?"],
        ["What is the release management process?"],
    ],
)
# Start the Gradio server only when this file is executed directly.
if __name__ == "__main__":
    iface.launch()