# Reference https://huggingface.co/spaces/johnmuchiri/anspro1/blob/main/app.py
# Resource https://python.langchain.com/docs/modules/chains
import os

import pinecone
import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_community.vectorstores.pinecone import Pinecone
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

load_dotenv()

# Goal: a Streamlit app where the user uploads a PDF and asks questions
# about it; answers are shown together with the source pages they came from.

# Directory where uploaded PDF files are persisted before being loaded.
SAVE_DIR = "pdf"


def generate_response(pages, query_text, k, chain_type):
    """Index *pages* in Pinecone and answer *query_text* with sources.

    Args:
        pages: list of LangChain ``Document`` chunks from the uploaded PDF.
        query_text: the user's question.
        k: number of most-similar chunks to retrieve.
        chain_type: LangChain combine-documents strategy
            ("stuff", "map_reduce", "refine" or "map_rerank").

    Returns:
        The chain's response dict (keys include ``answer`` and
        ``source_documents``), or ``None`` when there is nothing to index.
    """
    # Guard clause: nothing uploaded yet, so there is nothing to answer from.
    if not pages:
        return None

    pinecone.init(
        api_key=os.getenv("PINECONE_API_KEY"),
        environment=os.getenv("PINECONE_ENV_NAME"),
    )
    vector_db = Pinecone.from_documents(
        documents=pages,
        embedding=OpenAIEmbeddings(),
        index_name="document-chat",
    )
    # BUG FIX: the original passed ``search_kwards`` (typo), so the ``k``
    # slider was silently ignored; ``search_kwargs`` is the correct name.
    retriever = vector_db.as_retriever(
        search_type="similarity", search_kwargs={"k": k}
    )

    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
    # NOTE(review): the original built a ChatPromptTemplate here but
    # ``from_chain_type`` takes no ``prompt_template`` kwarg, and the
    # template's {user_input} variable does not match the chain's expected
    # {summaries}/{question} variables — a custom prompt would have to go
    # through ``chain_type_kwargs={"prompt": ...}`` with matching variables.
    qa = RetrievalQAWithSourcesChain.from_chain_type(
        llm=llm,
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
    )
    return qa({"question": query_text})


def visual_annotate(document, answer):
    """Return *document* with the first occurrence of *answer* bolded.

    Highlights (Markdown ``**...**``) where the answer text appears in the
    document so the user can see what the response is grounded on.

    Args:
        document: the full document text.
        answer: the answer string to locate and highlight.

    Returns:
        The annotated document, or the document unchanged when *answer*
        does not occur verbatim.
    """
    start = document.find(answer)
    # BUG FIX: the original sliced with start == -1 when the answer was not
    # found, producing a garbled result; return the document untouched instead.
    if start == -1:
        return document
    end = start + len(answer)
    return document[:start] + "**" + document[start:end] + "**" + document[end:]


st.set_page_config(page_title="🦜🔗 Ask the Doc App")
st.title("Document Question Answering App")

# Sidebar: collect the API credentials and export them as environment
# variables so the LangChain/Pinecone clients can pick them up.
with st.sidebar.form(key="sidebar-form"):
    st.header("Configurations")
    openai_api_key = st.text_input("Enter OpenAI API key here", type="password")
    os.environ["OPENAI_API_KEY"] = openai_api_key
    pinecone_api_key = st.text_input(
        "Enter your Pinecone environment key", type="password"
    )
    os.environ["PINECONE_API_KEY"] = pinecone_api_key
    pinecone_env_name = st.text_input("Enter your Pinecone environment name")
    os.environ["PINECONE_ENV_NAME"] = pinecone_env_name
    submitted = st.form_submit_button(
        label="Submit",
        # disabled=not (openai_api_key and pinecone_api_key and pinecone_env_name),
    )

left_column, right_column = st.columns(2)

with left_column:
    uploaded_file = st.file_uploader("Choose a pdf file", type="pdf")
    pages = []
    if uploaded_file is not None:
        # BUG FIX: the save directory may not exist yet; create it so the
        # open() below cannot fail with FileNotFoundError.
        os.makedirs(SAVE_DIR, exist_ok=True)
        file_path = os.path.join(SAVE_DIR, uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.success(f"File {uploaded_file.name} is saved at path {file_path}")

        # Split the PDF into ~500-character chunks for embedding/retrieval.
        loader = PyPDFLoader(file_path=file_path)
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500, chunk_overlap=0
        )
        pages = loader.load_and_split(text_splitter=text_splitter)

    query_text = st.text_input(
        "Enter your question:", placeholder="Please provide a short summary."
    )
    chain_type = st.selectbox(
        "chain type", ("stuff", "map_reduce", "refine", "map_rerank")
    )
    k = st.slider("Number of relevant chunks", 1, 5)

    # BUG FIX: only run the (slow, billable) chain when there is both a
    # document and a question; the original called it unconditionally.
    response = None
    if pages and query_text:
        with st.spinner("Retrieving and generating a response ..."):
            response = generate_response(
                pages=pages, query_text=query_text, k=k, chain_type=chain_type
            )

with right_column:
    st.write("Output of your question")
    if response:
        st.subheader("Result")
        st.write(response["answer"])
        st.subheader("source_documents")
        for each in response["source_documents"]:
            st.write("page: ", each.metadata["page"])
            st.write("source: ", each.metadata["source"])
    else:
        st.write("response not showing at the moment")