Zwea Htet committed on
Commit 781a2e4 • 1 Parent(s): d38bde6

added LangChain + OpenAI support for document chat

Files changed (3)
  1. .gitignore +3 -0
  2. app.py +156 -0
  3. requirements.txt +8 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
+ venv
+
+ .env
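The ignored .env file is what load_dotenv() in app.py reads when running locally. A minimal sketch of its contents, with placeholder values; the variable names are taken from the os.getenv calls in app.py, and OPENAI_API_KEY is included on the assumption that the OpenAI client picks it up from the environment:

OPENAI_API_KEY=sk-...
PINECONE_API_KEY=your-pinecone-api-key
PINECONE_ENV_NAME=your-pinecone-environment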
app.py ADDED
@@ -0,0 +1,156 @@
+ # Reference https://huggingface.co/spaces/johnmuchiri/anspro1/blob/main/app.py
+ # Resource https://python.langchain.com/docs/modules/chains
+
+ import streamlit as st
+ from langchain_community.document_loaders.pdf import PyPDFLoader
+ from langchain_community.vectorstores import Pinecone
+ from langchain_openai import OpenAIEmbeddings, OpenAI
+ from langchain.memory import ConversationBufferMemory
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain.chains import ConversationalRetrievalChain, RetrievalQA
+ import openai
+ from dotenv import load_dotenv
+ import os
+
+ import pinecone  # pinecone-client, needed for pinecone.init() below
+
+ load_dotenv()
+
+ # Streamlit app for Hugging Face Spaces that uses the OpenAI API and the
+ # LangChain data framework: the user uploads a document and asks questions
+ # about it; the app responds with an answer and also displays the source
+ # chunk the answer was retrieved from (a visual_annotate helper for inline
+ # highlighting is sketched below but not yet wired in).
+
+ # set the path where you want to save the uploaded PDF file
+ SAVE_DIR = "pdf"
+
+
+ def generate_response(pages, query_text, k, chain_type):
+     if pages is not None:
+         pinecone.init(
+             api_key=os.getenv("PINECONE_API_KEY"),
+             environment=os.getenv("PINECONE_ENV_NAME"),
+         )
+
+         # embed the pages and upsert them into the existing Pinecone index
+         vector_db = Pinecone.from_documents(
+             documents=pages, embedding=OpenAIEmbeddings(), index_name="openai-index"
+         )
+
+         retriever = vector_db.as_retriever(
+             search_type="similarity", search_kwargs={"k": k}
+         )
+
+         # create a chain to answer questions
+         qa = RetrievalQA.from_chain_type(
+             llm=OpenAI(),
+             chain_type=chain_type,
+             retriever=retriever,
+             return_source_documents=True,
+         )
+
+         response = qa({"query": query_text})
+         return response
+
+
+ def visual_annotate(document, answer):
+     # Implement this function according to your specific requirements:
+     # highlight the part of the document where the answer was found
+     start = document.find(answer)
+     annotated_document = (
+         document[:start]
+         + "**"
+         + document[start : start + len(answer)]
+         + "**"
+         + document[start + len(answer) :]
+     )
+     return annotated_document
+
+
+ st.set_page_config(page_title="🦜🔗 Ask the Doc App")
+ st.title("Document Question Answering App")
+
+ with st.sidebar.form(key="sidebar-form"):
+     st.header("Configurations")
+
+     openai_api_key = st.text_input("Enter your OpenAI API key", type="password")
+     os.environ["OPENAI_API_KEY"] = openai_api_key
+
+     pinecone_api_key = st.text_input(
+         "Enter your Pinecone API key", type="password"
+     )
+     os.environ["PINECONE_API_KEY"] = pinecone_api_key
+
+     pinecone_env_name = st.text_input("Enter your Pinecone environment name")
+     os.environ["PINECONE_ENV_NAME"] = pinecone_env_name
+
+     submitted = st.form_submit_button(
+         label="Submit",
+         disabled=not (openai_api_key and pinecone_api_key and pinecone_env_name),
+     )
+
+ left_column, right_column = st.columns(2)
+
+ pages = None
+ response = None
+
+ with left_column:
+     uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
+
+     if uploaded_file is not None:
+         # save the uploaded file to the specified directory
+         os.makedirs(SAVE_DIR, exist_ok=True)
+         file_path = os.path.join(SAVE_DIR, uploaded_file.name)
+         with open(file_path, "wb") as f:
+             f.write(uploaded_file.getbuffer())
+         st.success(f"File {uploaded_file.name} is saved at path {file_path}")
+
+         loader = PyPDFLoader(file_path=file_path)
+         pages = loader.load_and_split()
+
+     query_text = st.text_input(
+         "Enter your question:", placeholder="Please provide a short summary."
+     )
+
+     chain_type = st.selectbox(
+         "chain type", ("stuff", "map_reduce", "refine", "map_rerank")
+     )
+
+     k = st.slider("Number of relevant chunks", 1, 5)
+
+     # only query once a document has been indexed and a question entered
+     if pages is not None and query_text:
+         with st.spinner("Retrieving and generating a response ..."):
+             response = generate_response(
+                 pages=pages,
+                 query_text=query_text,
+                 k=k,
+                 chain_type=chain_type,
+             )
+
+ with right_column:
+     st.write("Output of your question")
+
+     if response is not None:
+         st.subheader("Result")
+         st.write(response["result"])
+
+         st.subheader("Source documents")
+         st.write(response["source_documents"][0])
+
+
+ # with st.form("myform", clear_on_submit=True):
+ #     openai_api_key = st.text_input(
+ #         "OpenAI API Key", type="password", disabled=not (uploaded_file and query_text)
+ #     )
+ #     submitted = st.form_submit_button(
+ #         "Submit", disabled=not (pages and query_text)
+ #     )
+ #     if submitted and openai_api_key.startswith("sk-"):
+ #         with st.spinner("Calculating..."):
+ #             response = generate_response(pages, openai_api_key, query_text)
+ #             result.append(response)
+ #         del openai_api_key
+
+ # if len(result):
+ #     st.info(response)
+
+ # if st.button("Get Answer"):
+ #     answer = get_answer(question, document)
+ #     st.write(answer["answer"])
+
+ #     # Visual annotation on the document
+ #     annotated_document = visual_annotate(document, answer["answer"])
+ #     st.markdown(annotated_document)
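Pinecone.from_documents in generate_response writes into an index named "openai-index" but does not create it, so the index presumably has to exist before the app is first queried. A minimal one-time setup sketch, assuming the pinecone-client v2 module-level API (matching the pinecone.init call above) and 1536-dimensional vectors from the default OpenAIEmbeddings model; this helper is not part of the commit:

import os

import pinecone
from dotenv import load_dotenv

# hypothetical one-off setup script, run once before using the app
load_dotenv()
pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),
    environment=os.getenv("PINECONE_ENV_NAME"),
)

# 1536 matches text-embedding-ada-002, the default model behind OpenAIEmbeddings
if "openai-index" not in pinecone.list_indexes():
    pinecone.create_index(name="openai-index", dimension=1536, metric="cosine")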
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ transformers
+ langchain
+ openai
+ python-dotenv
+ langchain_openai
+ langchain_community
+ pypdf
+ pinecone-client
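All of the packages above are left unpinned. Worth noting: app.py relies on the module-level pinecone.init(...) call from the pinecone-client v2 API, which pinecone-client 3.x replaced with a Pinecone client class, so pinning the dependency (for example pinecone-client<3, an assumption rather than part of this commit) is presumably needed for the code above to keep working. streamlit itself is not listed, which is the usual pattern on Hugging Face Spaces where the Streamlit Space SDK provides it.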