farhananis005 committed on
Commit
94d4a49
1 Parent(s): 74f3f5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -59
app.py CHANGED
@@ -1,36 +1,16 @@
1
- # -*- coding: utf-8 -*-
2
- """Lawyer GPT
3
-
4
- Automatically generated by Colab.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1RPc_qH7s0_hsOswGpWRFaXbLT3eBIShJ
8
- """
9
-
10
- !pip install langchain
11
- !pip install langchain-openai
12
- !pip install PyPDF2
13
- !pip install pypdf
14
- !pip install docx2txt
15
- !pip install unstructured
16
- !pip install gradio
17
- !pip install faiss-cpu
18
- !pip install openai
19
- !pip install tiktoken
20
-
21
  import os
22
  import openai
23
 
24
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
25
- os.environ["OPENAI_API_KEY"] = "<REDACTED>"
26
- openai.api_key = "<REDACTED>"
27
 
28
  def save_docs(docs):
29
 
30
  import shutil
31
  import os
32
 
33
- output_dir="/content/docs/"
34
 
35
  if os.path.exists(output_dir):
36
  shutil.rmtree(output_dir)
@@ -43,6 +23,7 @@ def save_docs(docs):
43
 
44
  return "Successful!"
45
 
 
46
  def process_docs():
47
 
48
  from langchain.document_loaders import PyPDFLoader
@@ -55,19 +36,29 @@ def process_docs():
55
  from langchain_openai import OpenAIEmbeddings
56
  from langchain.text_splitter import RecursiveCharacterTextSplitter
57
 
58
- loader1 = DirectoryLoader('/content/docs/', glob="./*.pdf", loader_cls=PyPDFLoader)
 
 
59
  document1 = loader1.load()
60
 
61
- loader2 = DirectoryLoader('/content/docs/', glob="./*.txt", loader_cls=TextLoader)
 
 
62
  document2 = loader2.load()
63
 
64
- loader3 = DirectoryLoader('/content/docs/', glob="./*.docx", loader_cls=Docx2txtLoader)
 
 
65
  document3 = loader3.load()
66
 
67
- loader4 = DirectoryLoader('/content/docs/', glob="./*.csv", loader_cls=CSVLoader)
 
 
68
  document4 = loader4.load()
69
 
70
- loader5 = DirectoryLoader('/content/docs/', glob="./*.xlsx", loader_cls=UnstructuredExcelLoader)
 
 
71
  document5 = loader5.load()
72
 
73
  document1.extend(document2)
@@ -76,43 +67,45 @@ def process_docs():
76
  document1.extend(document5)
77
 
78
  text_splitter = RecursiveCharacterTextSplitter(
79
- chunk_size=1000,
80
- chunk_overlap=200,
81
- length_function=len
82
  )
83
 
84
  docs = text_splitter.split_documents(document1)
85
  embeddings = OpenAIEmbeddings()
86
 
87
  docs_db = FAISS.from_documents(docs, embeddings)
88
- docs_db.save_local("/content/docs_db/")
89
 
90
  return "Successful!"
91
 
 
92
  global agent
93
 
 
94
  def create_agent():
95
 
96
  from langchain_openai import ChatOpenAI
97
  from langchain.chains.conversation.memory import ConversationSummaryBufferMemory
98
  from langchain.chains import ConversationChain
 
99
  global agent
100
 
101
- llm = ChatOpenAI(model_name='gpt-3.5-turbo-16k')
102
  memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=1000)
103
  agent = ConversationChain(llm=llm, memory=memory, verbose=True)
104
 
105
  return "Successful!"
106
 
 
107
  def formatted_response(docs, question, response, state):
108
 
109
  formatted_output = response + "\n\nSources"
110
 
111
  for i, doc in enumerate(docs):
112
- source_info = doc.metadata.get('source', 'Unknown source')
113
- page_info = doc.metadata.get('page', None)
114
 
115
- doc_name = source_info.split('/')[-1].strip()
116
 
117
  if page_info is not None:
118
  formatted_output += f"\n{doc_name}\tpage no {page_info}"
@@ -122,18 +115,22 @@ def formatted_response(docs, question, response, state):
122
  state.append((question, formatted_output))
123
  return state, state
124
 
 
125
  def search_docs(prompt, question, state):
126
 
127
  from langchain_openai import OpenAIEmbeddings
128
  from langchain.vectorstores import FAISS
129
  from langchain.callbacks import get_openai_callback
 
130
  global agent
131
  agent = agent
132
 
133
  state = state or []
134
 
135
  embeddings = OpenAIEmbeddings()
136
- docs_db = FAISS.load_local("/content/docs_db/", embeddings, allow_dangerous_deserialization = True)
 
 
137
  docs = docs_db.similarity_search(question)
138
 
139
  prompt += "\n\n"
@@ -147,6 +144,7 @@ def search_docs(prompt, question, state):
147
 
148
  return formatted_response(docs, question, response, state)
149
 
 
150
  import gradio as gr
151
 
152
  css = """
@@ -164,45 +162,58 @@ with gr.Blocks(css=css) as demo:
164
  gr.Markdown("## <center>Lawyer GPT: Your AI Legal Assistant</center>")
165
 
166
  with gr.Tab("Lawyer GPT: Your AI Legal Assistant"):
167
- with gr.Column(elem_classes="col"):
168
 
169
- with gr.Tab("Upload and Process Documents"):
170
- with gr.Column():
171
 
172
- docs_upload_input = gr.Files(label="Upload File(s)")
173
- docs_upload_button = gr.Button("Upload")
174
- docs_upload_output = gr.Textbox(label="Output")
175
 
176
- docs_process_button = gr.Button("Process")
177
- docs_process_output = gr.Textbox(label="Output")
178
 
179
- create_agent_button = gr.Button("Create Agent")
180
- create_agent_output = gr.Textbox(label="Output")
181
 
182
- gr.ClearButton([docs_upload_input, docs_upload_output, docs_process_output, create_agent_output])
 
 
 
 
 
 
 
183
 
184
- with gr.Tab("Query Documents"):
185
- with gr.Column():
186
 
187
- docs_prompt_input = gr.Textbox(label="Custom Prompt")
188
 
189
- docs_chatbot = gr.Chatbot(label="Chats")
190
- docs_state = gr.State()
191
 
192
- docs_search_input = gr.Textbox(label="Question")
193
- docs_search_button = gr.Button("Search")
194
 
195
- gr.ClearButton([docs_prompt_input, docs_search_input])
196
 
197
  #########################################################################################################
198
 
199
- docs_upload_button.click(save_docs, inputs=docs_upload_input, outputs=docs_upload_output)
 
 
200
  docs_process_button.click(process_docs, inputs=None, outputs=docs_process_output)
201
  create_agent_button.click(create_agent, inputs=None, outputs=create_agent_output)
202
 
203
- docs_search_button.click(search_docs, inputs=[docs_prompt_input, docs_search_input, docs_state], outputs=[docs_chatbot, docs_state])
 
 
 
 
204
 
205
  #########################################################################################################
206
 
207
  demo.queue()
208
- demo.launch(debug=True, share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import openai
3
 
4
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
5
+ os.environ["OPENAI_API_KEY"]
6
+
7
 
8
  def save_docs(docs):
9
 
10
  import shutil
11
  import os
12
 
13
+ output_dir = "/home/user/app/docs/"
14
 
15
  if os.path.exists(output_dir):
16
  shutil.rmtree(output_dir)
 
23
 
24
  return "Successful!"
25
 
26
+
27
  def process_docs():
28
 
29
  from langchain.document_loaders import PyPDFLoader
 
36
  from langchain_openai import OpenAIEmbeddings
37
  from langchain.text_splitter import RecursiveCharacterTextSplitter
38
 
39
+ loader1 = DirectoryLoader(
40
+ "/home/user/app/docs/", glob="./*.pdf", loader_cls=PyPDFLoader
41
+ )
42
  document1 = loader1.load()
43
 
44
+ loader2 = DirectoryLoader(
45
+ "/home/user/app/docs/", glob="./*.txt", loader_cls=TextLoader
46
+ )
47
  document2 = loader2.load()
48
 
49
+ loader3 = DirectoryLoader(
50
+ "/home/user/app/docs/", glob="./*.docx", loader_cls=Docx2txtLoader
51
+ )
52
  document3 = loader3.load()
53
 
54
+ loader4 = DirectoryLoader(
55
+ "/home/user/app/docs/", glob="./*.csv", loader_cls=CSVLoader
56
+ )
57
  document4 = loader4.load()
58
 
59
+ loader5 = DirectoryLoader(
60
+ "/home/user/app/docs/", glob="./*.xlsx", loader_cls=UnstructuredExcelLoader
61
+ )
62
  document5 = loader5.load()
63
 
64
  document1.extend(document2)
 
67
  document1.extend(document5)
68
 
69
  text_splitter = RecursiveCharacterTextSplitter(
70
+ chunk_size=1000, chunk_overlap=200, length_function=len
 
 
71
  )
72
 
73
  docs = text_splitter.split_documents(document1)
74
  embeddings = OpenAIEmbeddings()
75
 
76
  docs_db = FAISS.from_documents(docs, embeddings)
77
+ docs_db.save_local("/home/user/app/docs_db/")
78
 
79
  return "Successful!"
80
 
81
+
82
  global agent
83
 
84
+
85
  def create_agent():
86
 
87
  from langchain_openai import ChatOpenAI
88
  from langchain.chains.conversation.memory import ConversationSummaryBufferMemory
89
  from langchain.chains import ConversationChain
90
+
91
  global agent
92
 
93
+ llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k")
94
  memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=1000)
95
  agent = ConversationChain(llm=llm, memory=memory, verbose=True)
96
 
97
  return "Successful!"
98
 
99
+
100
  def formatted_response(docs, question, response, state):
101
 
102
  formatted_output = response + "\n\nSources"
103
 
104
  for i, doc in enumerate(docs):
105
+ source_info = doc.metadata.get("source", "Unknown source")
106
+ page_info = doc.metadata.get("page", None)
107
 
108
+ doc_name = source_info.split("/")[-1].strip()
109
 
110
  if page_info is not None:
111
  formatted_output += f"\n{doc_name}\tpage no {page_info}"
 
115
  state.append((question, formatted_output))
116
  return state, state
117
 
118
+
119
  def search_docs(prompt, question, state):
120
 
121
  from langchain_openai import OpenAIEmbeddings
122
  from langchain.vectorstores import FAISS
123
  from langchain.callbacks import get_openai_callback
124
+
125
  global agent
126
  agent = agent
127
 
128
  state = state or []
129
 
130
  embeddings = OpenAIEmbeddings()
131
+ docs_db = FAISS.load_local(
132
+ "/home/user/app/docs_db/", embeddings, allow_dangerous_deserialization=True
133
+ )
134
  docs = docs_db.similarity_search(question)
135
 
136
  prompt += "\n\n"
 
144
 
145
  return formatted_response(docs, question, response, state)
146
 
147
+
148
  import gradio as gr
149
 
150
  css = """
 
162
  gr.Markdown("## <center>Lawyer GPT: Your AI Legal Assistant</center>")
163
 
164
  with gr.Tab("Lawyer GPT: Your AI Legal Assistant"):
165
+ with gr.Column(elem_classes="col"):
166
 
167
+ with gr.Tab("Upload and Process Documents"):
168
+ with gr.Column():
169
 
170
+ docs_upload_input = gr.Files(label="Upload File(s)")
171
+ docs_upload_button = gr.Button("Upload")
172
+ docs_upload_output = gr.Textbox(label="Output")
173
 
174
+ docs_process_button = gr.Button("Process")
175
+ docs_process_output = gr.Textbox(label="Output")
176
 
177
+ create_agent_button = gr.Button("Create Agent")
178
+ create_agent_output = gr.Textbox(label="Output")
179
 
180
+ gr.ClearButton(
181
+ [
182
+ docs_upload_input,
183
+ docs_upload_output,
184
+ docs_process_output,
185
+ create_agent_output,
186
+ ]
187
+ )
188
 
189
+ with gr.Tab("Query Documents"):
190
+ with gr.Column():
191
 
192
+ docs_prompt_input = gr.Textbox(label="Custom Prompt")
193
 
194
+ docs_chatbot = gr.Chatbot(label="Chats")
195
+ docs_state = gr.State()
196
 
197
+ docs_search_input = gr.Textbox(label="Question")
198
+ docs_search_button = gr.Button("Search")
199
 
200
+ gr.ClearButton([docs_prompt_input, docs_search_input])
201
 
202
  #########################################################################################################
203
 
204
+ docs_upload_button.click(
205
+ save_docs, inputs=docs_upload_input, outputs=docs_upload_output
206
+ )
207
  docs_process_button.click(process_docs, inputs=None, outputs=docs_process_output)
208
  create_agent_button.click(create_agent, inputs=None, outputs=create_agent_output)
209
 
210
+ docs_search_button.click(
211
+ search_docs,
212
+ inputs=[docs_prompt_input, docs_search_input, docs_state],
213
+ outputs=[docs_chatbot, docs_state],
214
+ )
215
 
216
  #########################################################################################################
217
 
218
  demo.queue()
219
+ demo.launch(debug=True, share=True)