# LawyerGPT — app.py
# Hugging Face Space by farhananis005 (commit 3b90d90).
import os

import openai

# Avoid tokenizer fork warnings/deadlocks from HF tokenizers under Gradio's workers.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Configure the OpenAI client from the environment. Indexing (rather than
# .get) deliberately fails fast with a KeyError when the key is missing;
# the original bare `os.environ["OPENAI_API_KEY"]` discarded the value.
openai.api_key = os.environ["OPENAI_API_KEY"]
def save_docs(docs, output_dir="/home/user/app/docs/"):
    """Copy uploaded files into *output_dir*, clearing any previous contents.

    Args:
        docs: iterable of uploaded-file objects, each exposing a ``.name``
            attribute holding a filesystem path (Gradio upload objects do).
        output_dir: destination directory; recreated from scratch on every
            call so stale documents never leak into a new session.

    Returns:
        The status string "Successful!" for display in the Gradio UI.
    """
    import os
    import shutil

    # ignore_errors/exist_ok replace the original's redundant exists() checks.
    shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir, exist_ok=True)
    for doc in docs:
        shutil.copy(doc.name, output_dir)
    return "Successful!"
def process_docs():
    """Load every supported document under /home/user/app/docs/, split the
    text into overlapping chunks, embed the chunks with OpenAI embeddings,
    and persist a FAISS index to /home/user/app/docs_db/.

    Returns:
        The status string "Successful!" for display in the Gradio UI.
    """
    from langchain.document_loaders import (
        DirectoryLoader,
        Docx2txtLoader,
        PyPDFLoader,
        TextLoader,
        UnstructuredExcelLoader,
    )
    from langchain.document_loaders.csv_loader import CSVLoader
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.vectorstores import FAISS

    docs_dir = "/home/user/app/docs/"
    # One (glob, loader) pair per supported file type; replaces five
    # copy-pasted loader stanzas from the original.
    loaders = [
        ("./*.pdf", PyPDFLoader),
        ("./*.txt", TextLoader),
        ("./*.docx", Docx2txtLoader),
        ("./*.csv", CSVLoader),
        ("./*.xlsx", UnstructuredExcelLoader),
    ]
    documents = []
    for pattern, loader_cls in loaders:
        documents.extend(
            DirectoryLoader(docs_dir, glob=pattern, loader_cls=loader_cls).load()
        )

    # 1000-char chunks with 200-char overlap keep context across boundaries.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    docs = text_splitter.split_documents(documents)

    docs_db = FAISS.from_documents(docs, OpenAIEmbeddings())
    docs_db.save_local("/home/user/app/docs_db/")
    return "Successful!"
# Conversation agent handle; populated by create_agent() before first use.
# (The original module-level `global agent` statement was a no-op and never
# actually defined the name.)
agent = None

def create_agent():
    """Create the module-level conversation agent used by search_docs().

    Builds a gpt-3.5-turbo-16k chat model wrapped in a ConversationChain
    with a summarizing buffer memory capped at 1000 tokens.

    Returns:
        The status string "Successful!" for display in the Gradio UI.
    """
    from langchain.chat_models import ChatOpenAI
    from langchain.chains.conversation.memory import ConversationSummaryBufferMemory
    from langchain.chains import ConversationChain

    global agent
    llm = ChatOpenAI(model_name='gpt-3.5-turbo-16k')
    memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=1000)
    agent = ConversationChain(llm=llm, memory=memory, verbose=True)
    return "Successful!"
def formatted_response(docs, question, response, state):
    """Append (question, answer-with-sources) to the chat state.

    Builds a source listing from each document's metadata (file name plus
    page number when present) and returns the state twice, because Gradio
    expects one value per output component (chatbot and state).
    """
    source_lines = []
    for doc in docs:
        source = doc.metadata.get('source', 'Unknown source')
        page = doc.metadata.get('page', None)
        name = source.split('/')[-1].strip()
        source_lines.append(
            f"\n{name}\tpage no {page}" if page is not None else f"\n{name}"
        )
    formatted_output = response + "\n\nSources" + "".join(source_lines)
    state.append((question, formatted_output))
    return state, state
def search_docs(prompt, question, state):
    """Answer *question* via similarity search over the saved FAISS index,
    routed through the module-level conversation agent.

    Args:
        prompt: custom prefix prompt text from the UI.
        question: the user's question.
        state: Gradio chat state (list of (question, answer) tuples) or None.

    Returns:
        (state, state) — the updated chat history, once for the chatbot
        component and once for the state component.
    """
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import FAISS
    from langchain.callbacks import get_openai_callback

    # NOTE: the original `global agent; agent = agent` self-assignment was a
    # no-op and has been removed; reading the module-level `agent` needs no
    # global declaration.
    state = state or []
    embeddings = OpenAIEmbeddings()
    docs_db = FAISS.load_local("/home/user/app/docs_db/", embeddings)
    docs = docs_db.similarity_search(question)

    # Stuff the retrieved documents directly into the prompt text.
    full_prompt = prompt + "\n\n" + question + "\n\n" + str(docs)

    with get_openai_callback() as cb:
        response = agent.predict(input=full_prompt)
        print(cb)  # log token usage/cost to stdout

    return formatted_response(docs, question, response, state)
import gradio as gr

# Center the main column and stack its children vertically.
css = """
.col{
    max-width: 75%;
    margin: 0 auto;
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
}
"""

# Gradio UI: one app with two sub-tabs — (1) upload/process documents and
# create the agent, (2) query the indexed documents in a chat interface.
with gr.Blocks(css=css) as demo:
    gr.Markdown("## <center>All in One Document Chatting App</center>")

    with gr.Tab("Chat With Your Documents"):
        with gr.Column(elem_classes="col"):

            # Tab 1: upload files, build the FAISS index, create the agent.
            with gr.Tab("Upload and Process Documents"):
                with gr.Column():
                    docs_upload_input = gr.Files(label="Upload File(s)")
                    docs_upload_button = gr.Button("Upload")
                    docs_upload_output = gr.Textbox(label="Output")

                    docs_process_button = gr.Button("Process")
                    docs_process_output = gr.Textbox(label="Output")

                    create_agent_button = gr.Button("Create Agent")
                    create_agent_output = gr.Textbox(label="Output")

                    gr.ClearButton([docs_upload_input, docs_upload_output, docs_process_output, create_agent_output])

            # Tab 2: chat against the indexed documents with a custom prompt.
            with gr.Tab("Query Documents"):
                with gr.Column():
                    docs_prompt_input = gr.Textbox(label="Custom Prompt")

                    docs_chatbot = gr.Chatbot(label="Chats")
                    docs_state = gr.State()  # per-session chat history

                    docs_search_input = gr.Textbox(label="Question")
                    docs_search_button = gr.Button("Search")

                    gr.ClearButton([docs_prompt_input, docs_search_input])

    #########################################################################################################
    # Event wiring: each button calls the matching top-level function.
    docs_upload_button.click(save_docs, inputs=docs_upload_input, outputs=docs_upload_output)
    docs_process_button.click(process_docs, inputs=None, outputs=docs_process_output)
    create_agent_button.click(create_agent, inputs=None, outputs=create_agent_output)
    docs_search_button.click(search_docs, inputs=[docs_prompt_input, docs_search_input, docs_state], outputs=[docs_chatbot, docs_state])
    #########################################################################################################

# queue() serializes requests so long-running handlers don't time out.
demo.queue()
demo.launch()