# DocGPT / app.py
# Listing metadata carried over from the hosting page (not part of the program):
#   "nnpy's picture" / "adjusted prompt" / 9f15b0b (verified)
import json
import re
import gradio as gr
import os
import google.generativeai as genai
from langchain.vectorstores import Chroma
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from groq import Groq
# Configure the Google Generative AI SDK. Indexing os.environ directly means a
# missing GOOGLE_API_KEY fails fast with KeyError at import time.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Groq client used for the chat completions. os.environ.get() yields None when
# GROQ_API_KEY is unset; how Groq() treats a None key is up to the library.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# PDF that is indexed when the module is loaded.
file_path = "./getting_real_basecamp.pdf"
def loader_data(file_path, *, chunk_size=3000, chunk_overlap=0,
                embedding_model="models/embedding-001"):
    """Index the PDF at *file_path* and return a retriever over its text.

    The text of every page is extracted and joined, split into chunks with
    ``RecursiveCharacterTextSplitter``, embedded with
    ``GoogleGenerativeAIEmbeddings`` and stored in a Chroma vector store.

    Args:
        file_path: Path of the PDF file to read.
        chunk_size: ``chunk_size`` handed to the text splitter.
        chunk_overlap: ``chunk_overlap`` handed to the text splitter.
        embedding_model: Model name handed to ``GoogleGenerativeAIEmbeddings``.

    Returns:
        The object returned by ``Chroma.from_texts(...).as_retriever()``.
    """
    pdf_reader = PdfReader(file_path)

    # `or ""` guards against pages for which no text comes back. Joining with
    # a newline keeps the last word of one page from fusing with the first
    # word of the next, and avoids repeated string concatenation in a loop.
    page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    content = "\n".join(page_texts)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_text(content)

    embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model)
    return Chroma.from_texts(chunks, embeddings).as_retriever()
# Build the retriever once, when the module is loaded; get_response() queries
# it each time the model asks for document content.
db = loader_data(file_path)
def format_history(query, history):
    """Assemble the message list for one chat-completion request.

    Args:
        query: The user's newest message.
        history: Earlier turns; each item is indexed as ``item[0]`` (user
            text) and ``item[1]`` (assistant text).

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts: the system prompt,
        then every past turn as a user/assistant pair, then *query* as the
        final user message.
    """
    # The system prompt describes the <query_document> pseudo function call
    # that the reply parser looks for.
    system_prompt = """You are docGPT, a chatbot designed to help users with their document-related queries. Initially you have contents of `getting_real_basecamp` book.\nSimply call the function "query_document" with the search_query parameter to get the relevent contents from the document.
- query_document: Get the answer to a question from a given document. It'll return the most relevant content from the document. Always use this function if the user is asking about the document content or related to that.
- parameters:
- search_query: string (required) - Use keywords to search the document.
If you need to use function or you want any information from the book, Use following format to respond. Make sure the argument in the function call tag can be parsed as a JSON object.
<query_document>{"search_query": "value"}</query_document>
If you don't want to use the function, just don't include any function call tags in the response. NEVER told user about the function call (That's a secret, only for you.).
Make sure you are using correct format to call the function.
"""
    conversation = [{'role': 'system', 'content': system_prompt}]

    for turn in history:
        conversation.extend((
            {"role": 'user', 'content': turn[0]},
            {"role": 'assistant', 'content': turn[1]},
        ))

    conversation.append({"role": 'user', 'content': query})
    return conversation
# Captures whatever sits between the function-call tags. DOTALL lets the
# payload span several lines. Compiled once instead of on every call.
_QUERY_DOCUMENT_RE = re.compile(r'<query_document>(.*?)</query_document>', re.DOTALL)


def check_for_function_call(req):
    """Extract the payload of a ``<query_document>`` call from a model reply.

    Args:
        req: Text of the model reply; ``None`` or an empty string is accepted.

    Returns:
        The raw text between the first ``<query_document>`` tag and the next
        ``</query_document>`` tag, or ``None`` when the reply contains no
        well-formed pair.
    """
    if not req:
        return None
    # Rely on the regex alone: a reply that contains both tags in the wrong
    # order (closing before opening) has no match and must yield None rather
    # than crash on `match.group`.
    match = _QUERY_DOCUMENT_RE.search(req)
    return match.group(1) if match else None
# Groq model used for both completion passes.
# NOTE(review): confirm this model ID is still served by Groq before deploying.
_CHAT_MODEL = "mixtral-8x7b-32768"


def _complete(messages):
    """Run one non-streaming Groq chat completion and return the reply text."""
    completion = client.chat.completions.create(
        messages=messages,
        model=_CHAT_MODEL,
        stream=False,
    )
    return completion.choices[0].message.content


def _search_query_from_call(fn_call, fallback):
    """Return the ``search_query`` string in *fn_call*, or *fallback* if unusable."""
    try:
        fn_args = json.loads(fn_call)
    except json.JSONDecodeError:
        return fallback
    if not isinstance(fn_args, dict):
        return fallback
    query = fn_args.get("search_query")
    if isinstance(query, str) and query.strip():
        return query
    return fallback


def get_response(message, history):
    """Answer one chat turn, consulting the document when the model asks to.

    The model is called once with the formatted history. If its reply holds a
    ``<query_document>`` call, the requested search is run against ``db``, the
    result is appended to the conversation as an extra user message, and the
    model is called a second time to produce the final answer.

    Args:
        message: The user's newest message.
        history: Earlier turns, as accepted by ``format_history``.

    Returns:
        The text of the model reply shown to the user.
    """
    msg = format_history(message, history)
    response = _complete(msg)
    print('#############')
    print(response)
    print('$$$$$$$$$$$$$$$$')

    fn_call = check_for_function_call(response) if response else None
    if fn_call is None:
        return response

    print("Function call found: ", fn_call)
    # The payload is model-generated text: it may be malformed JSON or lack
    # the expected key. Fall back to the user's own message as the search
    # query so the turn is still answered instead of raising.
    search_query = _search_query_from_call(fn_call, fallback=message)
    res = db.get_relevant_documents(search_query)
    print("query response: ", res)
    msg.append(
        {
            "role": "user",
            "content": "This is the function call response (NOT USER): " + str(res) + "Take this to user and answer the question based on it."
        }
    )
    return _complete(msg)
# Gradio chat UI; every submitted message is routed to get_response().
demo = gr.ChatInterface(
    get_response,
    title='DocGPT',
    description="Chat with getting_real_basecamp document",
    examples=[
        "What is the document about?",
        "How do I serve customers?",
        "What is getting real?",
        "What is basecamp?",
        "What are the key principles for building a successful web application?",
    ],
)

if __name__ == "__main__":
    # Login credentials can be supplied through the environment so they need
    # not live in source control. The defaults are the values that used to be
    # hard-coded, so existing deployments behave the same.
    # NOTE(review): set DOCGPT_AUTH_USER / DOCGPT_AUTH_PASSWORD for any
    # deployment that is not a public demo.
    demo.launch(
        auth=(
            os.environ.get("DOCGPT_AUTH_USER", "test"),
            os.environ.get("DOCGPT_AUTH_PASSWORD", "realtest"),
        ),
        show_api=False,
    )