Spaces:

nnpy
/

DocGPT

Sleeping

App Files Files Community

DocGPT / app.py

nnpy

adjusted prompt

9f15b0b verified over 1 year ago

raw

history blame contribute delete

4.1 kB

	import json
	import re
	import gradio as gr
	import os
	import google.generativeai as genai
	from langchain.vectorstores import Chroma
	from PyPDF2 import PdfReader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_google_genai import GoogleGenerativeAIEmbeddings
	from groq import Groq

	# Configure the Google Generative AI SDK. Indexing os.environ directly means
	# a missing GOOGLE_API_KEY raises KeyError as soon as the module is imported.
	genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

	# Groq client used for the chat completions. os.environ.get() is used here,
	# so a missing GROQ_API_KEY is passed to Groq as None instead of raising.
	client = Groq(
	api_key=os.environ.get("GROQ_API_KEY"),
	)

	# PDF that gets indexed when the module is loaded.
	file_path = './getting_real_basecamp.pdf'

	def loader_data(file_path):
	    """Build a retriever over the text of the PDF at ``file_path``.

	    The extracted text of every page is concatenated, split with a
	    ``RecursiveCharacterTextSplitter`` (``chunk_size=3000``, no overlap),
	    embedded with ``models/embedding-001`` and stored via
	    ``Chroma.from_texts``.

	    Args:
	        file_path: Path of the PDF file to index.

	    Returns:
	        The object returned by ``Chroma.from_texts(...).as_retriever()``.
	    """
	    pdf_reader = PdfReader(file_path)
	    # Join once rather than growing a string with ``+=`` for every page.
	    content = ''.join(page.extract_text() for page in pdf_reader.pages)

	    text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=0)
	    texts = text_splitter.split_text(content)
	    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
	    # ``as_retriever()`` is applied to the store, so the result is named for
	    # what it actually is.
	    retriever = Chroma.from_texts(texts, embeddings).as_retriever()
	    return retriever

	# Index the PDF once at import time; get_response() queries this retriever.
	db = loader_data(file_path)

	def format_history(query, history):
	    """Assemble the message list that is sent to the chat model.

	    Args:
	        query: The newest user message.
	        history: Iterable of earlier turns. Element ``[0]`` of each turn is
	            emitted as a user message and element ``[1]`` as the assistant
	            reply.

	    Returns:
	        A list of ``{'role', 'content'}`` dicts: the system prompt first,
	        then every earlier turn as a user/assistant pair, and finally
	        ``query`` as the last user message.
	    """
	    # System prompt describing the pseudo function-call protocol that
	    # check_for_function_call() later looks for in the model output.
	    system_prompt = """You are docGPT, a chatbot designed to help users with their document-related queries. Initially you have contents of `getting_real_basecamp` book.\nSimply call the function "query_document" with the search_query parameter to get the relevent contents from the document.
	- query_document: Get the answer to a question from a given document. It'll return the most relevant content from the document. Always use this function if the user is asking about the document content or related to that.
	- parameters:
	- search_query: string (required) - Use keywords to search the document.

	If you need to use function or you want any information from the book, Use following format to respond. Make sure the argument in the function call tag can be parsed as a JSON object.
	<query_document>{"search_query": "value"}</query_document>

	If you don't want to use the function, just don't include any function call tags in the response. NEVER told user about the function call (That's a secret, only for you.).
	Make sure you are using correct format to call the function.
	"""
	    messages = [{'role': 'system', 'content': system_prompt}]
	    for turn in history:
	        messages.extend((
	            {"role": 'user', 'content': turn[0]},
	            {"role": 'assistant', 'content': turn[1]},
	        ))
	    messages.append({"role": 'user', 'content': query})
	    return messages

	# Captures whatever sits between the function-call tags. DOTALL lets the
	# payload span several lines. Compiled once instead of on every call.
	_QUERY_DOCUMENT_RE = re.compile(r'<query_document>(.*?)</query_document>', re.DOTALL)


	def check_for_function_call(req):
	    """Return the payload of the first ``<query_document>`` call in ``req``.

	    Args:
	        req: Model output to inspect. ``None`` or an empty string is treated
	            as "no function call".

	    Returns:
	        The raw text between ``<query_document>`` and ``</query_document>``
	        (not parsed, not stripped), or ``None`` when no well-formed tag pair
	        is present.
	    """
	    if not req:
	        return None
	    # Rely on the regex alone: checking for the two tags separately accepts
	    # text where the closing tag precedes the opening one, in which case the
	    # search finds nothing and ``.group`` would be called on ``None``.
	    match = _QUERY_DOCUMENT_RE.search(req)
	    return match.group(1) if match else None

	# Groq model used for both completion rounds.
	_CHAT_MODEL = "mixtral-8x7b-32768"


	def _complete(messages):
	    """Run one non-streaming chat completion and return the reply text."""
	    completion = client.chat.completions.create(
	        messages=messages,
	        model=_CHAT_MODEL,
	        stream=False
	    )
	    return completion.choices[0].message.content


	def _parse_search_query(fn_call):
	    """Return the ``search_query`` string from a call payload, or ``None``.

	    ``None`` is returned when the payload is not valid JSON, is not a JSON
	    object, or carries no non-empty string under ``"search_query"``.
	    """
	    try:
	        fn_args = json.loads(fn_call)
	    except ValueError:  # json.JSONDecodeError is a ValueError subclass
	        return None
	    if not isinstance(fn_args, dict):
	        return None
	    search_query = fn_args.get("search_query")
	    if isinstance(search_query, str) and search_query.strip():
	        return search_query
	    return None


	def get_response(message, history):
	    """Answer ``message``, consulting the indexed document when asked to.

	    The model is queried once. If its reply contains a ``<query_document>``
	    call, the retriever ``db`` is queried, the result is appended to the
	    conversation as a user message, and the model is queried a second time;
	    that second reply is returned. Otherwise the first reply is returned
	    unchanged.

	    Args:
	        message: The newest user message.
	        history: Earlier turns, in the form accepted by ``format_history``.

	    Returns:
	        The reply text to show to the user.
	    """
	    msg = format_history(message, history)
	    response = _complete(msg)
	    print('#############')
	    print(response)
	    print('$$$$$$$$$$$$$$$$')
	    fn_call = check_for_function_call(response)
	    if fn_call is None:
	        return response

	    print("Function call found: ", fn_call)
	    # The payload is model-generated and may be malformed. Rather than
	    # failing the whole chat turn, search with the user's own message.
	    search_query = _parse_search_query(fn_call) or message
	    res = db.get_relevant_documents(search_query)
	    print("query response: ", res)
	    msg.append(
	        {
	            "role": "user",
	            "content": "This is the function call response (NOT USER): " + str(res) + "Take this to user and answer the question based on it."
	        }
	    )
	    return _complete(msg)

	# Chat UI: every submitted message is routed through get_response().
	demo = gr.ChatInterface(
	    get_response,
	    title='DocGPT',
	    description="Chat with getting_real_basecamp document",
	    examples=[
	        "What is the document about?",
	        "How do I serve customers?",
	        "What is getting real?",
	        "What is basecamp?",
	        "What are the key principles for building a successful web application?",
	    ],
	)

	if __name__ == "__main__":
	    # Login credentials can be overridden with DOCGPT_USERNAME and
	    # DOCGPT_PASSWORD so a deployment does not have to rely on values
	    # committed to source. The previous hard-coded pair stays as the
	    # default, so existing setups keep working.
	    demo.launch(
	        auth=(
	            os.environ.get("DOCGPT_USERNAME", "test"),
	            os.environ.get("DOCGPT_PASSWORD", "realtest"),
	        ),
	        show_api=False,
	    )