kora-assistant / app.py
jscotthorn's picture
Fix LFS
cd2fc58
raw
history blame contribute delete
No virus
15.1 kB
from langchain import hub
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_openai import ChatOpenAI
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import BaseTool, StructuredTool, tool
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.sentence_transformer import (
SentenceTransformerEmbeddings,
)
from langchain.llms.openai import OpenAI
from langchain_openai import ChatOpenAI
import pandas as pd
from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence
from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.utils import print_text
import json
from langchain_core.documents import Document
import re
import gradio as gr
from examples import map_example
if TYPE_CHECKING:
from langchain_core.agents import AgentAction, AgentFinish
# A single sentence-transformer model embeds queries for both Chroma
# collections, so it is constructed once and shared between them.
embedding_function = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
)
# Collection searched by find_case_law_annotations.
annotations = Chroma(
    persist_directory="db/kora-annotations",
    embedding_function=embedding_function,
)
# Collection searched by find_attorney_general_annotations.
opinions = Chroma(
    persist_directory="db/kora-opinions",
    embedding_function=embedding_function,
)
# The Statute Numbers look like floats to the default importer, so every
# column is explicitly read with the 'object' dtype instead.
statutes_dict = dict.fromkeys(
    [
        'Chapter Number',
        'Statute Number',
        'Chapter Title',
        'Section',
        'Statute Title',
        'Statute Body',
        'History',
    ],
    'object',
)
# CSV file of all statutes in KRS Title VIII.
df_statutes = pd.read_csv(
    'gov.ky.krs.title.08.statutes.csv',
    dtype=statutes_dict,
)
# Keep only the rows whose Section is "Open Records" (the KORA statutes).
df_kora = df_statutes.loc[df_statutes["Section"] == "Open Records"]
# Custom CSV of exceptions to the open records law.
df_exceptions = pd.read_csv('exceptions.csv')
def get_metadata_and_content(documents):
    """
    Render retrieved documents as one plain-text string.

    Args:
        documents: An iterable of objects exposing ``page_content`` (a
            string) and ``metadata`` (a mapping).

    Returns:
        A string in which each document contributes its page_content on its
        own line, then one "key: value" line per metadata entry, then two
        extra newlines that separate it from the next document. An empty
        input produces an empty string.
    """
    pieces = []
    for doc in documents:
        pieces.append(doc.page_content + "\n")
        pieces.extend(
            "{}: {}\n".format(name, val) for name, val in doc.metadata.items()
        )
        pieces.append("\n\n")
    return "".join(pieces)
@tool
def find_case_law_annotations(query: str) -> str:
    """
    Always search for annotations on relevant appelate court cases, which are binding for interpreting the law in Kentucky.
    """
    # NOTE: the docstring above is what the @tool decorator hands to the
    # model as this tool's description, so its wording is part of the prompt.
    retriever = annotations.as_retriever()
    matches = retriever.invoke(query)
    # Flatten the retrieved documents into plain text for the agent.
    return get_metadata_and_content(matches)
# Created once at import time and reused by every call to the tool below.
opinion_retriever = opinions.as_retriever()


@tool
def find_attorney_general_annotations(query: str) -> str:
    """
    Perform a semantic search for relevant attorney general opinions. Only use this tool if you still need more context after looking for annotations
    """
    # NOTE: the docstring above is what the @tool decorator hands to the
    # model as this tool's description, so its wording is part of the prompt.
    hits = opinion_retriever.invoke(query)
    return get_metadata_and_content(hits)
@tool
def lookup_statute_text(query: str) -> str:
    """Get the official text for a specific statute in the Kentucky Open Records Act by statute number"""
    # The lookup is an exact string comparison against the 'Statute Number'
    # column; a number that is not present yields an empty string.
    matches = df_statutes[df_statutes['Statute Number'] == query]
    if matches.empty:
        return ''
    # Only the first matching row is reported.
    statute = matches.iloc[0].to_dict()
    template = """KRS {}: {}
{}
In Chapter {}: {}
Section: {}
"""
    columns = (
        'Statute Number',
        'Statute Title',
        'Statute Body',
        'Chapter Number',
        'Chapter Title',
        'Section',
    )
    return template.format(*(statute[column] for column in columns))
@tool
def lookup_exception_text(query: str) -> str:
    """Get the text for a specific exception to the open records law by granular number, e.g. "61.878(1)(b)", or by a label from Exception Notes, e.g. "HIPAA". After looking up an exception, be sure to look up case law and AG opinions for context in interpretation"""
    # NOTE: the docstring above is what the @tool decorator hands to the
    # model as this tool's description, so it is kept as a single line.
    #
    # Behaviour:
    #   * exact match on the 'Law' column -> "Exception <query>: <Body>",
    #     plus the 61.878(2)-(6) caveats when the query starts with 61.878;
    #   * query == '61.878' with no row  -> full statute text instead;
    #   * anything else                  -> empty string.
    matches = df_exceptions[df_exceptions['Law'] == query]
    if matches.empty:
        # Sometimes it uses this tool for all of 61.878, (especially gpt3.5) so we'll give it a hand.
        if query == '61.878':
            # lookup_statute_text is a tool object, not a plain function:
            # go through .invoke() rather than the deprecated direct call.
            return lookup_statute_text.invoke(query)
        return ''

    exception = 'Exception ' + query + ': ' + matches.iloc[0].to_dict()['Body']
    if query.startswith('61.878'):
        exception += """
There are some caveats to any exemption from 61.878:
61.878(2) No exemption in this section shall be construed to prohibit disclosure of statistical information not descriptive of any readily identifiable person.
61.878(3) No exemption in this section shall be construed to deny, abridge, or impede the right of a public agency employee, including university employees, an applicant for employment, or an eligible on a register to inspect and to copy any record including preliminary and other supporting documentation that relates to him. The records shall include, but not be limited to, work plans, job performance, demotions, evaluations, promotions, compensation, classification, reallocation, transfers, lay-offs, disciplinary actions, examination scores, and preliminary and other supporting documentation. A public agency employee, including university employees, applicant, or eligible shall not have the right to inspect or to copy any examination or any documents relating to ongoing criminal or administrative investigations by an agency.
61.878(4) If any public record contains material which is not excepted under this section, the public agency shall separate the excepted and make the nonexcepted material available for examination.
61.878(5) The provisions of this section shall in no way prohibit or limit the exchange of public records or the sharing of information between public agencies when the exchange is serving a legitimate governmental need or is necessary in the performance of a legitimate government function.
61.878(6) When material is made available pursuant to a request under subsection (1)(q) of this section, the public agency shall not be required to make a copy of the recording except as provided in KRS 61.169, and the requesting parties shall not be limited in the number of times they may view the material.
"""
    return exception
# Testing has returned much better performance when this context is preloaded for all queries, although more expensive.
# ask_question passes this text to the agent as the "kora_overview" input on
# every question. It is prompt content, so its wording is runtime behaviour.
kora_overview = """Here are some key points about the Kentucky Open Records Act:
1. “RESIDENTS” have an enforceable right to use the open records law; nonresidents may request records but don’t have an enforceable right (agency denial may be affirmed by the AG or courts). The law recognizes these categories as “residents”:
A person who lives in KY - KRS 61.870(10)(a)
A business located in KY - KRS 61.870(10)(b)
A business registered in KY - KRS 61.870(10)(c)
A person who is employed/works in KY - KRS 61.870(10)(d)
A person who owns property in KY - KRS 61.870(10)(e)
An authorized representative of any of the above - KRS 61.870(10)(f)
A news gathering organization as defined in KRS 189.635(8)(b)1 - KRS 61.870(10)(g).
2. There are two ways to prepare a request:
a) A self-prepared written request that must contain a description of the records you seek; your name printed/typed legibly; your signature, which can be electronic (/s/ First Last); and your mailing address or email address if you ask for copies by mail or email.
It should also include the date submitted; whether you wish to review records on agency premises OR receive copies by mail; a statement that your request is noncommercial (or commercial); and a statement that you are a resident and the section of KRS 61.870(10), above, under which you qualify. You can help people prepare these kinds of requests if they are asking for records that you determine are covered by the KORA.
b) A standardized open records request form available on the Attorney General's website; posted in a “prominent” place in the public agency’s office and on the agency website - KRS 61.876
3. Requests can be hand-delivered or sent by US Mail, Fax, or email. Emailed requests should be sent to the agency’s records custodian at the email address on the agency’s rules and regulations.
4. The deadline for an agency‘s written response to an open records request is five business days.
5. Requestors can appeal to the Attorney General's Office for a review of most issues related to Open Records and Open Meetings, including denials or perceived subversion of the open records law. There is no cost for this review, but agencies who receive unfavorable audits have the option to file suit against requestors in Circuit Court for a de novo review of the Attorney General's decision. Requestors can also appeal directly to the Circuit Court in the county where the public agency is registered, in place of the free Attorney General appeal.
6. Requests for public records of the Kentucky General Assembly and Legislative Research Commission (LRC) must be submitted to the LRC Director. Only those records identified in KRS 7.119(2) are public records. The only available appeal of denial of these records requests is to the Legislative Research Commission under KRS 7.119(3) and (4). NO appeal provided by statute to the Attorney General or the courts.
"""
# Prompt text listing every row of df_exceptions together with the exact
# query string to hand to the lookup_exception_text tool for it.
exceptions = "There are a set of exceptions set out in KRS61.878 and others incorporated from federal and state law or established through case law. You can lookup details for any of these with the lookup_exception_text tool:\n" + "".join(
    "query: \"{}\" {}\n".format(entry['Law'], entry['Brief Summary'])
    for _, entry in df_exceptions.iterrows()
)
# Prompt text listing each Open Records statute as "<number> - <title>".
statutes = "These are the statute numbers and names in the Kentucky Open Records Act:\n" + "".join(
    entry['Statute Number'] + " - " + entry['Statute Title'] + "\n"
    for _, entry in df_kora.iterrows()
)
# Every tool handed to the agent and to its executor.
tools = [
    find_case_law_annotations,
    find_attorney_general_annotations,
    lookup_exception_text,
    lookup_statute_text,
]
# Get the prompt to use - you can modify this!
# It is pulled from the LangChain hub once, when this module is imported.
prompt = hub.pull("jscotthorn/openai-ky-open-records-agent")
def get_agent_executor(api_key, model):
    """
    Build an agent executor bound to the caller's OpenAI credentials.

    Args:
        api_key: OpenAI API key forwarded to ChatOpenAI.
        model: OpenAI chat model name forwarded to ChatOpenAI.

    Returns:
        An AgentExecutor wrapping an OpenAI tools agent built from the
        module-level ``tools`` and ``prompt``.
    """
    # temperature=0 is passed so sampling randomness is minimised.
    llm = ChatOpenAI(temperature=0, model_name=model, openai_api_key=api_key)
    # Construct the OpenAI Tools agent
    agent = create_openai_tools_agent(llm, tools, prompt)
    # return_intermediate_steps=True is required: ask_question reads
    # results['intermediate_steps'] to show which tools were used.
    return AgentExecutor(
        agent=agent,
        tools=tools,
        return_intermediate_steps=True,
    )
# Pretty labels shown to the end user
tool_labels = {
    "find_case_law_annotations": "Searched case law annotations",
    "find_attorney_general_annotations": "Searched AG decisions",
    "lookup_exception_text": "Looked up exception",
    "lookup_statute_text": "Looked up statute",
}


def format_tools_used(input_array):
    """
    Render the agent's intermediate steps as Markdown.

    Args:
        input_array: A sequence of steps. Each step is indexable, with an
            action at position 0 (exposing ``tool`` and ``tool_input``) and
            the tool's observation at position 1.

    Returns:
        One Markdown string with a "#### <label>: <query>" heading per step,
        followed by the observation and a blank line. An empty sequence
        yields an empty string.
    """
    sections = []
    for step in input_array:
        action, observation = step[0], step[1]
        # Fall back to the raw tool name so an unexpected tool (for example
        # one the model invented) cannot crash the whole response.
        label = tool_labels.get(action.tool, action.tool)
        tool_input = action.tool_input
        # tool_input is normally {'query': ...}; tolerate a bare string or a
        # dict without that key by showing whatever was supplied.
        if isinstance(tool_input, dict):
            query = tool_input.get('query', tool_input)
        else:
            query = tool_input
        # str() guards against non-string observations.
        sections.append(
            "#### {}: {}\n".format(label, query) + str(observation) + '\n\n'
        )
    return ''.join(sections)
# Button click event
def ask_question(api_key, model, question):
    """
    Run one question through a freshly built agent executor.

    Args:
        api_key: OpenAI API key supplied by the user.
        model: Name of the OpenAI chat model to use.
        question: The user's question.

    Returns:
        A two-item list: the agent's final answer, then a Markdown log of
        the tool calls it made.
    """
    executor = get_agent_executor(api_key, model)
    # The three context strings are each wrapped in a single-item list, as
    # the original call did.
    payload = {
        "input": question,
        "exception_list": [exceptions],
        "kora_overview": [kora_overview],
        "kora_statutes": [statutes],
    }
    outcome = executor.invoke(payload)
    answer = outcome["output"]
    tool_log = format_tools_used(outcome['intermediate_steps'])
    return [answer, tool_log]
# Stylesheet path handed to gr.Blocks below.
css_path = 'custom_style.css'
# NOTE(review): the nesting below was reconstructed from a copy of this file
# that had lost its indentation -- confirm which components belong inside
# each Group/Column before relying on the exact layout.
with gr.Blocks(title="Kentucky Open Records Assistant", css=css_path) as demo:
    # First row: inputs, the agent's answer, and the log of tool calls.
    with gr.Row():
        with gr.Column(variant="box", scale=2):
            with gr.Group():
                gr.Markdown("##   KY Open Records Assistant")
                # The key is supplied by the user and passed to ask_question.
                api_key = gr.Textbox(label="OpenAI API Key", lines=1)
                question = gr.Textbox(label="Question", lines=2)
                model_name = gr.Dropdown(
                    ["gpt-4-0125-preview", "gpt-4-1106-preview", "gpt-3.5-turbo-0125", "gpt-3.5-turbo-1106"],
                    label="LLM Model",
                    value="gpt-4-0125-preview"
                )
                gr.Markdown('Questions with GPT-4 have been observed costing up to 20 cents, average 3-5')
                run = gr.Button()
        with gr.Column(variant="box", scale=4):
            with gr.Group(elem_classes="scrolling-group padded"):
                gr.Markdown("### Response")
                # Receives the first item returned by ask_question.
                response = gr.Markdown()
        with gr.Column(variant="box", scale=3):
            with gr.Group(elem_classes="scrolling-group padded"):
                gr.Markdown("### Agent Tool Use")
                # Receives the second item returned by ask_question.
                toolsUsed = gr.Markdown()
    # Second row: example questions, disclaimer, and background information.
    with gr.Row():
        with gr.Column(variant="box", scale=2):
            # Example questions feed the question box; map_example (imported
            # from examples) is passed as the function producing the two
            # outputs, and cache_examples=True asks Gradio to cache them.
            gr.Examples(
                [
                    'Are police body cam and dash cam footage open records?',
                    'Are environmental impact reports and studies open records?',
                    'Are social services case files such as foster care and adoption open records?',
                    'Are information on state-run programs and their effectiveness open records?',
                    'Are settlements in civil rights cases open records?',
                ],
                question,
                [response, toolsUsed],
                map_example,
                cache_examples=True,
            )
        with gr.Column(variant="box", scale=4):
            gr.Markdown("### DISCLAIMER \n This tool can provide innaccurate information including hallucinations, incorrect citations, and problematic output, especially if prompted to do so. This tool cannot be used for legal advice.")
            gr.Markdown("### Tech Used \n Chroma, Langchain OpenAI Tools Agent, Custom Tools.")
        with gr.Column(variant="box", scale=3):
            gr.Markdown("### Tools Available to AI Agent: \n - Lookup statute \n - Lookup exception (curated list) \n - Search Case Annotation (from KRS) \n - Search Attorney General Opinions (from KRS).")
            gr.Markdown("### Data Sources: \n - [Kentucky Revised Statutes](https://archive.org/details/gov.ky.code) - raw files were [parsed](https://github.com/jscotthorn/extract-state-codes-data) into structured data. Statutes are looked up by number, annotations are [indexed in chroma dbs for semantic search](https://www.kaggle.com/code/jscotthorn/rag-question-answer-pipeline-for-open-records/notebook?scriptVersionId=165199302). \n - Exception list derived from [RCFP.org](https://www.rcfp.org/open-government-guide/kentucky/#c-court-derived-exclusions-common-law-prohibitions-recognized-privileges-against-disclosure)")
    # Wire the button: inputs are passed to ask_question in this order, and
    # its two return values fill the response and tool-use panels.
    run.click(ask_question, inputs=[api_key, model_name, question], outputs=[response, toolsUsed])
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()