# DataAIDemo / pages /jury_records.py
# themeetjani's picture
# Update pages/jury_records.py
# 663c3a8 verified
# raw
# history blame
# 4.67 kB
#import the necessary packages
import streamlit as st
from streamlit import session_state
from langchain.document_loaders import WebBaseLoader, PyPDFLoader, TextLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.docstore.document import Document
import os
from langchain.chat_models import ChatOpenAI
import openai
import json
# The OpenAI API key is read from the OPENAI_API_KEY environment variable;
# os.getenv returns None when that variable is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Chat model passed to the vector-index queries in lang(); replies are capped
# at 100 tokens and sampled with temperature 0.
model = ChatOpenAI(model = 'gpt-4', max_tokens = 100,temperature=0)
# Browser-tab title and icon for this Streamlit page.
st.set_page_config(page_title="jury_records", page_icon="📈")
# Extract the text content from the URL. This uses LangChain's WebBaseLoader, but any other web-scraping function could be used instead.
def extract(link):
    """Load the page(s) behind *link* and return their text as one string.

    Newline characters are removed from each loaded document and the
    documents are joined with single spaces. The length of the resulting text
    is printed. Returns the string 'error' when no text was extracted.
    """
    documents = WebBaseLoader(link).load()
    text = " ".join(doc.page_content.replace('\n', '') for doc in documents)
    print(len(text))
    return text if len(text) > 0 else 'error'
# Summarize the extracted content by prompting GPT-4.
def summarize(link):
    """Return a detailed GPT-4 summary of the page at *link*.

    The page text comes from extract(). It is sent as the system message and
    the summarization instructions are sent as the user message. Returns the
    model's reply with surrounding whitespace stripped, or the string 'error'
    when extract() returned 'error'.
    """
    page_text = extract(link)
    if page_text == 'error':
        return 'error'

    # The continuation lines of this literal start at column 0 on purpose:
    # any leading whitespace would become part of the prompt text.
    instructions = '''Summarize the content in detail. Follow these instructions while summarizing.\n Include case no.\n Include all Plaintiff. \n Include the court name.
\n Alias name should be included.\n Include case no. \n Include all defendants.\n If place is mentioned then include it, otherwise don't include it.
\n Date format should be dd/mm/yyyy.\n If case is settled for an amount then try to include the amount.
If amount is not mentioned don't mentioned anything about the same. only include this line if case is
setteled otherwise include the status of case.\n\n<<REMEMBER>>\n\n Please try to include all the details. Don't leave out any information.'''

    chat = [
        {"role": "system", "content": f"Following context is given.{page_text}"},
        {"role": "user", "content": instructions},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=chat,
        temperature=0,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return completion.choices[0].message.content.strip()
# Question dictionary used for Q&A. After many iterations, this is the final set of questions we arrived at. You can change this dictionary to match the parameters that need to be extracted from the URL.
# Maps each output field name to the question asked to obtain it. lang()
# iterates this dict, using the value as the question and the key as the
# field name in its result.
info_detail = {
    'case_type': 'provide case type or court system like "Criminal", "Family Law", "labour law"',
    'name_of_court': 'provide name of court or jail or court record.',
    'case_number': 'provide case number or country case number or bankrupty case number',
    'date_filed': 'what is the date when the case was filed or the date when case first formally/officially submitted?',
    'plaintiff': 'Names of the Petitioner or plaintiff or applicant? ',
    'defendants': "Names of all defendants, respondent and alias. Name entity under 'Defendants'",
    'nature_of_action': 'Summarize the reason behind the case within 20 words in detail',
    'status': 'what is the status of case?',
}
# LangChain function for Q&A over the summary produced by GPT-4, using a vector-database index.
def lang(context):
    """Answer every question in ``info_detail`` against *context*.

    *context* is wrapped in a single Document and indexed with
    VectorstoreIndexCreator; each question is then put to that index using
    the module-level ``model``.

    Args:
        context: Text to query (process() passes the summary from summarize()).

    Returns:
        A dict with the same keys as ``info_detail``, each mapped to the
        answer returned for that key's question.

    Raises:
        Whatever index creation or a query raises; the collection is still
        deleted before the exception propagates.
    """
    index = VectorstoreIndexCreator().from_documents([Document(page_content=context)])
    try:
        return {
            field: index.query(llm=model, question=question)
            for field, question in info_detail.items()
        }
    finally:
        # Delete the collection even when a query fails. Previously a failure
        # skipped this cleanup and left the collection behind.
        # NOTE(review): presumably a leftover collection would be visible to
        # later calls — confirm against the vector store in use.
        index.vectorstore.delete_collection()
def process(url):
    """Summarize the page at *url* and extract the structured answers.

    Args:
        url: Link to the jury record page.

    Returns:
        * the answer dict from lang() on success;
        * ``{"details": "", "status": False}`` when summarize() returns
          'error';
        * the string "Please try again" when any step raises an exception.
    """
    # Function-scope import so this block does not rely on a file-level import.
    import logging

    try:
        summary = summarize(url)
        if summary == 'error':
            return {"details": "", "status": False}
        return lang(summary)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate, and log the traceback
        # so the failure is not lost behind the generic message.
        logging.getLogger(__name__).exception(
            "Jury record processing failed for %r", url
        )
        return "Please try again"
# Seed the result slot with an empty string the first time it is missing from
# session state, so the result box always has a value to show.
if "jury_records_dict" not in session_state:
    session_state["jury_records_dict"] = ""
def Jury(url):
    """Button callback: process *url* and store the outcome in session state.

    Args:
        url: The jury record link, supplied through the button's ``args``.

    The value returned by process() is stored under the 'jury_records_dict'
    key of ``session_state``.
    """
    # Use the argument passed in; the previous body ignored it and read the
    # module-level ``jury_url`` global instead.
    session_state['jury_records_dict'] = process(url)
# Page heading.
st.title("Jury Records")
# Input box for the jury record link; its current text is handed to the button
# callback below through ``args``.
jury_url= st.text_area(label= "Please enter the jury records link",
placeholder="Jury records Link")
# Displays whatever is currently stored under 'jury_records_dict' in session state.
st.text_area("result", value=session_state['jury_records_dict'])
# Clicking the button invokes Jury(jury_url) as its on_click callback.
st.button("Get answer dictionary", on_click=Jury, args=[jury_url])