DataAIDemo

Sleeping

App Files Files Community

DataAIDemo / pages /jury_records.py

themeetjani

Upload 10 files

6060e42 verified 9 months ago

raw

history blame

4.65 kB

	#import the necessary packages
	import streamlit as st
	from streamlit import session_state
	from langchain.document_loaders import WebBaseLoader, PyPDFLoader, TextLoader
	from langchain.indexes import VectorstoreIndexCreator
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.docstore.document import Document
	import os
	from langchain.chat_models import ChatOpenAI
	import openai
	import json
	# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
	# written into this file; the literal below is only a placeholder fallback.
	openai.api_key = os.environ.get("OPENAI_API_KEY", "give api key")
	# Hand the same key to the LangChain wrapper explicitly: ChatOpenAI takes its
	# key from its own argument or the environment, not from openai.api_key.
	model = ChatOpenAI(
	    model='gpt-4',
	    max_tokens=100,
	    temperature=0,
	    openai_api_key=openai.api_key,
	)
	# Page metadata; kept ahead of every other Streamlit call on this page.
	st.set_page_config(page_title="jury_records", page_icon="📈")
	# Fetch the page(s) behind a URL with LangChain's WebBaseLoader and flatten
	# them into one string. Any other web-scraping routine could be used instead.
	def extract(link):
	    """Return the text content found at ``link`` as a single string.

	    Args:
	        link: URL handed to ``WebBaseLoader``.

	    Returns:
	        The loaded page contents joined by single spaces, or the sentinel
	        string ``'error'`` when nothing but whitespace could be extracted.
	    """
	    loader = WebBaseLoader(link)
	    # Swap newlines for spaces instead of deleting them, so that words on
	    # adjacent lines are not glued together.
	    chunks = [page.page_content.replace('\n', ' ') for page in loader.load()]
	    text = " ".join(chunks).strip()
	    # An all-whitespace result carries no information; report it like an
	    # empty one.
	    return text if text else 'error'

	# Ask GPT-4 for a detailed, instruction-driven summary of the page content.
	def summarize(link):
	    """Summarize the page at ``link`` with a GPT-4 chat completion.

	    Args:
	        link: URL whose text is obtained through ``extract``.

	    Returns:
	        The stripped text of the first completion choice, or the sentinel
	        string ``'error'`` when ``extract`` reported ``'error'``.
	    """
	    page_text = extract(link)
	    # Nothing to summarize: pass the sentinel straight through.
	    if page_text == 'error':
	        return 'error'

	    # User instructions sent with every request (text kept verbatim).
	    instructions = '''Summarize the content in detail. Follow these instructions while summarizing.\n Include case no.\n Include all Plaintiff. \n Include the court name.
	\n Alias name should be included.\n Include case no. \n Include all defendants.\n If place is mentioned then include it, otherwise don't include it.
	\n Date format should be dd/mm/yyyy.\n If case is settled for an amount then try to include the amount.
	If amount is not mentioned don't mentioned anything about the same. only include this line if case is
	setteled otherwise include the status of case.\n\n<<REMEMBER>>\n\n Please try to include all the details. Don't leave out any information.'''

	    # The scraped text travels in the system message, the instructions in
	    # the user message.
	    chat = [
	        {"role": "system", "content": f"Following context is given.{page_text}"},
	        {"role": "user", "content": instructions},
	    ]
	    completion = openai.ChatCompletion.create(
	        model="gpt-4",
	        messages=chat,
	        temperature=0,
	        max_tokens=1000,
	        top_p=1,
	        frequency_penalty=0,
	        presence_penalty=0,
	    )
	    first_choice = completion.choices[0]
	    return first_choice.message.content.strip()

	# Question dictionary used for Q&A over the summary: each key names a field
	# to extract and each value is the prompt asked for it. This is the wording
	# settled on after several iterations; edit it to extract other fields.
	info_detail = {
	    'case_type': 'provide case type or court system like "Criminal", "Family Law", "labour law"',
	    'name_of_court': 'provide name of court or jail or court record.',
	    'case_number': 'provide case number or country case number or bankrupty case number',
	    'date_filed': 'what is the date when the case was filed or the date when case first formally/officially submitted?',
	    'plaintiff': 'Names of the Petitioner or plaintiff or applicant? ',
	    'defendants': "Names of all defendants, respondent and alias. Name entity under 'Defendants'",
	    'nature_of_action': 'Summarize the reason behind the case within 20 words in detail',
	    'status': 'what is the status of case?',
	}

	# Question answering over the GPT-4 summary: the summary is indexed in a
	# vector store and every prompt from info_detail is asked against that index.
	def lang(context):
	    """Answer each ``info_detail`` question using ``context`` as the only document.

	    Args:
	        context: Summary text to index and query.

	    Returns:
	        dict mapping every key of ``info_detail`` to the answer returned by
	        the index query for that key's prompt.
	    """
	    index2 = VectorstoreIndexCreator().from_documents(
	        [Document(page_content=context)]
	    )
	    try:
	        return {
	            key: index2.query(llm=model, question=ques)
	            for key, ques in info_detail.items()
	        }
	    finally:
	        # Delete the collection even when a query raises, so the collection
	        # created for this request is always cleaned up.
	        index2.vectorstore.delete_collection()

	def process(url):
	    """Summarize the page at ``url`` and extract the structured case fields.

	    Args:
	        url: Link passed on to ``summarize``.

	    Returns:
	        The answer dict from ``lang`` on success,
	        ``{"details": "", "status": False}`` when ``summarize`` returns
	        ``'error'``, or the string ``"Please try again"`` when any step raises.
	    """
	    import logging  # local import so this block does not depend on new file-level imports

	    try:
	        summary = summarize(url)
	        if summary == 'error':
	            return {"details":"","status":False}
	        return lang(summary)
	    except Exception:
	        # Catch Exception rather than using a bare except so that
	        # KeyboardInterrupt/SystemExit still propagate, and record the
	        # traceback instead of silently discarding it.
	        logging.getLogger(__name__).exception("Processing failed for %s", url)
	        return "Please try again"
	# Seed the result slot with an empty string when the session does not have
	# one yet, so the result widget always finds a value to display.
	if 'jury_records_dict' not in session_state:
	    session_state['jury_records_dict'] = ""

	def Jury(url):
	    """Button callback: process ``url`` and store the outcome in session state.

	    Args:
	        url: Link supplied through the button's ``args``.
	    """
	    # Use the argument that was passed in rather than the module-level
	    # ``jury_url``; the original ignored its own parameter.
	    session_state['jury_records_dict'] = process(url)

	# ---- Page layout: title, link input, result box, trigger button ----
	st.title("Jury Records")

	# Free-text box for the link to analyse.
	jury_url = st.text_area(
	    label="Please enter the jury records link",
	    placeholder="Jury records Link",
	)

	# Shows whatever the last callback run stored in session state.
	st.text_area("result", value=session_state['jury_records_dict'])

	# The click runs Jury with the link entered above as its argument.
	st.button("Get answer dictionary", on_click=Jury, args=[jury_url])