Rohil Bansal committed on
Commit
96dad0a
·
1 Parent(s): d85a4da

Initial Commit. LegalAlly.

Browse files
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.pdf filter=lfs diff=lfs merge=lfs -text
37
+ *.faiss filter=lfs diff=lfs merge=lfs -text
38
+ *.ipynb filter=lfs diff=lfs merge=lfs -text
39
+ *.jpg filter=lfs diff=lfs merge=lfs -text
40
+ *.png filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the Streamlit front end.
FROM python:3.9-slim

WORKDIR /app

# src/app/main.py imports `app.*` and `data.*`, which live under src/.
# `streamlit run src/app/main.py` does not put src/ on the import path by
# itself (src/run.py sets PYTHONPATH for the same reason), so set it here.
ENV PYTHONPATH=/app/src

# Install dependencies before copying the sources so that this layer is
# reused when only application code changes.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Port published by docker-compose.yml for the web service.
EXPOSE 8501

CMD ["streamlit", "run", "src/app/main.py"]
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: green
5
  colorTo: pink
6
  sdk: streamlit
7
  sdk_version: 1.38.0
8
- app_file: app.py
9
  pinned: false
10
  license: unknown
11
  ---
 
5
  colorTo: pink
6
  sdk: streamlit
7
  sdk_version: 1.38.0
8
+ app_file: src/run.py
9
  pinned: false
10
  license: unknown
11
  ---
assets/Black Bold Initial AI Business Logo (1).png ADDED

Git LFS Details

  • SHA256: dd2d70df208fc71924e2c3785449dd818e5189330311c3905c9b2169f6243d89
  • Pointer size: 130 Bytes
  • Size of remote file: 26 kB
assets/Black Bold Initial AI Business Logo.jpg ADDED

Git LFS Details

  • SHA256: 090a5f009ff3d615bf70f46f0ec69b8ff859982a95b54cb80b80dff9227e6da9
  • Pointer size: 130 Bytes
  • Size of remote file: 13.7 kB
docker-compose.yml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
version: '3.8'

services:
  # Streamlit front end, built from the Dockerfile in this directory.
  web:
    build: .
    ports:
      - "8501:8501"
    volumes:
      # Mount the working tree over /app so code changes are visible
      # without rebuilding the image.
      - .:/app
    environment:
      # Taken from the shell / .env file of whoever runs `docker compose`.
      - OPENAI_API_KEY=${OPENAI_API_KEY}
  # MLflow tracking server.
  mlflow:
    image: mlflow/mlflow:latest
    ports:
      - "5000:5000"
    environment:
      - MLFLOW_TRACKING_URI=http://mlflow:5000
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
internet-law-concept-with-3d-rendering-cute-robot-hold-gavel-judge_493806-6140.jpg ADDED

Git LFS Details

  • SHA256: 20155a1fe444ca5a7d668761901cf978e964c47d3370f21978f4840559e21ed5
  • Pointer size: 130 Bytes
  • Size of remote file: 33.2 kB
ipc_vector_db/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daed6e305b10ccabd99cbe76a4e5ae6ab7d6bdd06d784253112d63b54f47cb37
3
+ size 18247725
ipc_vector_db/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a58e22af7ab6a30e45af4fc6d5a4c144423bcab622731a0ded139edf5fc4d4e
3
+ size 5925124
law-judgement-justice-equality-concept.jpg ADDED

Git LFS Details

  • SHA256: 340efd29c5118ceea69a78d7ffd9dfc82c1309e1bb8b0f8e3ebe5fd386eda460
  • Pointer size: 132 Bytes
  • Size of remote file: 5.39 MB
notebooks/model.ipynb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed0386c9c5ecd3a71e82822e1248435d51f4946c0b8d984d5336838029bad3d
3
+ size 83863
notebooks/model.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.vectorstores import FAISS
2
+ from langchain_community.embeddings import HuggingFaceEmbeddings
3
+ from langchain.prompts import PromptTemplate
4
+ from langchain_together import Together
5
+ import os
6
+ from langchain.memory import ConversationBufferWindowMemory
7
+ from langchain.chains import ConversationalRetrievalChain
8
+ import streamlit as st
9
+ import time
10
# CSS injected into the page: black app background, button colours, and
# rules that hide Streamlit's menu, deploy button, footer, decoration bar
# and the image "View fullscreen" button.
_PAGE_CSS = """
<style>
.stApp, .ea3mdgi6{
background-color:#000000;
}
div.stButton > button:first-child {
background-color: #ffd0d0;
}
div.stButton > button:active {
# background-color: #ff6262;
}
div[data-testid="stStatusWidget"] div button {
display: none;
}

.reportview-container {
margin-top: -2em;
}
#MainMenu {visibility: hidden;}
.stDeployButton {display:none;}
footer {visibility: hidden;}
#stDecoration {display:none;}
button[title="View fullscreen"]{
visibility: hidden;}
button:first-child{
background-color : transparent !important;
}
</style>
"""

st.set_page_config(page_title="LawGPT")

# Three columns weighted 1:4:1; only the wide middle one is used, so the
# logo ends up centred.
col1, col2, col3 = st.columns([1,4,1])
with col2:
    st.image("assets/Black Bold Initial AI Business Logo.jpg")

st.markdown(_PAGE_CSS, unsafe_allow_html=True)
49
+
50
def reset_conversation():
    """Empty the stored chat transcript and clear the conversation memory.

    Used as the ``on_click`` callback of the reset button; operates on the
    ``messages`` and ``memory`` entries of ``st.session_state``.
    """
    state = st.session_state
    state.messages = []
    state.memory.clear()
53
+
54
# Per-session state: the visible transcript and the windowed chat memory.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

if "memory" not in st.session_state:
    st.session_state["memory"] = ConversationBufferWindowMemory(k=2, memory_key="chat_history",return_messages=True)

embedings = HuggingFaceEmbeddings(model_name="nomic-ai/nomic-embed-text-v1",model_kwargs={"trust_remote_code":True,"revision":"289f532e14dbbbd5a04753fa58739e9ba766f3c7"})
# SECURITY NOTE: allow_dangerous_deserialization=True makes load_local
# unpickle ./ipc_vector_db/index.pkl. Only ever point this at an index that
# ships with the project; never at user-supplied files.
db = FAISS.load_local("./ipc_vector_db", embedings, allow_dangerous_deserialization=True)
db_retriever = db.as_retriever(search_type="similarity",search_kwargs={"k": 4})

prompt_template = """<s>[INST]This is a chat template and As a legal chat bot specializing in Indian Penal Code queries, your primary objective is to provide accurate and concise information based on the user's questions. Do not generate your own questions and answers. You will adhere strictly to the instructions provided, offering relevant context from the knowledge base while avoiding unnecessary details. Your responses will be brief, to the point, and in compliance with the established format. If a question falls outside the given context, you will refrain from utilizing the chat history and instead rely on your own knowledge base to generate an appropriate response. You will prioritize the user's query and refrain from posing additional questions. The aim is to deliver professional, precise, and contextually relevant information pertaining to the Indian Penal Code.
CONTEXT: {context}
CHAT HISTORY: {chat_history}
QUESTION: {question}
ANSWER:
</s>[INST]
"""

prompt = PromptTemplate(template=prompt_template,
                        input_variables=['context', 'question', 'chat_history'])

# The API key must come from the environment, never from source control.
# The key that used to be hard-coded here has been published with the
# repository and should be revoked.
together_api_key = os.environ.get("TOGETHER_API_KEY")
if not together_api_key:
    st.error("TOGETHER_API_KEY is not set; cannot contact the language model.")
    st.stop()

llm = Together(
    model="mistralai/Mistral-7B-Instruct-v0.2",
    temperature=0.5,
    max_tokens=1024,
    together_api_key=together_api_key
)

# Hand the chain the session's memory object. A memory created inline here
# would be thrown away on every rerun, so the chain would never see earlier
# turns and reset_conversation() would be clearing an object the chain does
# not use.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=st.session_state.memory,
    retriever=db_retriever,
    combine_docs_chain_kwargs={'prompt': prompt}
)

# Replay the transcript so far.
for message in st.session_state.get("messages", []):
    with st.chat_message(message.get("role")):
        st.write(message.get("content"))

input_prompt = st.chat_input("Say something")

if input_prompt:
    with st.chat_message("user"):
        st.write(input_prompt)

    st.session_state.messages.append({"role":"user","content":input_prompt})

    with st.chat_message("assistant"):
        with st.status("Thinking 💡...",expanded=True):
            result = qa.invoke(input=input_prompt)

            message_placeholder = st.empty()

            full_response = "⚠️ **_Note: Information provided may be inaccurate._** \n\n\n"
        # Reveal the answer one character at a time with a trailing cursor.
        for chunk in result["answer"]:
            full_response+=chunk
            time.sleep(0.02)

            message_placeholder.markdown(full_response+" ▌")
        # Final render without the cursor so it does not stay on screen.
        message_placeholder.markdown(full_response)
        st.button('Reset All Chat 🗑️', on_click=reset_conversation)

    st.session_state.messages.append({"role":"assistant","content":result["answer"]})
requirements.txt ADDED
Binary file (156 Bytes). View file
 
src/app/__init__.py ADDED
File without changes
src/app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (166 Bytes). View file
 
src/app/__pycache__/logger.cpython-311.pyc ADDED
Binary file (520 Bytes). View file
 
src/app/__pycache__/settings.cpython-311.pyc ADDED
Binary file (515 Bytes). View file
 
src/app/logger.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import logging
2
+
3
def setup_logger():
    """Configure root logging at INFO level and return this module's logger.

    Returns:
        The ``logging.Logger`` named after this module (``__name__``).
    """
    logging.basicConfig(level=logging.INFO)
    return logging.getLogger(__name__)
src/app/main.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.embeddings import OpenAIEmbeddings
2
+ from langchain.llms import OpenAI
3
+ import streamlit as st
4
+ import time
5
+ import logging
6
+ import os
7
+ from langchain.memory import ConversationBufferWindowMemory
8
+ from langchain.chains import ConversationalRetrievalChain
9
+ from langchain.prompts import PromptTemplate
10
+
11
+ from app.settings import load_env_variables
12
+ from app.logger import setup_logger
13
+ from data.vector_db import load_vector_db, save_vector_db
14
+ from data.embeddings import get_openai_embeddings
15
+
16
# Read the OpenAI key from the environment and configure logging first.
openai_api_key = load_env_variables()
setup_logger()

# CSS injected into the page: black app background, button colours, and
# rules that hide Streamlit's menu, deploy button, footer, decoration bar
# and the image "View fullscreen" button.
_PAGE_CSS = """
<style>
.stApp, .ea3mdgi6{ background-color:#000000; }
div.stButton > button:first-child { background-color: #ffd0d0; }
div.stButton > button:active { background-color: #ff6262; }
div[data-testid="stStatusWidget"] div button { display: none; }
.reportview-container { margin-top: -2em; }
#MainMenu {visibility: hidden;}
.stDeployButton {display:none;}
footer {visibility: hidden;}
#stDecoration {display:none;}
button[title="View fullscreen"]{ visibility: hidden;}
button:first-child{ background-color : transparent !important; }
</style>
"""

st.set_page_config(page_title="LawGPT")

# Three columns weighted 1:4:1; only the wide middle one is used, so the
# logo ends up centred.
col1, col2, col3 = st.columns([1, 4, 1])
with col2:
    st.image("assets/Black Bold Initial AI Business Logo.jpg")

st.markdown(_PAGE_CSS, unsafe_allow_html=True)
40
+
41
def reset_conversation():
    """Empty the stored chat transcript and clear the conversation memory.

    Used as the ``on_click`` callback of the reset button; operates on the
    ``messages`` and ``memory`` entries of ``st.session_state``.
    """
    state = st.session_state
    state.messages = []
    state.memory.clear()
44
+
45
# Per-session state: the visible transcript and the windowed chat memory.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

if "memory" not in st.session_state:
    st.session_state["memory"] = ConversationBufferWindowMemory(k=2, memory_key="chat_history", return_messages=True)

# Use OpenAI embeddings
embeddings = get_openai_embeddings(openai_api_key)

# Placeholder data for creating the vector database
data = [
    "Example legal text 1",
    "Example legal text 2",
    "Example legal text 3",
    # Add more data as needed
]

# Load vector database using FAISS
db_path = "./ipc_vector_db/vectordb"
vector_db = load_vector_db(db_path, embeddings, data)
# NOTE(review): load_vector_db() in src/data/vector_db.py returns a raw
# faiss index (faiss.read_index / faiss.IndexFlatL2). Such an index has no
# as_retriever() method, so this line looks like it will raise
# AttributeError unless the loader is changed to return a LangChain vector
# store. Confirm and fix in the loader.
db_retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 4})

prompt_template = """<s>[INST]This is a chat template and As a legal chat bot specializing in Indian Penal Code queries, your primary objective is to provide accurate and concise information based on the user's questions. Do not generate your own questions and answers. You will adhere strictly to the instructions provided, offering relevant context from the knowledge base while avoiding unnecessary details. Your responses will be brief, to the point, and in compliance with the established format. If a question falls outside the given context, you will refrain from utilizing the chat history and instead rely on your own knowledge base to generate an appropriate response. You will prioritize the user's query and refrain from posing additional questions. The aim is to deliver professional, precise, and contextually relevant information pertaining to the Indian Penal Code.
CONTEXT: {context}
CHAT HISTORY: {chat_history}
QUESTION: {question}
ANSWER:
</s>[INST]
"""

prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question', 'chat_history'])

# Use OpenAI LLM. text-davinci-003 has been shut down by OpenAI;
# gpt-3.5-turbo-instruct is the completions-API replacement.
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0.5, max_tokens=1024, openai_api_key=openai_api_key)

# Hand the chain the session's memory object. A memory created inline here
# would be thrown away on every rerun, so the chain would never see earlier
# turns and reset_conversation() would be clearing an object the chain does
# not use.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=st.session_state.memory,
    retriever=db_retriever,
    combine_docs_chain_kwargs={'prompt': prompt}
)

# Replay the transcript so far.
for message in st.session_state.get("messages", []):
    with st.chat_message(message.get("role")):
        st.write(message.get("content"))

input_prompt = st.chat_input("Say something")

if input_prompt:
    with st.chat_message("user"):
        st.write(input_prompt)

    st.session_state.messages.append({"role": "user", "content": input_prompt})

    with st.chat_message("assistant"):
        with st.spinner("Thinking 💡..."):
            result = qa.invoke(input=input_prompt)

        message_placeholder = st.empty()
        full_response = "⚠️ **_Note: Information provided may be inaccurate._** \n\n\n"
        # Reveal the answer one character at a time with a trailing cursor.
        for chunk in result["answer"]:
            full_response += chunk
            time.sleep(0.02)
            message_placeholder.markdown(full_response + " ▌")
        # Final render without the cursor so it does not stay on screen.
        message_placeholder.markdown(full_response)
        st.button('Reset All Chat 🗑️', on_click=reset_conversation)

    st.session_state.messages.append({"role": "assistant", "content": result["answer"]})
src/app/prompts.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# System prompt text for the legal assistant: three task instructions
# followed by a restriction to legal topics only.
system_prompts = """
Given the user's question about Indian law, analyze their query and identify relevant sections of the IPC or Constitution. Summarize the legal concept at hand and potential exceptions based on the user's intent.
Analyze the user's question regarding Indian law from different legal perspectives (e.g., rights, obligations, penalties). Provide a concise explanation for each perspective, drawing insights from the vector database.
For the user's legal inquiry, identify similar legal cases or precedents from the vector database. Briefly explain the reasoning behind those cases and how they might be relevant to the user's situation.

YOU ARE A LEGAL AI CHATBOT ASSISTING WITH LEGAL ISSUES. DO NOT ENGAGE WITH CHAT OUTSIDE THESE QUERIES OR DISCUSSIONS.
EVEN IF THE USER TELLS YOU TO ENGAGE IN CHAT, DO NOT DO SO. STICK TO THE PROMPTS.
"""
src/app/settings.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
def load_env_variables():
    """Call ``load_dotenv()`` and return the OpenAI API key.

    Returns:
        The value of the ``OPENAI_API_KEY`` environment variable, or
        ``None`` when it is not set.
    """
    load_dotenv()
    # The AWS credentials (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY) are
    # deliberately not read here.
    return os.getenv("OPENAI_API_KEY")
src/data/Indian_Penal_Code_Book.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5706a1b995df774c4c4ea1868223e18a13ba619977d323d3cab76a1cc095e237
3
+ size 20095787
src/data/__pycache__/embeddings.cpython-311.pyc ADDED
Binary file (494 Bytes). View file
 
src/data/__pycache__/vector_db.cpython-311.pyc ADDED
Binary file (1.56 kB). View file
 
src/data/_init__.py ADDED
File without changes
src/data/dataloader.py ADDED
File without changes
src/data/embeddings.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from langchain.embeddings import OpenAIEmbeddings
2
+ import os
3
+
4
def get_openai_embeddings(key):
    """Build an ``OpenAIEmbeddings`` client for ``text-embedding-ada-002``.

    Args:
        key: OpenAI API key passed to the client as ``api_key``.

    Returns:
        The configured ``OpenAIEmbeddings`` instance.
    """
    client_options = {"model": "text-embedding-ada-002", "api_key": key}
    return OpenAIEmbeddings(**client_options)
src/data/vector_db.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import faiss
2
+ import numpy as np
3
+ import os
4
+
5
def load_vector_db(db_path, embeddings, data=None):
    """Return the FAISS index stored at ``db_path``, creating it if absent.

    Args:
        db_path: Location of the serialized FAISS index.
        embeddings: Embedding client, used only when the index has to be
            built.
        data: Texts to index when no index exists at ``db_path`` yet.

    Returns:
        The index read by ``faiss.read_index`` or the one built by
        ``create_vector_db``.

    Raises:
        ValueError: If nothing exists at ``db_path`` and ``data`` is None.
    """
    # Fast path: an index has already been written to disk.
    if os.path.exists(db_path):
        return faiss.read_index(db_path)

    # Nothing on disk, so the index has to be built from `data`.
    if data is None:
        raise ValueError("Data must be provided to create the vector database.")
    return create_vector_db(embeddings, data, db_path)
16
+
17
def save_vector_db(vector_db, db_path):
    """Write the FAISS index ``vector_db`` to ``db_path``.

    Args:
        vector_db: Index object accepted by ``faiss.write_index``.
        db_path: Destination file path.
    """
    faiss.write_index(vector_db, db_path)
20
+
21
def create_vector_db(embeddings, data, db_path):
    """Embed ``data``, build a flat L2 FAISS index from it and save it.

    Args:
        embeddings: Object exposing ``embed_documents(texts)`` that returns
            one vector per text.
        data: List of texts to index.
        db_path: File the index is written to.

    Returns:
        The populated ``faiss.IndexFlatL2``.

    Raises:
        ValueError: If embedding ``data`` yields no vectors, or the vectors
            do not form a 2-D matrix.
    """
    vectors = embeddings.embed_documents(data)
    # Fail with a clear message instead of an IndexError on vectors[0].
    if len(vectors) == 0:
        raise ValueError("Cannot create a vector database from empty data.")

    # FAISS works on contiguous float32 matrices; a list of Python floats
    # would otherwise become a float64 array.
    matrix = np.ascontiguousarray(vectors, dtype=np.float32)
    if matrix.ndim != 2:
        raise ValueError("Embeddings must form a 2-D (n_texts, dimension) matrix.")

    index = faiss.IndexFlatL2(matrix.shape[1])
    index.add(matrix)

    # Make sure the destination directory exists before writing.
    parent_dir = os.path.dirname(db_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    faiss.write_index(index, db_path)
    return index
src/mlflow/__init__.py ADDED
File without changes
src/mlflow/experiment-tracking.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import mlflow
2
+
3
def log_experiment_params(params):
    """Log every entry of ``params`` through ``mlflow.log_param``.

    Args:
        params: Mapping of parameter name to value.
    """
    for name in params:
        mlflow.log_param(name, params[name])
6
+
7
def log_experiment_metrics(metrics):
    """Log every entry of ``metrics`` through ``mlflow.log_metric``.

    Args:
        metrics: Mapping of metric name to value.
    """
    for name in metrics:
        mlflow.log_metric(name, metrics[name])
src/mlflow/mlflow-setup.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import mlflow
2
+ from mlflow import log_metric, log_param, log_artifact
3
+
4
def setup_mlflow(tracking_uri=None, experiment_name="legalai_experiment"):
    """Point MLflow at a tracking server and select the active experiment.

    Args:
        tracking_uri: Tracking server URI. When omitted, the
            ``MLFLOW_TRACKING_URI`` environment variable is used, falling
            back to ``"http://mlflow:5000"``.
        experiment_name: Experiment passed to ``mlflow.set_experiment``.
    """
    import os  # local import: this module does not import os at the top

    if tracking_uri is None:
        # `or` also covers an environment variable that is set but empty.
        tracking_uri = os.environ.get("MLFLOW_TRACKING_URI") or "http://mlflow:5000"
    mlflow.set_tracking_uri(tracking_uri)
    mlflow.set_experiment(experiment_name)
src/run.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # legalaibot/src/run_app.py
2
+ import os
3
+ import subprocess
4
+
5
if __name__ == "__main__":
    # Directory containing this file (src/), which holds the `app` and
    # `data` packages that main.py imports.
    src_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(src_dir)

    # Prepend src/ to PYTHONPATH without leaving a trailing separator when
    # the variable was previously unset (an empty entry means "current
    # directory" to Python).
    inherited_path = os.environ.get("PYTHONPATH", "")
    os.environ["PYTHONPATH"] = (
        src_dir + os.pathsep + inherited_path if inherited_path else src_dir
    )

    # main.py refers to "assets/..." and "./ipc_vector_db/..." relative to
    # the working directory, so run it from the project root and pass an
    # absolute script path. This makes the launcher work from any directory.
    completed = subprocess.run(
        ["streamlit", "run", os.path.join(src_dir, "app", "main.py")],
        cwd=project_root,
    )
    # Propagate Streamlit's exit status to the caller.
    raise SystemExit(completed.returncode)
tests/test.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+ from notebooks.model import qa
3
+
4
class TestLawGPT(unittest.TestCase):
    """Smoke test for the retrieval chain exposed as ``qa``."""

    def test_basic_query(self):
        """The answer to a Section 302 question should mention the section."""
        answer = qa.invoke(input="What is Section 302 in IPC?")["answer"]
        self.assertIn("Section 302", answer)


if __name__ == "__main__":
    unittest.main()
+ unittest.main()