Spaces:

realrohilbansal
/

LegalAlly

Sleeping

App Files Files Community

Rohil Bansal committed on Sep 3, 2024

Commit

96dad0a

1 Parent(s): d85a4da

Initial Commit. LegalAlly.

Browse files

Files changed (33) hide show

.gitattributes +5 -0
Dockerfile +10 -0
README.md +1 -1
assets/Black Bold Initial AI Business Logo (1).png +3 -0
assets/Black Bold Initial AI Business Logo.jpg +3 -0
docker-compose.yml +19 -0
internet-law-concept-with-3d-rendering-cute-robot-hold-gavel-judge_493806-6140.jpg +3 -0
ipc_vector_db/index.faiss +3 -0
ipc_vector_db/index.pkl +3 -0
law-judgement-justice-equality-concept.jpg +3 -0
notebooks/model.ipynb +3 -0
notebooks/model.py +118 -0
requirements.txt +0 -0
src/app/__init__.py +0 -0
src/app/__pycache__/__init__.cpython-311.pyc +0 -0
src/app/__pycache__/logger.cpython-311.pyc +0 -0
src/app/__pycache__/settings.cpython-311.pyc +0 -0
src/app/logger.py +6 -0
src/app/main.py +111 -0
src/app/prompts.py +8 -0
src/app/settings.py +9 -0
src/data/Indian_Penal_Code_Book.pdf +3 -0
src/data/__pycache__/embeddings.cpython-311.pyc +0 -0
src/data/__pycache__/vector_db.cpython-311.pyc +0 -0
src/data/_init__.py +0 -0
src/data/dataloader.py +0 -0
src/data/embeddings.py +5 -0
src/data/vector_db.py +28 -0
src/mlflow/__init__.py +0 -0
src/mlflow/experiment-tracking.py +9 -0
src/mlflow/mlflow-setup.py +6 -0
src/run.py +7 -0
tests/test.py +11 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.pdf filter=lfs diff=lfs merge=lfs -text
+*.faiss filter=lfs diff=lfs merge=lfs -text
+*.ipynb filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,10 @@

+# Slim Python base image for the Streamlit app.
+FROM python:3.9-slim
+WORKDIR /app
+# src/app/main.py imports `app` and `data` as top-level packages; both live under src/.
+ENV PYTHONPATH=/app/src
+# Install dependencies before copying the source so this layer is reused on code-only changes.
+COPY requirements.txt requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+# Port published as 8501:8501 by docker-compose.yml.
+EXPOSE 8501
+CMD ["streamlit", "run", "src/app/main.py"]

README.md CHANGED Viewed

@@ -5,7 +5,7 @@ colorFrom: green
 colorTo: pink
 sdk: streamlit
 sdk_version: 1.38.0
-app_file: app.py
 pinned: false
 license: unknown
 ---

 colorTo: pink
 sdk: streamlit
 sdk_version: 1.38.0
+app_file: src/run.py
 pinned: false
 license: unknown
 ---

assets/Black Bold Initial AI Business Logo (1).png ADDED Viewed

Git LFS Details

SHA256: dd2d70df208fc71924e2c3785449dd818e5189330311c3905c9b2169f6243d89
Pointer size: 130 Bytes
Size of remote file: 26 kB

assets/Black Bold Initial AI Business Logo.jpg ADDED Viewed

Git LFS Details

SHA256: 090a5f009ff3d615bf70f46f0ec69b8ff859982a95b54cb80b80dff9227e6da9
Pointer size: 130 Bytes
Size of remote file: 13.7 kB

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,19 @@

+version: '3.8'
+services:
+  web:
+    build: .
+    ports:
+      - "8501:8501"
+    volumes:
+      - .:/app
+    environment:
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+  mlflow:
+    image: mlflow/mlflow:latest
+    ports:
+      - "5000:5000"
+    environment:
+      - MLFLOW_TRACKING_URI=http://mlflow:5000
+      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
+      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}

internet-law-concept-with-3d-rendering-cute-robot-hold-gavel-judge_493806-6140.jpg ADDED Viewed

Git LFS Details

SHA256: 20155a1fe444ca5a7d668761901cf978e964c47d3370f21978f4840559e21ed5
Pointer size: 130 Bytes
Size of remote file: 33.2 kB

ipc_vector_db/index.faiss ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:daed6e305b10ccabd99cbe76a4e5ae6ab7d6bdd06d784253112d63b54f47cb37
+size 18247725

ipc_vector_db/index.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a58e22af7ab6a30e45af4fc6d5a4c144423bcab622731a0ded139edf5fc4d4e
+size 5925124

law-judgement-justice-equality-concept.jpg ADDED Viewed

Git LFS Details

SHA256: 340efd29c5118ceea69a78d7ffd9dfc82c1309e1bb8b0f8e3ebe5fd386eda460
Pointer size: 132 Bytes
Size of remote file: 5.39 MB

notebooks/model.ipynb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ed0386c9c5ecd3a71e82822e1248435d51f4946c0b8d984d5336838029bad3d
+size 83863

notebooks/model.py ADDED Viewed

	@@ -0,0 +1,118 @@

+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.prompts import PromptTemplate
+from langchain_together import Together
+import os
+from langchain.memory import ConversationBufferWindowMemory
+from langchain.chains import ConversationalRetrievalChain
+import streamlit as st
+import time
+# Set the page title.
+st.set_page_config(page_title="LawGPT")
+# Three columns in a 1:4:1 ratio; only the middle one is used, to show the logo image.
+col1, col2, col3 = st.columns([1,4,1])
+with col2:
+    st.image("assets/Black Bold Initial AI Business Logo.jpg")
+# Inject a raw <style> block (hence unsafe_allow_html=True) that sets a black background,
+# recolours buttons and hides several Streamlit UI elements (menu, footer, deploy button, ...).
+st.markdown(
+    """
+     <style>
+    .stApp, .ea3mdgi6{
+      background-color:#000000;
+    }
+  div.stButton > button:first-child {
+    background-color: #ffd0d0;
+}
+div.stButton > button:active {
+    # background-color: #ff6262;
+}
+   div[data-testid="stStatusWidget"] div button {
+        display: none;
+        }
+    .reportview-container {
+            margin-top: -2em;
+        }
+        #MainMenu {visibility: hidden;}
+        .stDeployButton {display:none;}
+        footer {visibility: hidden;}
+        #stDecoration {display:none;}
+    button[title="View fullscreen"]{
+    visibility: hidden;}
+    button:first-child{
+    background-color : transparent !important;
+    }
+  </style>
+""",
+  unsafe_allow_html=True,
+)
+def reset_conversation():
+  st.session_state.messages = []
+  st.session_state.memory.clear()
+if "messages" not in st.session_state:
+    st.session_state["messages"] = []
+if "memory" not in st.session_state:
+    st.session_state["memory"] = ConversationBufferWindowMemory(k=2, memory_key="chat_history",return_messages=True)
+embedings = HuggingFaceEmbeddings(model_name="nomic-ai/nomic-embed-text-v1",model_kwargs={"trust_remote_code":True,"revision":"289f532e14dbbbd5a04753fa58739e9ba766f3c7"})
+db = FAISS.load_local("./ipc_vector_db", embedings, allow_dangerous_deserialization=True)
+db_retriever = db.as_retriever(search_type="similarity",search_kwargs={"k": 4})
+# Prompt text with three placeholders: {context}, {chat_history} and {question}.
+# NOTE(review): the text opens with "<s>[INST]" but ends with "</s>[INST]" rather than a
+# closing "[/INST]" tag - confirm this is the intended format for the Mistral instruct
+# model configured below.
+prompt_template = """<s>[INST]This is a chat template and As a legal chat bot specializing in Indian Penal Code queries, your primary objective is to provide accurate and concise information based on the user's questions. Do not generate your own questions and answers. You will adhere strictly to the instructions provided, offering relevant context from the knowledge base while avoiding unnecessary details. Your responses will be brief, to the point, and in compliance with the established format. If a question falls outside the given context, you will refrain from utilizing the chat history and instead rely on your own knowledge base to generate an appropriate response. You will prioritize the user's query and refrain from posing additional questions. The aim is to deliver professional, precise, and contextually relevant information pertaining to the Indian Penal Code.
+CONTEXT: {context}
+CHAT HISTORY: {chat_history}
+QUESTION: {question}
+ANSWER:
+</s>[INST]
+"""
+# Wrap the text in a PromptTemplate declaring the same three input variables.
+prompt = PromptTemplate(template=prompt_template,
+                        input_variables=['context', 'question', 'chat_history'])
+llm = Together(
+    model="mistralai/Mistral-7B-Instruct-v0.2",
+    temperature=0.5,
+    max_tokens=1024,
+    together_api_key="b68f2588587cb665eb94e89cff6ddafce235a0c570566909f9049fc4837d64be"
+)
+qa = ConversationalRetrievalChain.from_llm(
+    llm=llm,
+    memory=ConversationBufferWindowMemory(k=2, memory_key="chat_history",return_messages=True),
+    retriever=db_retriever,
+    combine_docs_chain_kwargs={'prompt': prompt}
+)
+for message in st.session_state.get("messages", []):
+    with st.chat_message(message.get("role")):
+        st.write(message.get("content"))
+input_prompt = st.chat_input("Say something")
+if input_prompt:
+    with st.chat_message("user"):
+        st.write(input_prompt)
+    st.session_state.messages.append({"role":"user","content":input_prompt})
+    with st.chat_message("assistant"):
+        with st.status("Thinking 💡...",expanded=True):
+            result = qa.invoke(input=input_prompt)
+            message_placeholder = st.empty()
+            full_response = "⚠️ **_Note: Information provided may be inaccurate._** \n\n\n"
+        for chunk in result["answer"]:
+            full_response+=chunk
+            time.sleep(0.02)
+            message_placeholder.markdown(full_response+" ▌")
+        st.button('Reset All Chat 🗑️', on_click=reset_conversation)
+    st.session_state.messages.append({"role":"assistant","content":result["answer"]})

requirements.txt ADDED Viewed

Binary file (156 Bytes). View file

src/app/__init__.py ADDED Viewed

File without changes

src/app/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (166 Bytes). View file

src/app/__pycache__/logger.cpython-311.pyc ADDED Viewed

Binary file (520 Bytes). View file

src/app/__pycache__/settings.cpython-311.pyc ADDED Viewed

Binary file (515 Bytes). View file

src/app/logger.py ADDED Viewed

	@@ -0,0 +1,6 @@

+import logging
+def setup_logger():
+    logging.basicConfig(level=logging.INFO)
+    logger = logging.getLogger(__name__)
+    return logger

src/app/main.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import logging
+import os
+import time
+import streamlit as st
+from langchain.chains import ConversationalRetrievalChain
+from langchain.docstore import InMemoryDocstore
+from langchain.docstore.document import Document
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.llms import OpenAI
+from langchain.memory import ConversationBufferWindowMemory
+from langchain.prompts import PromptTemplate
+from langchain.vectorstores import FAISS
+from app.settings import load_env_variables
+from app.logger import setup_logger
+from data.vector_db import load_vector_db, save_vector_db
+from data.embeddings import get_openai_embeddings
+# Load environment variables and setup logging
+openai_api_key = load_env_variables()
+setup_logger()
+st.set_page_config(page_title="LawGPT")
+col1, col2, col3 = st.columns([1, 4, 1])
+with col2:
+    st.image("assets/Black Bold Initial AI Business Logo.jpg")
+st.markdown("""
+    <style>
+    .stApp, .ea3mdgi6{ background-color:#000000; }
+    div.stButton > button:first-child { background-color: #ffd0d0; }
+    div.stButton > button:active { background-color: #ff6262; }
+    div[data-testid="stStatusWidget"] div button { display: none; }
+    .reportview-container { margin-top: -2em; }
+    #MainMenu {visibility: hidden;}
+    .stDeployButton {display:none;}
+    footer {visibility: hidden;}
+    #stDecoration {display:none;}
+    button[title="View fullscreen"]{ visibility: hidden;}
+    button:first-child{ background-color : transparent !important; }
+    </style>
+""", unsafe_allow_html=True)
+def reset_conversation():
+    st.session_state.messages = []
+    st.session_state.memory.clear()
+if "messages" not in st.session_state:
+    st.session_state["messages"] = []
+if "memory" not in st.session_state:
+    st.session_state["memory"] = ConversationBufferWindowMemory(k=2, memory_key="chat_history", return_messages=True)
+# Use OpenAI embeddings
+embeddings = get_openai_embeddings(openai_api_key)
+# Placeholder data for creating the vector database
+data = [
+    "Example legal text 1",
+    "Example legal text 2",
+    "Example legal text 3",
+    # Add more data as needed
+]
+# Load vector database using FAISS
+db_path = "./ipc_vector_db/vectordb"
+vector_db = load_vector_db(db_path, embeddings, data)
+db_retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 4})
+# Prompt text with three placeholders: {context}, {chat_history} and {question}.
+# NOTE(review): the "<s>[INST] ... </s>[INST]" markers look like instruct-model chat tags;
+# confirm they are wanted for the OpenAI model configured below.
+prompt_template = """<s>[INST]This is a chat template and As a legal chat bot specializing in Indian Penal Code queries, your primary objective is to provide accurate and concise information based on the user's questions. Do not generate your own questions and answers. You will adhere strictly to the instructions provided, offering relevant context from the knowledge base while avoiding unnecessary details. Your responses will be brief, to the point, and in compliance with the established format. If a question falls outside the given context, you will refrain from utilizing the chat history and instead rely on your own knowledge base to generate an appropriate response. You will prioritize the user's query and refrain from posing additional questions. The aim is to deliver professional, precise, and contextually relevant information pertaining to the Indian Penal Code.
+CONTEXT: {context}
+CHAT HISTORY: {chat_history}
+QUESTION: {question}
+ANSWER:
+</s>[INST]
+"""
+# Wrap the text in a PromptTemplate declaring the same three input variables.
+prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question', 'chat_history'])
+# Use OpenAI LLM
+llm = OpenAI(model_name="text-davinci-003", temperature=0.5, max_tokens=1024, openai_api_key=os.getenv("OPENAI_API_KEY"))
+qa = ConversationalRetrievalChain.from_llm(
+    llm=llm,
+    memory=ConversationBufferWindowMemory(k=2, memory_key="chat_history", return_messages=True),
+    retriever=db_retriever,
+    combine_docs_chain_kwargs={'prompt': prompt}
+)
+for message in st.session_state.get("messages", []):
+    with st.chat_message(message.get("role")):
+        st.write(message.get("content"))
+input_prompt = st.chat_input("Say something")
+if input_prompt:
+    with st.chat_message("user"):
+        st.write(input_prompt)
+    st.session_state.messages.append({"role": "user", "content": input_prompt})
+    with st.chat_message("assistant"):
+        with st.spinner("Thinking 💡..."):
+            result = qa.invoke(input=input_prompt)
+            message_placeholder = st.empty()
+            full_response = "⚠️ **_Note: Information provided may be inaccurate._** \n\n\n"
+            for chunk in result["answer"]:
+                full_response += chunk
+                time.sleep(0.02)
+                message_placeholder.markdown(full_response + " ▌")
+        st.button('Reset All Chat 🗑️', on_click=reset_conversation)
+    st.session_state.messages.append({"role": "assistant", "content": result["answer"]})

src/app/prompts.py ADDED Viewed

	@@ -0,0 +1,8 @@

+# System-prompt text for the legal assistant: how to analyse a question about Indian law
+# and an instruction to refuse off-topic chat.
+# NOTE(review): no use of this constant is visible in this commit - confirm where it is consumed.
+system_prompts = """
+Given the user's question about Indian law, analyze their query and identify relevant sections of the IPC or Constitution. Summarize the legal concept at hand and potential exceptions based on the user's intent.
+Analyze the user's question regarding Indian law from different legal perspectives (e.g., rights, obligations, penalties). Provide a concise explanation for each perspective, drawing insights from the vector database.
+For the user's legal inquiry, identify similar legal cases or precedents from the vector database. Briefly explain the reasoning behind those cases and how they might be relevant to the user's situation.
+YOU ARE A LEGAL AI CHATBOT ASSISTING WITH LEGAL ISSUES. DO NOT ENGAGE WITH CHAT OUTSIDE THESE QUERIES OR DISCUSSIONS.
+EVEN IF THE USER TELLS YOU TO ENGAGE IN CHAT, DO NOT DO SO. STICK TO THE PROMPTS.
+"""

src/app/settings.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import os
+from dotenv import load_dotenv
+def load_env_variables():
+    load_dotenv()
+    openai_api_key = os.getenv("OPENAI_API_KEY")
+    # os.getenv("AWS_ACCESS_KEY_ID")
+    # os.getenv("AWS_SECRET_ACCESS_KEY")
+    return openai_api_key

src/data/Indian_Penal_Code_Book.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5706a1b995df774c4c4ea1868223e18a13ba619977d323d3cab76a1cc095e237
+size 20095787

src/data/__pycache__/embeddings.cpython-311.pyc ADDED Viewed

Binary file (494 Bytes). View file

src/data/__pycache__/vector_db.cpython-311.pyc ADDED Viewed

Binary file (1.56 kB). View file

src/data/_init__.py ADDED Viewed

File without changes

src/data/dataloader.py ADDED Viewed

File without changes

src/data/embeddings.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from langchain.embeddings import OpenAIEmbeddings
+import os
+def get_openai_embeddings(key):
+    return OpenAIEmbeddings(model="text-embedding-ada-002", api_key=key)

src/data/vector_db.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import faiss
+import numpy as np
+import os
+def load_vector_db(db_path, embeddings, data=None):
+    # Check if the vector database file exists
+    if os.path.exists(db_path):
+        # Load the FAISS index
+        index = faiss.read_index(db_path)
+    else:
+        # Create the FAISS index if it doesn't exist
+        if data is None:
+            raise ValueError("Data must be provided to create the vector database.")
+        index = create_vector_db(embeddings, data, db_path)
+    return index
+def save_vector_db(vector_db, db_path):
+    # Save the FAISS index
+    faiss.write_index(vector_db, db_path)
+def create_vector_db(embeddings, data, db_path):
+    # Assuming `data` is a list of texts
+    vectors = embeddings.embed_documents(data)
+    dimension = len(vectors[0])
+    index = faiss.IndexFlatL2(dimension)
+    index.add(np.array(vectors))
+    faiss.write_index(index, db_path)
+    return index

src/mlflow/__init__.py ADDED Viewed

File without changes

src/mlflow/experiment-tracking.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import mlflow
+def log_experiment_params(params):
+    for key, value in params.items():
+        mlflow.log_param(key, value)
+def log_experiment_metrics(metrics):
+    for key, value in metrics.items():
+        mlflow.log_metric(key, value)

src/mlflow/mlflow-setup.py ADDED Viewed

	@@ -0,0 +1,6 @@

+import mlflow
+from mlflow import log_metric, log_param, log_artifact
+def setup_mlflow():
+    mlflow.set_tracking_uri("http://mlflow:5000")
+    mlflow.set_experiment("legalai_experiment")

src/run.py ADDED Viewed

	@@ -0,0 +1,7 @@

+# legalaibot/src/run_app.py
+import os
+import subprocess
+if __name__ == "__main__":
+    os.environ["PYTHONPATH"] = os.path.dirname(os.path.abspath(__file__)) + os.pathsep + os.environ.get("PYTHONPATH", "")
+    subprocess.run(["streamlit", "run", "src/app/main.py"])

tests/test.py ADDED Viewed

	@@ -0,0 +1,11 @@

+import unittest
+from notebooks.model import qa
+class TestLawGPT(unittest.TestCase):
+    def test_basic_query(self):
+        query = "What is Section 302 in IPC?"
+        response = qa.invoke(input=query)
+        self.assertIn("Section 302", response["answer"])
+if __name__ == "__main__":
+    unittest.main()